diff --git a/crates/hir-def/src/body/pretty.rs b/crates/hir-def/src/body/pretty.rs index c3bd99b948..8f1e6c3bbd 100644 --- a/crates/hir-def/src/body/pretty.rs +++ b/crates/hir-def/src/body/pretty.rs @@ -611,6 +611,7 @@ impl<'a> Printer<'a> { match literal { Literal::String(it) => w!(self, "{:?}", it), Literal::ByteString(it) => w!(self, "\"{}\"", it.escape_ascii()), + Literal::CString(it) => w!(self, "\"{}\\0\"", it), Literal::Char(it) => w!(self, "'{}'", it.escape_debug()), Literal::Bool(it) => w!(self, "{}", it), Literal::Int(i, suffix) => { diff --git a/crates/hir-def/src/hir.rs b/crates/hir-def/src/hir.rs index 1f4577629c..27150d8623 100644 --- a/crates/hir-def/src/hir.rs +++ b/crates/hir-def/src/hir.rs @@ -85,6 +85,7 @@ impl fmt::Display for FloatTypeWrapper { pub enum Literal { String(Box), ByteString(Box<[u8]>), + CString(Box), Char(char), Bool(bool), Int(i128, Option), @@ -135,6 +136,10 @@ impl From for Literal { let text = s.value().map(Box::from).unwrap_or_else(Default::default); Literal::String(text) } + LiteralKind::CString(s) => { + let text = s.value().map(Box::from).unwrap_or_else(Default::default); + Literal::CString(text) + } LiteralKind::Byte(b) => { Literal::Uint(b.value().unwrap_or_default() as u128, Some(BuiltinUint::U8)) } diff --git a/crates/hir-ty/src/infer/expr.rs b/crates/hir-ty/src/infer/expr.rs index 353bf6568c..385766fdad 100644 --- a/crates/hir-ty/src/infer/expr.rs +++ b/crates/hir-ty/src/infer/expr.rs @@ -815,7 +815,7 @@ impl<'a> InferenceContext<'a> { Expr::Array(array) => self.infer_expr_array(array, expected), Expr::Literal(lit) => match lit { Literal::Bool(..) => self.result.standard_types.bool_.clone(), - Literal::String(..) => { + Literal::String(..) | Literal::CString(..) => { TyKind::Ref(Mutability::Not, static_lifetime(), TyKind::Str.intern(Interner)) .intern(Interner) } diff --git a/crates/hir-ty/src/infer/pat.rs b/crates/hir-ty/src/infer/pat.rs index 5748039511..18b05fcf3c 100644 --- a/crates/hir-ty/src/infer/pat.rs +++ b/crates/hir-ty/src/infer/pat.rs @@ -428,9 +428,10 @@ fn is_non_ref_pat(body: &hir_def::body::Body, pat: PatId) -> bool { // FIXME: ConstBlock/Path/Lit might actually evaluate to ref, but inference is unimplemented. Pat::Path(..) => true, Pat::ConstBlock(..) => true, - Pat::Lit(expr) => { - !matches!(body[*expr], Expr::Literal(Literal::String(..) | Literal::ByteString(..))) - } + Pat::Lit(expr) => !matches!( + body[*expr], + Expr::Literal(Literal::String(..) | Literal::CString(..) | Literal::ByteString(..)) + ), Pat::Wild | Pat::Bind { .. } | Pat::Ref { .. } | Pat::Box { .. } | Pat::Missing => false, } } diff --git a/crates/hir-ty/src/mir/lower.rs b/crates/hir-ty/src/mir/lower.rs index 627c36dca9..051ae228e2 100644 --- a/crates/hir-ty/src/mir/lower.rs +++ b/crates/hir-ty/src/mir/lower.rs @@ -1112,15 +1112,24 @@ impl<'ctx> MirLowerCtx<'ctx> { let bytes = match l { hir_def::hir::Literal::String(b) => { let b = b.as_bytes(); - let mut data = vec![]; + let mut data = Vec::with_capacity(mem::size_of::() * 2); data.extend(0usize.to_le_bytes()); data.extend(b.len().to_le_bytes()); let mut mm = MemoryMap::default(); mm.insert(0, b.to_vec()); return Ok(Operand::from_concrete_const(data, mm, ty)); } + hir_def::hir::Literal::CString(b) => { + let b = b.as_bytes(); + let mut data = Vec::with_capacity(mem::size_of::() * 2); + data.extend(0usize.to_le_bytes()); + data.extend(b.len().to_le_bytes()); + let mut mm = MemoryMap::default(); + mm.insert(0, b.iter().copied().chain(iter::once(0)).collect::>()); + return Ok(Operand::from_concrete_const(data, mm, ty)); + } hir_def::hir::Literal::ByteString(b) => { - let mut data = vec![]; + let mut data = Vec::with_capacity(mem::size_of::() * 2); data.extend(0usize.to_le_bytes()); data.extend(b.len().to_le_bytes()); let mut mm = MemoryMap::default(); diff --git a/crates/ide-assists/src/handlers/raw_string.rs b/crates/ide-assists/src/handlers/raw_string.rs index 01420430bb..40ee4771d1 100644 --- a/crates/ide-assists/src/handlers/raw_string.rs +++ b/crates/ide-assists/src/handlers/raw_string.rs @@ -20,6 +20,7 @@ use crate::{utils::required_hashes, AssistContext, AssistId, AssistKind, Assists // } // ``` pub(crate) fn make_raw_string(acc: &mut Assists, ctx: &AssistContext<'_>) -> Option<()> { + // FIXME: This should support byte and c strings as well. let token = ctx.find_token_at_offset::()?; if token.is_raw() { return None; diff --git a/crates/ide/src/extend_selection.rs b/crates/ide/src/extend_selection.rs index 9f78c75e90..f906182224 100644 --- a/crates/ide/src/extend_selection.rs +++ b/crates/ide/src/extend_selection.rs @@ -39,7 +39,7 @@ fn try_extend_selection( ) -> Option { let range = frange.range; - let string_kinds = [COMMENT, STRING, BYTE_STRING]; + let string_kinds = [COMMENT, STRING, BYTE_STRING, C_STRING]; let list_kinds = [ RECORD_PAT_FIELD_LIST, MATCH_ARM_LIST, diff --git a/crates/ide/src/syntax_highlighting.rs b/crates/ide/src/syntax_highlighting.rs index 751e51da0d..8c02fe8164 100644 --- a/crates/ide/src/syntax_highlighting.rs +++ b/crates/ide/src/syntax_highlighting.rs @@ -16,7 +16,10 @@ mod tests; use hir::{Name, Semantics}; use ide_db::{FxHashMap, RootDatabase, SymbolKind}; use syntax::{ - ast, AstNode, AstToken, NodeOrToken, SyntaxKind::*, SyntaxNode, TextRange, WalkEvent, T, + ast::{self, IsString}, + AstNode, AstToken, NodeOrToken, + SyntaxKind::*, + SyntaxNode, TextRange, WalkEvent, T, }; use crate::{ @@ -440,7 +443,17 @@ fn traverse( && ast::ByteString::can_cast(descended_token.kind()) { if let Some(byte_string) = ast::ByteString::cast(token) { - highlight_escape_string(hl, &byte_string, range.start()); + if !byte_string.is_raw() { + highlight_escape_string(hl, &byte_string, range.start()); + } + } + } else if ast::CString::can_cast(token.kind()) + && ast::CString::can_cast(descended_token.kind()) + { + if let Some(c_string) = ast::CString::cast(token) { + if !c_string.is_raw() { + highlight_escape_string(hl, &c_string, range.start()); + } } } else if ast::Char::can_cast(token.kind()) && ast::Char::can_cast(descended_token.kind()) diff --git a/crates/ide/src/syntax_highlighting/highlight.rs b/crates/ide/src/syntax_highlighting/highlight.rs index 936362914a..925057ffaa 100644 --- a/crates/ide/src/syntax_highlighting/highlight.rs +++ b/crates/ide/src/syntax_highlighting/highlight.rs @@ -26,7 +26,7 @@ pub(super) fn token(sema: &Semantics<'_, RootDatabase>, token: SyntaxToken) -> O } let highlight: Highlight = match token.kind() { - STRING | BYTE_STRING => HlTag::StringLiteral.into(), + STRING | BYTE_STRING | C_STRING => HlTag::StringLiteral.into(), INT_NUMBER if token.parent_ancestors().nth(1).map(|it| it.kind()) == Some(FIELD_EXPR) => { SymbolKind::Field.into() } diff --git a/crates/ide/src/syntax_tree.rs b/crates/ide/src/syntax_tree.rs index bb6827e8a4..df19712426 100644 --- a/crates/ide/src/syntax_tree.rs +++ b/crates/ide/src/syntax_tree.rs @@ -1,5 +1,7 @@ -use ide_db::base_db::{FileId, SourceDatabase}; -use ide_db::RootDatabase; +use ide_db::{ + base_db::{FileId, SourceDatabase}, + RootDatabase, +}; use syntax::{ AstNode, NodeOrToken, SourceFile, SyntaxKind::STRING, SyntaxToken, TextRange, TextSize, }; diff --git a/crates/parser/src/grammar/expressions/atom.rs b/crates/parser/src/grammar/expressions/atom.rs index d051dd2682..3cf9c4dd4b 100644 --- a/crates/parser/src/grammar/expressions/atom.rs +++ b/crates/parser/src/grammar/expressions/atom.rs @@ -12,6 +12,8 @@ use super::*; // let _ = r"d"; // let _ = b"e"; // let _ = br"f"; +// let _ = c"g"; +// let _ = cr"h"; // } pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[ T![true], @@ -22,6 +24,7 @@ pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[ CHAR, STRING, BYTE_STRING, + C_STRING, ]); pub(crate) fn literal(p: &mut Parser<'_>) -> Option { diff --git a/crates/parser/src/grammar/generic_args.rs b/crates/parser/src/grammar/generic_args.rs index b7d72b8d33..e589b69934 100644 --- a/crates/parser/src/grammar/generic_args.rs +++ b/crates/parser/src/grammar/generic_args.rs @@ -28,6 +28,7 @@ const GENERIC_ARG_FIRST: TokenSet = TokenSet::new(&[ BYTE, STRING, BYTE_STRING, + C_STRING, ]) .union(types::TYPE_FIRST); diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs index ace6985a58..e4dce21f32 100644 --- a/crates/parser/src/lexed_str.rs +++ b/crates/parser/src/lexed_str.rs @@ -277,7 +277,7 @@ impl<'a> Converter<'a> { if !terminated { err = "Missing trailing `\"` symbol to terminate the string literal"; } - STRING + C_STRING } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if n_hashes.is_none() { @@ -295,7 +295,7 @@ impl<'a> Converter<'a> { if n_hashes.is_none() { err = "Invalid raw string literal"; } - STRING + C_STRING } }; diff --git a/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rast b/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rast index 403c265ea3..fe73d9dfe4 100644 --- a/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rast +++ b/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rast @@ -131,6 +131,30 @@ SOURCE_FILE LITERAL BYTE_STRING "br\"f\"" SEMICOLON ";" + WHITESPACE "\n " + LET_STMT + LET_KW "let" + WHITESPACE " " + WILDCARD_PAT + UNDERSCORE "_" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + C_STRING "c\"g\"" + SEMICOLON ";" + WHITESPACE "\n " + LET_STMT + LET_KW "let" + WHITESPACE " " + WILDCARD_PAT + UNDERSCORE "_" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + C_STRING "cr\"h\"" + SEMICOLON ";" WHITESPACE "\n" R_CURLY "}" WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs b/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs index 2e11a5a6e6..e7f235a83b 100644 --- a/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs +++ b/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs @@ -9,4 +9,6 @@ fn foo() { let _ = r"d"; let _ = b"e"; let _ = br"f"; + let _ = c"g"; + let _ = cr"h"; } diff --git a/crates/syntax/src/ast/expr_ext.rs b/crates/syntax/src/ast/expr_ext.rs index c43d0830b9..1eef286116 100644 --- a/crates/syntax/src/ast/expr_ext.rs +++ b/crates/syntax/src/ast/expr_ext.rs @@ -288,6 +288,7 @@ impl ast::ArrayExpr { pub enum LiteralKind { String(ast::String), ByteString(ast::ByteString), + CString(ast::CString), IntNumber(ast::IntNumber), FloatNumber(ast::FloatNumber), Char(ast::Char), @@ -319,6 +320,9 @@ impl ast::Literal { if let Some(t) = ast::ByteString::cast(token.clone()) { return LiteralKind::ByteString(t); } + if let Some(t) = ast::CString::cast(token.clone()) { + return LiteralKind::CString(t); + } if let Some(t) = ast::Char::cast(token.clone()) { return LiteralKind::Char(t); } diff --git a/crates/syntax/src/ast/generated/tokens.rs b/crates/syntax/src/ast/generated/tokens.rs index a3209c5abd..f5863e9efe 100644 --- a/crates/syntax/src/ast/generated/tokens.rs +++ b/crates/syntax/src/ast/generated/tokens.rs @@ -90,6 +90,27 @@ impl AstToken for ByteString { fn syntax(&self) -> &SyntaxToken { &self.syntax } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CString { + pub(crate) syntax: SyntaxToken, +} +impl std::fmt::Display for CString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.syntax, f) + } +} +impl AstToken for CString { + fn can_cast(kind: SyntaxKind) -> bool { kind == C_STRING } + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxToken { &self.syntax } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct IntNumber { pub(crate) syntax: SyntaxToken, diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 2cd312e7f4..090eb89f47 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -145,6 +145,10 @@ impl QuoteOffsets { } pub trait IsString: AstToken { + const RAW_PREFIX: &'static str; + fn is_raw(&self) -> bool { + self.text().starts_with(Self::RAW_PREFIX) + } fn quote_offsets(&self) -> Option { let text = self.text(); let offsets = QuoteOffsets::new(text)?; @@ -183,20 +187,18 @@ pub trait IsString: AstToken { cb(text_range + offset, unescaped_char); }); } -} - -impl IsString for ast::String {} - -impl ast::String { - pub fn is_raw(&self) -> bool { - self.text().starts_with('r') - } - pub fn map_range_up(&self, range: TextRange) -> Option { + fn map_range_up(&self, range: TextRange) -> Option { let contents_range = self.text_range_between_quotes()?; assert!(TextRange::up_to(contents_range.len()).contains_range(range)); Some(range + contents_range.start()) } +} +impl IsString for ast::String { + const RAW_PREFIX: &'static str = "r"; +} + +impl ast::String { pub fn value(&self) -> Option> { if self.is_raw() { let text = self.text(); @@ -235,13 +237,11 @@ impl ast::String { } } -impl IsString for ast::ByteString {} +impl IsString for ast::ByteString { + const RAW_PREFIX: &'static str = "br"; +} impl ast::ByteString { - pub fn is_raw(&self) -> bool { - self.text().starts_with("br") - } - pub fn value(&self) -> Option> { if self.is_raw() { let text = self.text(); @@ -280,6 +280,49 @@ impl ast::ByteString { } } +impl IsString for ast::CString { + const RAW_PREFIX: &'static str = "cr"; +} + +impl ast::CString { + pub fn value(&self) -> Option> { + if self.is_raw() { + let text = self.text(); + let text = + &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; + return Some(Cow::Borrowed(text)); + } + + let text = self.text(); + let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; + + let mut buf = String::new(); + let mut prev_end = 0; + let mut has_error = false; + unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( + unescaped_char, + buf.capacity() == 0, + ) { + (Ok(c), false) => buf.push(c), + (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => { + prev_end = char_range.end + } + (Ok(c), true) => { + buf.reserve_exact(text.len()); + buf.push_str(&text[..prev_end]); + buf.push(c); + } + (Err(_), _) => has_error = true, + }); + + match (has_error, buf.capacity() == 0) { + (true, _) => None, + (false, true) => Some(Cow::Borrowed(text)), + (false, false) => Some(Cow::Owned(buf)), + } + } +} + impl ast::IntNumber { pub fn radix(&self) -> Radix { match self.text().get(..2).unwrap_or_default() { diff --git a/crates/syntax/src/parsing/reparsing.rs b/crates/syntax/src/parsing/reparsing.rs index f3d644c680..45e5916098 100644 --- a/crates/syntax/src/parsing/reparsing.rs +++ b/crates/syntax/src/parsing/reparsing.rs @@ -39,7 +39,7 @@ fn reparse_token( let prev_token = root.covering_element(edit.delete).as_token()?.clone(); let prev_token_kind = prev_token.kind(); match prev_token_kind { - WHITESPACE | COMMENT | IDENT | STRING => { + WHITESPACE | COMMENT | IDENT | STRING | BYTE_STRING | C_STRING => { if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT { // removing a new line may extends previous token let deleted_range = edit.delete - prev_token.text_range().start(); diff --git a/crates/syntax/src/tests/sourcegen_ast.rs b/crates/syntax/src/tests/sourcegen_ast.rs index 77a8363a18..c49c5fa108 100644 --- a/crates/syntax/src/tests/sourcegen_ast.rs +++ b/crates/syntax/src/tests/sourcegen_ast.rs @@ -573,10 +573,11 @@ impl Field { fn lower(grammar: &Grammar) -> AstSrc { let mut res = AstSrc { - tokens: "Whitespace Comment String ByteString IntNumber FloatNumber Char Byte Ident" - .split_ascii_whitespace() - .map(|it| it.to_string()) - .collect::>(), + tokens: + "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident" + .split_ascii_whitespace() + .map(|it| it.to_string()) + .collect::>(), ..Default::default() }; diff --git a/crates/syntax/src/validation.rs b/crates/syntax/src/validation.rs index 089ad74dad..e0ec6a242f 100644 --- a/crates/syntax/src/validation.rs +++ b/crates/syntax/src/validation.rs @@ -9,7 +9,7 @@ use rustc_lexer::unescape::{self, unescape_literal, Mode}; use crate::{ algo, - ast::{self, HasAttrs, HasVisibility}, + ast::{self, HasAttrs, HasVisibility, IsString}, match_ast, AstNode, SyntaxError, SyntaxKind::{CONST, FN, INT_NUMBER, TYPE_ALIAS}, SyntaxNode, SyntaxToken, TextSize, T, @@ -156,6 +156,17 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec) { } } } + ast::LiteralKind::CString(s) => { + if !s.is_raw() { + if let Some(without_quotes) = unquote(text, 2, '"') { + unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| { + if let Err(err) = char { + push_err(1, range.start, err); + } + }); + } + } + } ast::LiteralKind::Char(_) => { if let Some(without_quotes) = unquote(text, 1, '\'') { unescape_literal(without_quotes, Mode::Char, &mut |range, char| {