mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-13 05:38:46 +00:00
internal: move all the lexing to the parser crate
This commit is contained in:
parent
78926027e3
commit
a022ad68c9
16 changed files with 159 additions and 467 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -609,6 +609,7 @@ dependencies = [
|
||||||
"hir",
|
"hir",
|
||||||
"ide_db",
|
"ide_db",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
"parser",
|
||||||
"profile",
|
"profile",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"sourcegen",
|
"sourcegen",
|
||||||
|
@ -654,6 +655,7 @@ dependencies = [
|
||||||
"itertools",
|
"itertools",
|
||||||
"limit",
|
"limit",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
"parser",
|
||||||
"profile",
|
"profile",
|
||||||
"rayon",
|
"rayon",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
|
@ -695,6 +697,7 @@ dependencies = [
|
||||||
"hir",
|
"hir",
|
||||||
"ide_db",
|
"ide_db",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
"parser",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"syntax",
|
"syntax",
|
||||||
"test_utils",
|
"test_utils",
|
||||||
|
|
|
@ -16,6 +16,7 @@ itertools = "0.10.0"
|
||||||
either = "1.6.1"
|
either = "1.6.1"
|
||||||
|
|
||||||
stdx = { path = "../stdx", version = "0.0.0" }
|
stdx = { path = "../stdx", version = "0.0.0" }
|
||||||
|
parser = { path = "../parser", version = "0.0.0" }
|
||||||
syntax = { path = "../syntax", version = "0.0.0" }
|
syntax = { path = "../syntax", version = "0.0.0" }
|
||||||
text_edit = { path = "../text_edit", version = "0.0.0" }
|
text_edit = { path = "../text_edit", version = "0.0.0" }
|
||||||
profile = { path = "../profile", version = "0.0.0" }
|
profile = { path = "../profile", version = "0.0.0" }
|
||||||
|
|
|
@ -135,7 +135,7 @@ fn normalize(name: &str) -> Option<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_valid_name(name: &str) -> bool {
|
fn is_valid_name(name: &str) -> bool {
|
||||||
match syntax::lex_single_syntax_kind(name) {
|
match parser::LexedStr::single_token(name) {
|
||||||
Some((syntax::SyntaxKind::IDENT, _error)) => true,
|
Some((syntax::SyntaxKind::IDENT, _error)) => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ arrayvec = "0.7"
|
||||||
indexmap = "1.7"
|
indexmap = "1.7"
|
||||||
|
|
||||||
stdx = { path = "../stdx", version = "0.0.0" }
|
stdx = { path = "../stdx", version = "0.0.0" }
|
||||||
|
parser = { path = "../parser", version = "0.0.0" }
|
||||||
syntax = { path = "../syntax", version = "0.0.0" }
|
syntax = { path = "../syntax", version = "0.0.0" }
|
||||||
text_edit = { path = "../text_edit", version = "0.0.0" }
|
text_edit = { path = "../text_edit", version = "0.0.0" }
|
||||||
base_db = { path = "../base_db", version = "0.0.0" }
|
base_db = { path = "../base_db", version = "0.0.0" }
|
||||||
|
|
|
@ -28,7 +28,7 @@ use hir::{AsAssocItem, FieldSource, HasSource, InFile, ModuleSource, Semantics};
|
||||||
use stdx::never;
|
use stdx::never;
|
||||||
use syntax::{
|
use syntax::{
|
||||||
ast::{self, HasName},
|
ast::{self, HasName},
|
||||||
lex_single_syntax_kind, AstNode, SyntaxKind, TextRange, T,
|
AstNode, SyntaxKind, TextRange, T,
|
||||||
};
|
};
|
||||||
use text_edit::{TextEdit, TextEditBuilder};
|
use text_edit::{TextEdit, TextEditBuilder};
|
||||||
|
|
||||||
|
@ -490,7 +490,7 @@ pub enum IdentifierKind {
|
||||||
|
|
||||||
impl IdentifierKind {
|
impl IdentifierKind {
|
||||||
pub fn classify(new_name: &str) -> Result<IdentifierKind> {
|
pub fn classify(new_name: &str) -> Result<IdentifierKind> {
|
||||||
match lex_single_syntax_kind(new_name) {
|
match parser::LexedStr::single_token(new_name) {
|
||||||
Some(res) => match res {
|
Some(res) => match res {
|
||||||
(SyntaxKind::IDENT, _) => Ok(IdentifierKind::Ident),
|
(SyntaxKind::IDENT, _) => Ok(IdentifierKind::Ident),
|
||||||
(T![_], _) => Ok(IdentifierKind::Underscore),
|
(T![_], _) => Ok(IdentifierKind::Underscore),
|
||||||
|
|
|
@ -16,6 +16,7 @@ rustc-hash = "1.1.0"
|
||||||
itertools = "0.10.0"
|
itertools = "0.10.0"
|
||||||
|
|
||||||
text_edit = { path = "../text_edit", version = "0.0.0" }
|
text_edit = { path = "../text_edit", version = "0.0.0" }
|
||||||
|
parser = { path = "../parser", version = "0.0.0" }
|
||||||
syntax = { path = "../syntax", version = "0.0.0" }
|
syntax = { path = "../syntax", version = "0.0.0" }
|
||||||
ide_db = { path = "../ide_db", version = "0.0.0" }
|
ide_db = { path = "../ide_db", version = "0.0.0" }
|
||||||
hir = { path = "../hir", version = "0.0.0" }
|
hir = { path = "../hir", version = "0.0.0" }
|
||||||
|
|
|
@ -256,19 +256,13 @@ fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
|
fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
|
||||||
let mut start = 0;
|
let lexed = parser::LexedStr::new(source);
|
||||||
let (raw_tokens, errors) = syntax::tokenize(source);
|
if let Some((_, first_error)) = lexed.errors().next() {
|
||||||
if let Some(first_error) = errors.first() {
|
|
||||||
bail!("Failed to parse pattern: {}", first_error);
|
bail!("Failed to parse pattern: {}", first_error);
|
||||||
}
|
}
|
||||||
let mut tokens: Vec<Token> = Vec::new();
|
let mut tokens: Vec<Token> = Vec::new();
|
||||||
for raw_token in raw_tokens {
|
for i in 0..lexed.len() {
|
||||||
let token_len = usize::from(raw_token.len);
|
tokens.push(Token { kind: lexed.kind(i), text: lexed.text(i).into() });
|
||||||
tokens.push(Token {
|
|
||||||
kind: raw_token.kind,
|
|
||||||
text: SmolStr::new(&source[start..start + token_len]),
|
|
||||||
});
|
|
||||||
start += token_len;
|
|
||||||
}
|
}
|
||||||
Ok(tokens)
|
Ok(tokens)
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,10 +4,9 @@ use parser::{ParseError, TreeSink};
|
||||||
use rustc_hash::{FxHashMap, FxHashSet};
|
use rustc_hash::{FxHashMap, FxHashSet};
|
||||||
use syntax::{
|
use syntax::{
|
||||||
ast::{self, make::tokens::doc_comment},
|
ast::{self, make::tokens::doc_comment},
|
||||||
tokenize, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
|
AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
|
||||||
SyntaxKind::*,
|
SyntaxKind::*,
|
||||||
SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, Token as RawToken, WalkEvent,
|
SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
|
||||||
T,
|
|
||||||
};
|
};
|
||||||
use tt::buffer::{Cursor, TokenBuffer};
|
use tt::buffer::{Cursor, TokenBuffer};
|
||||||
|
|
||||||
|
@ -69,15 +68,14 @@ pub fn token_tree_to_syntax_node(
|
||||||
|
|
||||||
/// Convert a string to a `TokenTree`
|
/// Convert a string to a `TokenTree`
|
||||||
pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
|
pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
|
||||||
let (tokens, errors) = tokenize(text);
|
let lexed = parser::LexedStr::new(text);
|
||||||
if !errors.is_empty() {
|
if lexed.errors().next().is_some() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut conv = RawConvertor {
|
let mut conv = RawConvertor {
|
||||||
text,
|
lexed: lexed,
|
||||||
offset: TextSize::default(),
|
pos: 0,
|
||||||
inner: tokens.iter(),
|
|
||||||
id_alloc: TokenIdAlloc {
|
id_alloc: TokenIdAlloc {
|
||||||
map: Default::default(),
|
map: Default::default(),
|
||||||
global_offset: TextSize::default(),
|
global_offset: TextSize::default(),
|
||||||
|
@ -146,7 +144,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
|
||||||
Some(it) => it,
|
Some(it) => it,
|
||||||
};
|
};
|
||||||
|
|
||||||
let k: SyntaxKind = token.kind();
|
let k: SyntaxKind = token.kind(&conv);
|
||||||
if k == COMMENT {
|
if k == COMMENT {
|
||||||
if let Some(tokens) = conv.convert_doc_comment(&token) {
|
if let Some(tokens) = conv.convert_doc_comment(&token) {
|
||||||
// FIXME: There has to be a better way to do this
|
// FIXME: There has to be a better way to do this
|
||||||
|
@ -199,19 +197,19 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
|
||||||
} else {
|
} else {
|
||||||
let spacing = match conv.peek() {
|
let spacing = match conv.peek() {
|
||||||
Some(next)
|
Some(next)
|
||||||
if next.kind().is_trivia()
|
if next.kind(&conv).is_trivia()
|
||||||
|| next.kind() == T!['[']
|
|| next.kind(&conv) == T!['[']
|
||||||
|| next.kind() == T!['{']
|
|| next.kind(&conv) == T!['{']
|
||||||
|| next.kind() == T!['('] =>
|
|| next.kind(&conv) == T!['('] =>
|
||||||
{
|
{
|
||||||
tt::Spacing::Alone
|
tt::Spacing::Alone
|
||||||
}
|
}
|
||||||
Some(next) if next.kind().is_punct() && next.kind() != UNDERSCORE => {
|
Some(next) if next.kind(&conv).is_punct() && next.kind(&conv) != UNDERSCORE => {
|
||||||
tt::Spacing::Joint
|
tt::Spacing::Joint
|
||||||
}
|
}
|
||||||
_ => tt::Spacing::Alone,
|
_ => tt::Spacing::Alone,
|
||||||
};
|
};
|
||||||
let char = match token.to_char() {
|
let char = match token.to_char(&conv) {
|
||||||
Some(c) => c,
|
Some(c) => c,
|
||||||
None => {
|
None => {
|
||||||
panic!("Token from lexer must be single char: token = {:#?}", token);
|
panic!("Token from lexer must be single char: token = {:#?}", token);
|
||||||
|
@ -222,7 +220,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
|
||||||
} else {
|
} else {
|
||||||
macro_rules! make_leaf {
|
macro_rules! make_leaf {
|
||||||
($i:ident) => {
|
($i:ident) => {
|
||||||
tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text() }.into()
|
tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into()
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
let leaf: tt::Leaf = match k {
|
let leaf: tt::Leaf = match k {
|
||||||
|
@ -243,7 +241,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
|
||||||
|
|
||||||
let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
|
let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
|
||||||
let ident = tt::Leaf::from(tt::Ident {
|
let ident = tt::Leaf::from(tt::Ident {
|
||||||
text: SmolStr::new(&token.to_text()[1..]),
|
text: SmolStr::new(&token.to_text(conv)[1..]),
|
||||||
id: conv.id_alloc().alloc(r),
|
id: conv.id_alloc().alloc(r),
|
||||||
});
|
});
|
||||||
result.push(ident.into());
|
result.push(ident.into());
|
||||||
|
@ -392,22 +390,21 @@ impl TokenIdAlloc {
|
||||||
|
|
||||||
/// A Raw Token (straightly from lexer) convertor
|
/// A Raw Token (straightly from lexer) convertor
|
||||||
struct RawConvertor<'a> {
|
struct RawConvertor<'a> {
|
||||||
text: &'a str,
|
lexed: parser::LexedStr<'a>,
|
||||||
offset: TextSize,
|
pos: usize,
|
||||||
id_alloc: TokenIdAlloc,
|
id_alloc: TokenIdAlloc,
|
||||||
inner: std::slice::Iter<'a, RawToken>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
trait SrcToken: std::fmt::Debug {
|
trait SrcToken<Ctx>: std::fmt::Debug {
|
||||||
fn kind(&self) -> SyntaxKind;
|
fn kind(&self, ctx: &Ctx) -> SyntaxKind;
|
||||||
|
|
||||||
fn to_char(&self) -> Option<char>;
|
fn to_char(&self, ctx: &Ctx) -> Option<char>;
|
||||||
|
|
||||||
fn to_text(&self) -> SmolStr;
|
fn to_text(&self, ctx: &Ctx) -> SmolStr;
|
||||||
}
|
}
|
||||||
|
|
||||||
trait TokenConvertor {
|
trait TokenConvertor: Sized {
|
||||||
type Token: SrcToken;
|
type Token: SrcToken<Self>;
|
||||||
|
|
||||||
fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;
|
fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;
|
||||||
|
|
||||||
|
@ -418,42 +415,45 @@ trait TokenConvertor {
|
||||||
fn id_alloc(&mut self) -> &mut TokenIdAlloc;
|
fn id_alloc(&mut self) -> &mut TokenIdAlloc;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SrcToken for (&'a RawToken, &'a str) {
|
impl<'a> SrcToken<RawConvertor<'a>> for usize {
|
||||||
fn kind(&self) -> SyntaxKind {
|
fn kind(&self, ctx: &RawConvertor<'a>) -> SyntaxKind {
|
||||||
self.0.kind
|
ctx.lexed.kind(*self)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_char(&self) -> Option<char> {
|
fn to_char(&self, ctx: &RawConvertor<'a>) -> Option<char> {
|
||||||
self.1.chars().next()
|
ctx.lexed.text(*self).chars().next()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_text(&self) -> SmolStr {
|
fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
|
||||||
self.1.into()
|
ctx.lexed.text(*self).into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> TokenConvertor for RawConvertor<'a> {
|
impl<'a> TokenConvertor for RawConvertor<'a> {
|
||||||
type Token = (&'a RawToken, &'a str);
|
type Token = usize;
|
||||||
|
|
||||||
fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
|
fn convert_doc_comment(&self, token: &usize) -> Option<Vec<tt::TokenTree>> {
|
||||||
convert_doc_comment(&doc_comment(token.1))
|
let text = self.lexed.text(*token);
|
||||||
|
convert_doc_comment(&doc_comment(text))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
|
fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
|
||||||
let token = self.inner.next()?;
|
if self.pos == self.lexed.len() {
|
||||||
let range = TextRange::at(self.offset, token.len);
|
return None;
|
||||||
self.offset += token.len;
|
}
|
||||||
|
let token = self.pos;
|
||||||
|
self.pos += 1;
|
||||||
|
let range = self.lexed.text_range(token);
|
||||||
|
let range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
|
||||||
|
|
||||||
Some(((token, &self.text[range]), range))
|
Some((token, range))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn peek(&self) -> Option<Self::Token> {
|
fn peek(&self) -> Option<Self::Token> {
|
||||||
let token = self.inner.as_slice().get(0);
|
if self.pos == self.lexed.len() {
|
||||||
|
return None;
|
||||||
token.map(|it| {
|
}
|
||||||
let range = TextRange::at(self.offset, it.len);
|
Some(self.pos)
|
||||||
(it, &self.text[range])
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn id_alloc(&mut self) -> &mut TokenIdAlloc {
|
fn id_alloc(&mut self) -> &mut TokenIdAlloc {
|
||||||
|
@ -523,17 +523,17 @@ impl SynToken {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SrcToken for SynToken {
|
impl<'a> SrcToken<Convertor<'a>> for SynToken {
|
||||||
fn kind(&self) -> SyntaxKind {
|
fn kind(&self, _ctx: &Convertor<'a>) -> SyntaxKind {
|
||||||
self.token().kind()
|
self.token().kind()
|
||||||
}
|
}
|
||||||
fn to_char(&self) -> Option<char> {
|
fn to_char(&self, _ctx: &Convertor<'a>) -> Option<char> {
|
||||||
match self {
|
match self {
|
||||||
SynToken::Ordinary(_) => None,
|
SynToken::Ordinary(_) => None,
|
||||||
SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
|
SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn to_text(&self) -> SmolStr {
|
fn to_text(&self, _ctx: &Convertor<'a>) -> SmolStr {
|
||||||
self.token().text().into()
|
self.token().text().into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
//! Convert macro-by-example tokens which are specific to macro expansion into a
|
//! Convert macro-by-example tokens which are specific to macro expansion into a
|
||||||
//! format that works for our parser.
|
//! format that works for our parser.
|
||||||
|
|
||||||
use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T};
|
use syntax::{SyntaxKind, SyntaxKind::*, T};
|
||||||
use tt::buffer::TokenBuffer;
|
use tt::buffer::TokenBuffer;
|
||||||
|
|
||||||
pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
|
pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
|
||||||
|
@ -35,7 +35,7 @@ pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
|
||||||
let is_negated = lit.text.starts_with('-');
|
let is_negated = lit.text.starts_with('-');
|
||||||
let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
|
let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
|
||||||
|
|
||||||
let kind = lex_single_syntax_kind(inner_text)
|
let kind = parser::LexedStr::single_token(inner_text)
|
||||||
.map(|(kind, _error)| kind)
|
.map(|(kind, _error)| kind)
|
||||||
.filter(|kind| {
|
.filter(|kind| {
|
||||||
kind.is_literal()
|
kind.is_literal()
|
||||||
|
|
|
@ -8,6 +8,8 @@
|
||||||
//! Note that these tokens, unlike the tokens we feed into the parser, do
|
//! Note that these tokens, unlike the tokens we feed into the parser, do
|
||||||
//! include info about comments and whitespace.
|
//! include info about comments and whitespace.
|
||||||
|
|
||||||
|
use std::ops;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
SyntaxKind::{self, *},
|
SyntaxKind::{self, *},
|
||||||
T,
|
T,
|
||||||
|
@ -52,7 +54,7 @@ impl<'a> LexedStr<'a> {
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn single_token(text: &'a str) -> Option<SyntaxKind> {
|
pub fn single_token(text: &'a str) -> Option<(SyntaxKind, Option<String>)> {
|
||||||
if text.is_empty() {
|
if text.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
@ -63,11 +65,7 @@ impl<'a> LexedStr<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let (kind, err) = from_rustc(&token.kind, text);
|
let (kind, err) = from_rustc(&token.kind, text);
|
||||||
if err.is_some() {
|
Some((kind, err.map(|it| it.to_owned())))
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
Some(kind)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_str(&self) -> &str {
|
pub fn as_str(&self) -> &str {
|
||||||
|
@ -78,16 +76,40 @@ impl<'a> LexedStr<'a> {
|
||||||
self.kind.len() - 1
|
self.kind.len() - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
pub fn kind(&self, i: usize) -> SyntaxKind {
|
pub fn kind(&self, i: usize) -> SyntaxKind {
|
||||||
assert!(i < self.len());
|
assert!(i < self.len());
|
||||||
self.kind[i]
|
self.kind[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn text(&self, i: usize) -> &str {
|
pub fn text(&self, i: usize) -> &str {
|
||||||
|
self.range_text(i..i + 1)
|
||||||
|
}
|
||||||
|
pub fn range_text(&self, r: ops::Range<usize>) -> &str {
|
||||||
|
assert!(r.start < r.end && r.end <= self.len());
|
||||||
|
let lo = self.start[r.start] as usize;
|
||||||
|
let hi = self.start[r.end] as usize;
|
||||||
|
&self.text[lo..hi]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Naming is hard.
|
||||||
|
pub fn text_range(&self, i: usize) -> ops::Range<usize> {
|
||||||
assert!(i < self.len());
|
assert!(i < self.len());
|
||||||
let lo = self.start[i] as usize;
|
let lo = self.start[i] as usize;
|
||||||
let hi = self.start[i + 1] as usize;
|
let hi = self.start[i + 1] as usize;
|
||||||
&self.text[lo..hi]
|
lo..hi
|
||||||
|
}
|
||||||
|
pub fn text_start(&self, i: usize) -> usize {
|
||||||
|
assert!(i <= self.len());
|
||||||
|
self.start[i] as usize
|
||||||
|
}
|
||||||
|
pub fn text_len(&self, i: usize) -> usize {
|
||||||
|
assert!(i < self.len());
|
||||||
|
let r = self.text_range(i);
|
||||||
|
r.end - r.start
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn error(&self, i: usize) -> Option<&str> {
|
pub fn error(&self, i: usize) -> Option<&str> {
|
||||||
|
@ -96,6 +118,10 @@ impl<'a> LexedStr<'a> {
|
||||||
Some(self.error[err].msg.as_str())
|
Some(self.error[err].msg.as_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn errors(&self) -> impl Iterator<Item = (usize, &str)> + '_ {
|
||||||
|
self.error.iter().map(|it| (it.token as usize, it.msg.as_str()))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn to_tokens(&self) -> crate::Tokens {
|
pub fn to_tokens(&self) -> crate::Tokens {
|
||||||
let mut res = crate::Tokens::default();
|
let mut res = crate::Tokens::default();
|
||||||
let mut was_joint = false;
|
let mut was_joint = false;
|
||||||
|
|
|
@ -48,7 +48,6 @@ use text_edit::Indel;
|
||||||
|
|
||||||
pub use crate::{
|
pub use crate::{
|
||||||
ast::{AstNode, AstToken},
|
ast::{AstNode, AstToken},
|
||||||
parsing::lexer::{lex_single_syntax_kind, tokenize, Token},
|
|
||||||
ptr::{AstPtr, SyntaxNodePtr},
|
ptr::{AstPtr, SyntaxNodePtr},
|
||||||
syntax_error::SyntaxError,
|
syntax_error::SyntaxError,
|
||||||
syntax_node::{
|
syntax_node::{
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
//! Lexing, bridging to parser (which does the actual parsing) and
|
//! Lexing, bridging to parser (which does the actual parsing) and
|
||||||
//! incremental reparsing.
|
//! incremental reparsing.
|
||||||
|
|
||||||
pub(crate) mod lexer;
|
|
||||||
mod text_tree_sink;
|
mod text_tree_sink;
|
||||||
mod reparsing;
|
mod reparsing;
|
||||||
|
|
||||||
|
@ -10,18 +9,17 @@ use text_tree_sink::TextTreeSink;
|
||||||
|
|
||||||
use crate::{syntax_node::GreenNode, AstNode, SyntaxError, SyntaxNode};
|
use crate::{syntax_node::GreenNode, AstNode, SyntaxError, SyntaxNode};
|
||||||
|
|
||||||
pub(crate) use crate::parsing::{lexer::*, reparsing::incremental_reparse};
|
pub(crate) use crate::parsing::reparsing::incremental_reparse;
|
||||||
|
|
||||||
pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
|
pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
|
||||||
let (lexer_tokens, lexer_errors) = tokenize(text);
|
let lexed = parser::LexedStr::new(text);
|
||||||
let parser_tokens = to_parser_tokens(text, &lexer_tokens);
|
let parser_tokens = lexed.to_tokens();
|
||||||
|
|
||||||
let mut tree_sink = TextTreeSink::new(text, &lexer_tokens);
|
let mut tree_sink = TextTreeSink::new(lexed);
|
||||||
|
|
||||||
parser::parse_source_file(&parser_tokens, &mut tree_sink);
|
parser::parse_source_file(&parser_tokens, &mut tree_sink);
|
||||||
|
|
||||||
let (tree, mut parser_errors) = tree_sink.finish();
|
let (tree, parser_errors) = tree_sink.finish();
|
||||||
parser_errors.extend(lexer_errors);
|
|
||||||
|
|
||||||
(tree, parser_errors)
|
(tree, parser_errors)
|
||||||
}
|
}
|
||||||
|
@ -31,14 +29,13 @@ pub(crate) fn parse_text_as<T: AstNode>(
|
||||||
text: &str,
|
text: &str,
|
||||||
entry_point: parser::ParserEntryPoint,
|
entry_point: parser::ParserEntryPoint,
|
||||||
) -> Result<T, ()> {
|
) -> Result<T, ()> {
|
||||||
let (lexer_tokens, lexer_errors) = tokenize(text);
|
let lexed = parser::LexedStr::new(text);
|
||||||
if !lexer_errors.is_empty() {
|
if lexed.errors().next().is_some() {
|
||||||
return Err(());
|
return Err(());
|
||||||
}
|
}
|
||||||
|
let parser_tokens = lexed.to_tokens();
|
||||||
|
|
||||||
let parser_tokens = to_parser_tokens(text, &lexer_tokens);
|
let mut tree_sink = TextTreeSink::new(lexed);
|
||||||
|
|
||||||
let mut tree_sink = TextTreeSink::new(text, &lexer_tokens);
|
|
||||||
|
|
||||||
// TextTreeSink assumes that there's at least some root node to which it can attach errors and
|
// TextTreeSink assumes that there's at least some root node to which it can attach errors and
|
||||||
// tokens. We arbitrarily give it a SourceFile.
|
// tokens. We arbitrarily give it a SourceFile.
|
||||||
|
@ -54,29 +51,3 @@ pub(crate) fn parse_text_as<T: AstNode>(
|
||||||
|
|
||||||
SyntaxNode::new_root(tree).first_child().and_then(T::cast).ok_or(())
|
SyntaxNode::new_root(tree).first_child().and_then(T::cast).ok_or(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn to_parser_tokens(text: &str, lexer_tokens: &[lexer::Token]) -> ::parser::Tokens {
|
|
||||||
let mut off = 0;
|
|
||||||
let mut res = parser::Tokens::default();
|
|
||||||
let mut was_joint = false;
|
|
||||||
for t in lexer_tokens {
|
|
||||||
if t.kind.is_trivia() {
|
|
||||||
was_joint = false;
|
|
||||||
} else {
|
|
||||||
if t.kind == SyntaxKind::IDENT {
|
|
||||||
let token_text = &text[off..][..usize::from(t.len)];
|
|
||||||
let contextual_kw =
|
|
||||||
SyntaxKind::from_contextual_keyword(token_text).unwrap_or(SyntaxKind::IDENT);
|
|
||||||
res.push_ident(contextual_kw);
|
|
||||||
} else {
|
|
||||||
if was_joint {
|
|
||||||
res.was_joint();
|
|
||||||
}
|
|
||||||
res.push(t.kind);
|
|
||||||
}
|
|
||||||
was_joint = true;
|
|
||||||
}
|
|
||||||
off += usize::from(t.len);
|
|
||||||
}
|
|
||||||
res
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,249 +0,0 @@
|
||||||
//! Lexer analyzes raw input string and produces lexemes (tokens).
|
|
||||||
//! It is just a bridge to `rustc_lexer`.
|
|
||||||
|
|
||||||
use std::convert::TryInto;
|
|
||||||
|
|
||||||
use rustc_lexer::RawStrError;
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
SyntaxError,
|
|
||||||
SyntaxKind::{self, *},
|
|
||||||
TextRange, TextSize, T,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A token of Rust source.
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
||||||
pub struct Token {
|
|
||||||
/// The kind of token.
|
|
||||||
pub kind: SyntaxKind,
|
|
||||||
/// The length of the token.
|
|
||||||
pub len: TextSize,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Break a string up into its component tokens.
|
|
||||||
/// Beware that it checks for shebang first and its length contributes to resulting
|
|
||||||
/// tokens offsets.
|
|
||||||
pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
|
|
||||||
// non-empty string is a precondition of `rustc_lexer::strip_shebang()`.
|
|
||||||
if text.is_empty() {
|
|
||||||
return Default::default();
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut tokens = Vec::new();
|
|
||||||
let mut errors = Vec::new();
|
|
||||||
|
|
||||||
let mut offset = match rustc_lexer::strip_shebang(text) {
|
|
||||||
Some(shebang_len) => {
|
|
||||||
tokens.push(Token { kind: SHEBANG, len: shebang_len.try_into().unwrap() });
|
|
||||||
shebang_len
|
|
||||||
}
|
|
||||||
None => 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
let text_without_shebang = &text[offset..];
|
|
||||||
|
|
||||||
for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
|
|
||||||
let token_len: TextSize = rustc_token.len.try_into().unwrap();
|
|
||||||
let token_range = TextRange::at(offset.try_into().unwrap(), token_len);
|
|
||||||
|
|
||||||
let (syntax_kind, err_message) =
|
|
||||||
rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
|
|
||||||
|
|
||||||
tokens.push(Token { kind: syntax_kind, len: token_len });
|
|
||||||
|
|
||||||
if let Some(err_message) = err_message {
|
|
||||||
errors.push(SyntaxError::new(err_message, token_range));
|
|
||||||
}
|
|
||||||
|
|
||||||
offset += rustc_token.len;
|
|
||||||
}
|
|
||||||
|
|
||||||
(tokens, errors)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `SyntaxKind` and `Option<SyntaxError>` if `text` parses as a single token.
|
|
||||||
///
|
|
||||||
/// Returns `None` if the string contains zero *or two or more* tokens.
|
|
||||||
/// The token is malformed if the returned error is not `None`.
|
|
||||||
///
|
|
||||||
/// Beware that unescape errors are not checked at tokenization time.
|
|
||||||
pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
|
|
||||||
let (first_token, err) = lex_first_token(text)?;
|
|
||||||
if first_token.len != TextSize::of(text) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some((first_token.kind, err))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
|
|
||||||
/// encountered at the beginning of the string.
|
|
||||||
///
|
|
||||||
/// Returns `None` if the string contains zero tokens or if the token was parsed
|
|
||||||
/// with an error.
|
|
||||||
/// The token is malformed if the returned error is not `None`.
|
|
||||||
///
|
|
||||||
/// Beware that unescape errors are not checked at tokenization time.
|
|
||||||
fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
|
|
||||||
// non-empty string is a precondition of `rustc_lexer::first_token()`.
|
|
||||||
if text.is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let rustc_token = rustc_lexer::first_token(text);
|
|
||||||
let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
|
|
||||||
|
|
||||||
let token = Token { kind: syntax_kind, len: rustc_token.len.try_into().unwrap() };
|
|
||||||
let optional_error = err_message
|
|
||||||
.map(|err_message| SyntaxError::new(err_message, TextRange::up_to(TextSize::of(text))));
|
|
||||||
|
|
||||||
Some((token, optional_error))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `SyntaxKind` and an optional tokenize error message.
|
|
||||||
fn rustc_token_kind_to_syntax_kind(
|
|
||||||
rustc_token_kind: &rustc_lexer::TokenKind,
|
|
||||||
token_text: &str,
|
|
||||||
) -> (SyntaxKind, Option<&'static str>) {
|
|
||||||
// A note on an intended tradeoff:
|
|
||||||
// We drop some useful information here (see patterns with double dots `..`)
|
|
||||||
// Storing that info in `SyntaxKind` is not possible due to its layout requirements of
|
|
||||||
// being `u16` that come from `rowan::SyntaxKind`.
|
|
||||||
|
|
||||||
let syntax_kind = {
|
|
||||||
match rustc_token_kind {
|
|
||||||
rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT,
|
|
||||||
|
|
||||||
rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated: true } => COMMENT,
|
|
||||||
rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated: false } => {
|
|
||||||
return (
|
|
||||||
COMMENT,
|
|
||||||
Some("Missing trailing `*/` symbols to terminate the block comment"),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Whitespace => WHITESPACE,
|
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Ident => {
|
|
||||||
if token_text == "_" {
|
|
||||||
UNDERSCORE
|
|
||||||
} else {
|
|
||||||
SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rustc_lexer::TokenKind::RawIdent => IDENT,
|
|
||||||
rustc_lexer::TokenKind::Literal { kind, .. } => return match_literal_kind(kind),
|
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Lifetime { starts_with_number: false } => LIFETIME_IDENT,
|
|
||||||
rustc_lexer::TokenKind::Lifetime { starts_with_number: true } => {
|
|
||||||
return (LIFETIME_IDENT, Some("Lifetime name cannot start with a number"))
|
|
||||||
}
|
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Semi => T![;],
|
|
||||||
rustc_lexer::TokenKind::Comma => T![,],
|
|
||||||
rustc_lexer::TokenKind::Dot => T![.],
|
|
||||||
rustc_lexer::TokenKind::OpenParen => T!['('],
|
|
||||||
rustc_lexer::TokenKind::CloseParen => T![')'],
|
|
||||||
rustc_lexer::TokenKind::OpenBrace => T!['{'],
|
|
||||||
rustc_lexer::TokenKind::CloseBrace => T!['}'],
|
|
||||||
rustc_lexer::TokenKind::OpenBracket => T!['['],
|
|
||||||
rustc_lexer::TokenKind::CloseBracket => T![']'],
|
|
||||||
rustc_lexer::TokenKind::At => T![@],
|
|
||||||
rustc_lexer::TokenKind::Pound => T![#],
|
|
||||||
rustc_lexer::TokenKind::Tilde => T![~],
|
|
||||||
rustc_lexer::TokenKind::Question => T![?],
|
|
||||||
rustc_lexer::TokenKind::Colon => T![:],
|
|
||||||
rustc_lexer::TokenKind::Dollar => T![$],
|
|
||||||
rustc_lexer::TokenKind::Eq => T![=],
|
|
||||||
rustc_lexer::TokenKind::Bang => T![!],
|
|
||||||
rustc_lexer::TokenKind::Lt => T![<],
|
|
||||||
rustc_lexer::TokenKind::Gt => T![>],
|
|
||||||
rustc_lexer::TokenKind::Minus => T![-],
|
|
||||||
rustc_lexer::TokenKind::And => T![&],
|
|
||||||
rustc_lexer::TokenKind::Or => T![|],
|
|
||||||
rustc_lexer::TokenKind::Plus => T![+],
|
|
||||||
rustc_lexer::TokenKind::Star => T![*],
|
|
||||||
rustc_lexer::TokenKind::Slash => T![/],
|
|
||||||
rustc_lexer::TokenKind::Caret => T![^],
|
|
||||||
rustc_lexer::TokenKind::Percent => T![%],
|
|
||||||
rustc_lexer::TokenKind::Unknown => ERROR,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return (syntax_kind, None);
|
|
||||||
|
|
||||||
fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
|
|
||||||
let mut err = "";
|
|
||||||
let syntax_kind = match *kind {
|
|
||||||
rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {
|
|
||||||
if empty_int {
|
|
||||||
err = "Missing digits after the integer base prefix";
|
|
||||||
}
|
|
||||||
INT_NUMBER
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {
|
|
||||||
if empty_exponent {
|
|
||||||
err = "Missing digits after the exponent symbol";
|
|
||||||
}
|
|
||||||
FLOAT_NUMBER
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::Char { terminated } => {
|
|
||||||
if !terminated {
|
|
||||||
err = "Missing trailing `'` symbol to terminate the character literal";
|
|
||||||
}
|
|
||||||
CHAR
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::Byte { terminated } => {
|
|
||||||
if !terminated {
|
|
||||||
err = "Missing trailing `'` symbol to terminate the byte literal";
|
|
||||||
}
|
|
||||||
BYTE
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::Str { terminated } => {
|
|
||||||
if !terminated {
|
|
||||||
err = "Missing trailing `\"` symbol to terminate the string literal";
|
|
||||||
}
|
|
||||||
STRING
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::ByteStr { terminated } => {
|
|
||||||
if !terminated {
|
|
||||||
err = "Missing trailing `\"` symbol to terminate the byte string literal";
|
|
||||||
}
|
|
||||||
BYTE_STRING
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => {
|
|
||||||
if let Some(raw_str_err) = raw_str_err {
|
|
||||||
err = match raw_str_err {
|
|
||||||
RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal",
|
|
||||||
RawStrError::NoTerminator { expected, found, .. } => if expected == found {
|
|
||||||
"Missing trailing `\"` to terminate the raw string literal"
|
|
||||||
} else {
|
|
||||||
"Missing trailing `\"` with `#` symbols to terminate the raw string literal"
|
|
||||||
},
|
|
||||||
RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols",
|
|
||||||
};
|
|
||||||
};
|
|
||||||
STRING
|
|
||||||
}
|
|
||||||
rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => {
|
|
||||||
if let Some(raw_str_err) = raw_str_err {
|
|
||||||
err = match raw_str_err {
|
|
||||||
RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal",
|
|
||||||
RawStrError::NoTerminator { expected, found, .. } => if expected == found {
|
|
||||||
"Missing trailing `\"` to terminate the raw byte string literal"
|
|
||||||
} else {
|
|
||||||
"Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
|
|
||||||
},
|
|
||||||
RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols",
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
BYTE_STRING
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let err = if err.is_empty() { None } else { Some(err) };
|
|
||||||
|
|
||||||
(syntax_kind, err)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -10,11 +10,7 @@ use parser::Reparser;
|
||||||
use text_edit::Indel;
|
use text_edit::Indel;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
parsing::{
|
parsing::text_tree_sink::TextTreeSink,
|
||||||
lexer::{lex_single_syntax_kind, tokenize, Token},
|
|
||||||
text_tree_sink::TextTreeSink,
|
|
||||||
to_parser_tokens,
|
|
||||||
},
|
|
||||||
syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
|
syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
|
||||||
SyntaxError,
|
SyntaxError,
|
||||||
SyntaxKind::*,
|
SyntaxKind::*,
|
||||||
|
@ -53,7 +49,7 @@ fn reparse_token(
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut new_text = get_text_after_edit(prev_token.clone().into(), edit);
|
let mut new_text = get_text_after_edit(prev_token.clone().into(), edit);
|
||||||
let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
|
let (new_token_kind, new_err) = parser::LexedStr::single_token(&new_text)?;
|
||||||
|
|
||||||
if new_token_kind != prev_token_kind
|
if new_token_kind != prev_token_kind
|
||||||
|| (new_token_kind == IDENT && is_contextual_kw(&new_text))
|
|| (new_token_kind == IDENT && is_contextual_kw(&new_text))
|
||||||
|
@ -66,7 +62,7 @@ fn reparse_token(
|
||||||
// `b` no longer remains an identifier, but becomes a part of byte string literal
|
// `b` no longer remains an identifier, but becomes a part of byte string literal
|
||||||
if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
|
if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
|
||||||
new_text.push(next_char);
|
new_text.push(next_char);
|
||||||
let token_with_next_char = lex_single_syntax_kind(&new_text);
|
let token_with_next_char = parser::LexedStr::single_token(&new_text);
|
||||||
if let Some((_kind, _error)) = token_with_next_char {
|
if let Some((_kind, _error)) = token_with_next_char {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
@ -74,9 +70,10 @@ fn reparse_token(
|
||||||
}
|
}
|
||||||
|
|
||||||
let new_token = GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), &new_text);
|
let new_token = GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), &new_text);
|
||||||
|
let range = TextRange::up_to(TextSize::of(&new_text));
|
||||||
Some((
|
Some((
|
||||||
prev_token.replace_with(new_token),
|
prev_token.replace_with(new_token),
|
||||||
new_err.into_iter().collect(),
|
new_err.into_iter().map(|msg| SyntaxError::new(msg, range)).collect(),
|
||||||
prev_token.text_range(),
|
prev_token.text_range(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
@ -91,17 +88,17 @@ fn reparse_block(
|
||||||
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
|
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
|
||||||
let text = get_text_after_edit(node.clone().into(), edit);
|
let text = get_text_after_edit(node.clone().into(), edit);
|
||||||
|
|
||||||
let (lexer_tokens, new_lexer_errors) = tokenize(&text);
|
let lexed = parser::LexedStr::new(text.as_str());
|
||||||
if !is_balanced(&lexer_tokens) {
|
let parser_tokens = lexed.to_tokens();
|
||||||
|
if !is_balanced(&lexed) {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let parser_tokens = to_parser_tokens(&text, &lexer_tokens);
|
|
||||||
|
|
||||||
let mut tree_sink = TextTreeSink::new(&text, &lexer_tokens);
|
let mut tree_sink = TextTreeSink::new(lexed);
|
||||||
|
|
||||||
reparser.parse(&parser_tokens, &mut tree_sink);
|
reparser.parse(&parser_tokens, &mut tree_sink);
|
||||||
|
|
||||||
let (green, mut new_parser_errors) = tree_sink.finish();
|
let (green, new_parser_errors) = tree_sink.finish();
|
||||||
new_parser_errors.extend(new_lexer_errors);
|
|
||||||
|
|
||||||
Some((node.replace_with(green), new_parser_errors, node.text_range()))
|
Some((node.replace_with(green), new_parser_errors, node.text_range()))
|
||||||
}
|
}
|
||||||
|
@ -131,16 +128,13 @@ fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNo
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_balanced(tokens: &[Token]) -> bool {
|
fn is_balanced(lexed: &parser::LexedStr<'_>) -> bool {
|
||||||
if tokens.is_empty()
|
if lexed.is_empty() || lexed.kind(0) != T!['{'] || lexed.kind(lexed.len() - 1) != T!['}'] {
|
||||||
|| tokens.first().unwrap().kind != T!['{']
|
|
||||||
|| tokens.last().unwrap().kind != T!['}']
|
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
let mut balance = 0usize;
|
let mut balance = 0usize;
|
||||||
for t in &tokens[1..tokens.len() - 1] {
|
for i in 1..lexed.len() - 1 {
|
||||||
match t.kind {
|
match lexed.kind(i) {
|
||||||
T!['{'] => balance += 1,
|
T!['{'] => balance += 1,
|
||||||
T!['}'] => {
|
T!['}'] => {
|
||||||
balance = match balance.checked_sub(1) {
|
balance = match balance.checked_sub(1) {
|
||||||
|
|
|
@ -2,25 +2,22 @@
|
||||||
|
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
use parser::{ParseError, TreeSink};
|
use parser::{LexedStr, ParseError, TreeSink};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
ast,
|
ast,
|
||||||
parsing::Token,
|
|
||||||
syntax_node::GreenNode,
|
syntax_node::GreenNode,
|
||||||
SyntaxError,
|
SyntaxError,
|
||||||
SyntaxKind::{self, *},
|
SyntaxKind::{self, *},
|
||||||
SyntaxTreeBuilder, TextRange, TextSize,
|
SyntaxTreeBuilder, TextRange,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Bridges the parser with our specific syntax tree representation.
|
/// Bridges the parser with our specific syntax tree representation.
|
||||||
///
|
///
|
||||||
/// `TextTreeSink` also handles attachment of trivia (whitespace) to nodes.
|
/// `TextTreeSink` also handles attachment of trivia (whitespace) to nodes.
|
||||||
pub(crate) struct TextTreeSink<'a> {
|
pub(crate) struct TextTreeSink<'a> {
|
||||||
text: &'a str,
|
lexed: LexedStr<'a>,
|
||||||
tokens: &'a [Token],
|
pos: usize,
|
||||||
text_pos: TextSize,
|
|
||||||
token_pos: usize,
|
|
||||||
state: State,
|
state: State,
|
||||||
inner: SyntaxTreeBuilder,
|
inner: SyntaxTreeBuilder,
|
||||||
}
|
}
|
||||||
|
@ -39,12 +36,7 @@ impl<'a> TreeSink for TextTreeSink<'a> {
|
||||||
State::Normal => (),
|
State::Normal => (),
|
||||||
}
|
}
|
||||||
self.eat_trivias();
|
self.eat_trivias();
|
||||||
let n_tokens = n_tokens as usize;
|
self.do_token(kind, n_tokens as usize);
|
||||||
let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
|
|
||||||
.iter()
|
|
||||||
.map(|it| it.len)
|
|
||||||
.sum::<TextSize>();
|
|
||||||
self.do_token(kind, len, n_tokens);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_node(&mut self, kind: SyntaxKind) {
|
fn start_node(&mut self, kind: SyntaxKind) {
|
||||||
|
@ -60,20 +52,12 @@ impl<'a> TreeSink for TextTreeSink<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let n_trivias =
|
let n_trivias =
|
||||||
self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
|
(self.pos..self.lexed.len()).take_while(|&it| self.lexed.kind(it).is_trivia()).count();
|
||||||
let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
|
let leading_trivias = self.pos..self.pos + n_trivias;
|
||||||
let mut trivia_end =
|
let n_attached_trivias = n_attached_trivias(
|
||||||
self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextSize>();
|
kind,
|
||||||
|
leading_trivias.rev().map(|it| (self.lexed.kind(it), self.lexed.text(it))),
|
||||||
let n_attached_trivias = {
|
);
|
||||||
let leading_trivias = leading_trivias.iter().rev().map(|it| {
|
|
||||||
let next_end = trivia_end - it.len;
|
|
||||||
let range = TextRange::new(next_end, trivia_end);
|
|
||||||
trivia_end = next_end;
|
|
||||||
(it.kind, &self.text[range])
|
|
||||||
});
|
|
||||||
n_attached_trivias(kind, leading_trivias)
|
|
||||||
};
|
|
||||||
self.eat_n_trivias(n_trivias - n_attached_trivias);
|
self.eat_n_trivias(n_trivias - n_attached_trivias);
|
||||||
self.inner.start_node(kind);
|
self.inner.start_node(kind);
|
||||||
self.eat_n_trivias(n_attached_trivias);
|
self.eat_n_trivias(n_attached_trivias);
|
||||||
|
@ -88,20 +72,14 @@ impl<'a> TreeSink for TextTreeSink<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn error(&mut self, error: ParseError) {
|
fn error(&mut self, error: ParseError) {
|
||||||
self.inner.error(error, self.text_pos);
|
let text_pos = self.lexed.text_start(self.pos).try_into().unwrap();
|
||||||
|
self.inner.error(error, text_pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> TextTreeSink<'a> {
|
impl<'a> TextTreeSink<'a> {
|
||||||
pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> Self {
|
pub(super) fn new(lexed: parser::LexedStr<'a>) -> Self {
|
||||||
Self {
|
Self { lexed, pos: 0, state: State::PendingStart, inner: SyntaxTreeBuilder::default() }
|
||||||
text,
|
|
||||||
tokens,
|
|
||||||
text_pos: 0.into(),
|
|
||||||
token_pos: 0,
|
|
||||||
state: State::PendingStart,
|
|
||||||
inner: SyntaxTreeBuilder::default(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn finish_eof(mut self) -> (GreenNode, Vec<SyntaxError>, bool) {
|
pub(super) fn finish_eof(mut self) -> (GreenNode, Vec<SyntaxError>, bool) {
|
||||||
|
@ -113,8 +91,17 @@ impl<'a> TextTreeSink<'a> {
|
||||||
State::PendingStart | State::Normal => unreachable!(),
|
State::PendingStart | State::Normal => unreachable!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
let (node, errors) = self.inner.finish_raw();
|
let (node, mut errors) = self.inner.finish_raw();
|
||||||
let is_eof = self.token_pos == self.tokens.len();
|
for (i, err) in self.lexed.errors() {
|
||||||
|
let text_range = self.lexed.text_range(i);
|
||||||
|
let text_range = TextRange::new(
|
||||||
|
text_range.start.try_into().unwrap(),
|
||||||
|
text_range.end.try_into().unwrap(),
|
||||||
|
);
|
||||||
|
errors.push(SyntaxError::new(err, text_range))
|
||||||
|
}
|
||||||
|
|
||||||
|
let is_eof = self.pos == self.lexed.len();
|
||||||
|
|
||||||
(node, errors, is_eof)
|
(node, errors, is_eof)
|
||||||
}
|
}
|
||||||
|
@ -125,27 +112,26 @@ impl<'a> TextTreeSink<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eat_trivias(&mut self) {
|
fn eat_trivias(&mut self) {
|
||||||
while let Some(&token) = self.tokens.get(self.token_pos) {
|
while self.pos < self.lexed.len() {
|
||||||
if !token.kind.is_trivia() {
|
let kind = self.lexed.kind(self.pos);
|
||||||
|
if !kind.is_trivia() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
self.do_token(token.kind, token.len, 1);
|
self.do_token(kind, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eat_n_trivias(&mut self, n: usize) {
|
fn eat_n_trivias(&mut self, n: usize) {
|
||||||
for _ in 0..n {
|
for _ in 0..n {
|
||||||
let token = self.tokens[self.token_pos];
|
let kind = self.lexed.kind(self.pos);
|
||||||
assert!(token.kind.is_trivia());
|
assert!(kind.is_trivia());
|
||||||
self.do_token(token.kind, token.len, 1);
|
self.do_token(kind, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn do_token(&mut self, kind: SyntaxKind, len: TextSize, n_tokens: usize) {
|
fn do_token(&mut self, kind: SyntaxKind, n_tokens: usize) {
|
||||||
let range = TextRange::at(self.text_pos, len);
|
let text = &self.lexed.range_text(self.pos..self.pos + n_tokens);
|
||||||
let text = &self.text[range];
|
self.pos += n_tokens;
|
||||||
self.text_pos += len;
|
|
||||||
self.token_pos += n_tokens;
|
|
||||||
self.inner.token(kind, text);
|
self.inner.token(kind, text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,6 @@ mod sourcegen_ast;
|
||||||
mod ast_src;
|
mod ast_src;
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
fmt::Write,
|
|
||||||
fs,
|
fs,
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
|
@ -13,25 +12,7 @@ use expect_test::expect_file;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use test_utils::{bench, bench_fixture, project_root};
|
use test_utils::{bench, bench_fixture, project_root};
|
||||||
|
|
||||||
use crate::{ast, fuzz, tokenize, AstNode, SourceFile, SyntaxError, TextRange, TextSize, Token};
|
use crate::{ast, fuzz, AstNode, SourceFile, SyntaxError};
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn lexer_tests() {
|
|
||||||
// FIXME:
|
|
||||||
// * Add tests for unicode escapes in byte-character and [raw]-byte-string literals
|
|
||||||
// * Add tests for unescape errors
|
|
||||||
|
|
||||||
dir_tests(&test_data_dir(), &["lexer/ok"], "txt", |text, path| {
|
|
||||||
let (tokens, errors) = tokenize(text);
|
|
||||||
assert_errors_are_absent(&errors, path);
|
|
||||||
dump_tokens_and_errors(&tokens, &errors, text)
|
|
||||||
});
|
|
||||||
dir_tests(&test_data_dir(), &["lexer/err"], "txt", |text, path| {
|
|
||||||
let (tokens, errors) = tokenize(text);
|
|
||||||
assert_errors_are_present(&errors, path);
|
|
||||||
dump_tokens_and_errors(&tokens, &errors, text)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_smoke_test() {
|
fn parse_smoke_test() {
|
||||||
|
@ -206,22 +187,6 @@ fn assert_errors_are_absent(errors: &[SyntaxError], path: &Path) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dump_tokens_and_errors(tokens: &[Token], errors: &[SyntaxError], text: &str) -> String {
|
|
||||||
let mut acc = String::new();
|
|
||||||
let mut offset: TextSize = 0.into();
|
|
||||||
for token in tokens {
|
|
||||||
let token_len = token.len;
|
|
||||||
let token_text = &text[TextRange::at(offset, token.len)];
|
|
||||||
offset += token.len;
|
|
||||||
writeln!(acc, "{:?} {:?} {:?}", token.kind, token_len, token_text).unwrap();
|
|
||||||
}
|
|
||||||
for err in errors {
|
|
||||||
writeln!(acc, "> error{:?} token({:?}) msg({})", err.range(), &text[err.range()], err)
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
acc
|
|
||||||
}
|
|
||||||
|
|
||||||
fn fragment_parser_dir_test<T, F>(ok_paths: &[&str], err_paths: &[&str], f: F)
|
fn fragment_parser_dir_test<T, F>(ok_paths: &[&str], err_paths: &[&str], f: F)
|
||||||
where
|
where
|
||||||
T: crate::AstNode,
|
T: crate::AstNode,
|
||||||
|
|
Loading…
Reference in a new issue