From 622c843a4c598befaf459e64e9f75f31b4886a5b Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Sat, 21 Mar 2020 03:04:11 +0800 Subject: [PATCH] Add TokenConvertor trait --- crates/ra_mbe/src/syntax_bridge.rs | 396 ++++++++++++----------------- crates/ra_mbe/src/tests.rs | 6 +- 2 files changed, 159 insertions(+), 243 deletions(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 0678c37eeb..540afc87c7 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -3,12 +3,11 @@ use ra_parser::{FragmentKind, ParseError, TreeSink}; use ra_syntax::{ ast::{self, make::tokens::doc_comment}, - tokenize, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, + tokenize, AstToken, Parse, SmolStr, SyntaxKind, SyntaxKind::*, - SyntaxNode, SyntaxTreeBuilder, TextRange, TextUnit, Token, T, + SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextUnit, Token as RawToken, T, }; use rustc_hash::FxHashMap; -use std::iter::successors; use tt::buffer::{Cursor, TokenBuffer}; use crate::subtree_source::SubtreeTokenSource; @@ -50,10 +49,8 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM /// will consume). 
pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> { let global_offset = node.text_range().start(); - let mut c = Convertor { - id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } }, - }; - let subtree = c.go(node)?; + let mut c = Convertor::new(node, global_offset); + let subtree = c.go()?; Some((subtree, c.id_alloc.map)) } @@ -237,16 +234,6 @@ impl TokenIdAlloc { token_id } - fn delim(&mut self, open_abs_range: TextRange, close_abs_range: TextRange) -> tt::TokenId { - let open_relative_range = open_abs_range - self.global_offset; - let close_relative_range = close_abs_range - self.global_offset; - let token_id = tt::TokenId(self.next_id); - self.next_id += 1; - - self.map.insert_delim(token_id, open_relative_range, close_relative_range); - token_id - } - fn open_delim(&mut self, open_abs_range: TextRange) -> tt::TokenId { let token_id = tt::TokenId(self.next_id); self.next_id += 1; @@ -264,15 +251,19 @@ struct RawConvertor<'a> { text: &'a str, offset: TextUnit, id_alloc: TokenIdAlloc, - inner: std::slice::Iter<'a, Token>, + inner: std::slice::Iter<'a, RawToken>, } trait SrcToken { - fn kind() -> SyntaxKind; + fn kind(&self) -> SyntaxKind; + + fn to_char(&self) -> Option<char>; + + fn to_text(&self) -> SmolStr; } -trait TokenConvertor { - type Token : SrcToken; +trait TokenConvertor { + type Token: SrcToken; fn go(&mut self) -> Option<tt::Subtree> { let mut subtree = tt::Subtree::default(); @@ -291,10 +282,6 @@ trait TokenConvertor { Some(subtree) } - fn bump(&mut self) -> Option<(Self::Token, TextRange)>; - - fn peek(&self) -> Option<Self::Token>; - fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) { let (token, range) = match self.bump() { None => return, @@ -303,8 +290,7 @@ trait TokenConvertor { let k: SyntaxKind = token.kind(); if k == COMMENT { - let node = doc_comment(&self.text[range]); - if let Some(tokens) = convert_doc_comment(&node) { + if let Some(tokens) = self.convert_doc_comment(&token) { result.extend(tokens); } return; 
@@ -320,40 +306,39 @@ trait TokenConvertor { if let Some((kind, closed)) = delim { let mut subtree = tt::Subtree::default(); - let id = self.id_alloc.open_delim(range); + let id = self.id_alloc().open_delim(range); subtree.delimiter = Some(tt::Delimiter { kind, id }); - while self.peek().map(|it| it.kind != closed).unwrap_or(false) { + while self.peek().map(|it| it.kind() != closed).unwrap_or(false) { self.collect_leaf(&mut subtree.token_trees); } let last_range = match self.bump() { None => return, Some(it) => it.1, }; - self.id_alloc.close_delim(id, last_range); + self.id_alloc().close_delim(id, last_range); subtree.into() } else { let spacing = match self.peek() { Some(next) - if next.kind.is_trivia() - || next.kind == T!['['] - || next.kind == T!['{'] - || next.kind == T!['('] => + if next.kind().is_trivia() + || next.kind() == T!['['] + || next.kind() == T!['{'] + || next.kind() == T!['('] => { tt::Spacing::Alone } - Some(next) if next.kind.is_punct() => tt::Spacing::Joint, + Some(next) if next.kind().is_punct() => tt::Spacing::Joint, _ => tt::Spacing::Alone, }; - let char = - self.text[range].chars().next().expect("Token from lexer must be single char"); + let char = token.to_char().expect("Token from lexer must be single char"); - tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into() + tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc().alloc(range) }).into() } } else { macro_rules! 
make_leaf { ($i:ident) => { - tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into() + tt::$i { id: self.id_alloc().alloc(range), text: token.to_text() }.into() }; } let leaf: tt::Leaf = match k { @@ -367,237 +352,168 @@ trait TokenConvertor { leaf.into() }); } + + fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>; + + fn bump(&mut self) -> Option<(Self::Token, TextRange)>; + + fn peek(&self) -> Option<Self::Token>; + + fn id_alloc(&mut self) -> &mut TokenIdAlloc; } -impl RawConvertor<'_> { - fn go(&mut self) -> Option<tt::Subtree> { - let mut subtree = tt::Subtree::default(); - subtree.delimiter = None; - while self.peek().is_some() { - self.collect_leaf(&mut subtree.token_trees); - } - if subtree.token_trees.is_empty() { - return None; - } - if subtree.token_trees.len() == 1 { - if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] { - return Some(first.clone()); - } - } - Some(subtree) +impl<'a> SrcToken for (RawToken, &'a str) { + fn kind(&self) -> SyntaxKind { + self.0.kind } - fn bump(&mut self) -> Option<(Token, TextRange)> { + fn to_char(&self) -> Option<char> { + self.1.chars().next() + } + + fn to_text(&self) -> SmolStr { + self.1.into() + } +} + +impl RawConvertor<'_> {} + +impl<'a> TokenConvertor for RawConvertor<'a> { + type Token = (RawToken, &'a str); + + fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> { + convert_doc_comment(&doc_comment(token.1)) + } + + fn bump(&mut self) -> Option<(Self::Token, TextRange)> { let token = self.inner.next()?; let range = TextRange::offset_len(self.offset, token.len); self.offset += token.len; - Some((*token, range)) + + Some(((*token, &self.text[range]), range)) } - fn peek(&self) -> Option<Token> { - self.inner.as_slice().get(0).cloned() + fn peek(&self) -> Option<Self::Token> { + let token = self.inner.as_slice().get(0).cloned(); + + token.map(|it| { + let range = TextRange::offset_len(self.offset, it.len); + (it, &self.text[range]) + }) } - - fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) { - let (token, 
range) = match self.bump() { - None => return, - Some(it) => it, - }; - - let k: SyntaxKind = token.kind; - if k == COMMENT { - let node = doc_comment(&self.text[range]); - if let Some(tokens) = convert_doc_comment(&node) { - result.extend(tokens); - } - return; - } - - result.push(if k.is_punct() { - let delim = match k { - T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])), - T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])), - T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])), - _ => None, - }; - - if let Some((kind, closed)) = delim { - let mut subtree = tt::Subtree::default(); - let id = self.id_alloc.open_delim(range); - subtree.delimiter = Some(tt::Delimiter { kind, id }); - - while self.peek().map(|it| it.kind != closed).unwrap_or(false) { - self.collect_leaf(&mut subtree.token_trees); - } - let last_range = match self.bump() { - None => return, - Some(it) => it.1, - }; - self.id_alloc.close_delim(id, last_range); - subtree.into() - } else { - let spacing = match self.peek() { - Some(next) - if next.kind.is_trivia() - || next.kind == T!['['] - || next.kind == T!['{'] - || next.kind == T!['('] => - { - tt::Spacing::Alone - } - Some(next) if next.kind.is_punct() => tt::Spacing::Joint, - _ => tt::Spacing::Alone, - }; - let char = - self.text[range].chars().next().expect("Token from lexer must be single char"); - - tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into() - } - } else { - macro_rules! 
make_leaf { - ($i:ident) => { - tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into() - }; - } - let leaf: tt::Leaf = match k { - T![true] | T![false] => make_leaf!(Literal), - IDENT | LIFETIME => make_leaf!(Ident), - k if k.is_keyword() => make_leaf!(Ident), - k if k.is_literal() => make_leaf!(Literal), - _ => return, - }; - - leaf.into() - }); + fn id_alloc(&mut self) -> &mut TokenIdAlloc { + &mut self.id_alloc } } -// FIXME: There are some duplicate logic between RawConvertor and Convertor -// It would be nice to refactor to converting SyntaxNode to ra_parser::Token and thus -// use RawConvertor directly. But performance-wise it may not be a good idea ? struct Convertor { id_alloc: TokenIdAlloc, + current: Option<SyntaxToken>, + range: TextRange, + punct_offset: Option<(SyntaxToken, TextUnit)>, } impl Convertor { - fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> { - // This tree is empty - if tt.first_child_or_token().is_none() { - return Some(tt::Subtree { token_trees: vec![], delimiter: None }); + fn new(node: &SyntaxNode, global_offset: TextUnit) -> Convertor { + Convertor { + id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } }, + current: node.first_token(), + range: node.text_range(), + punct_offset: None, + } + } +} + +enum SynToken { + Ordiniary(SyntaxToken), + Punch(SyntaxToken, TextUnit), +} + +impl SynToken { + fn token(&self) -> &SyntaxToken { + match self { + SynToken::Ordiniary(it) => it, + SynToken::Punch(it, _) => it, + } + } +} + +impl SrcToken for SynToken { + fn kind(&self) -> SyntaxKind { + self.token().kind() + } + fn to_char(&self) -> Option<char> { + match self { + SynToken::Ordiniary(_) => None, + SynToken::Punch(it, i) => it.text().chars().nth(i.to_usize()), + } + } + fn to_text(&self) -> SmolStr { + self.token().text().clone() + } +} + +impl TokenConvertor for Convertor { + type Token = SynToken; + fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> { + convert_doc_comment(token.token()) + } + 
+ fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + let curr = self.current.clone()?; + if !curr.text_range().is_subrange(&self.range) { + return None; } - let first_child = tt.first_child_or_token()?; - let last_child = tt.last_child_or_token()?; - - // ignore trivial first_child and last_child - let first_child = successors(Some(first_child), |it| { - if it.kind().is_trivia() { - it.next_sibling_or_token() - } else { - None + if let Some((punct, offset)) = self.punct_offset.clone() { + if offset.to_usize() + 1 < punct.text().len() { + let offset = offset + TextUnit::from_usize(1); + let range = punct.text_range(); + self.punct_offset = Some((punct, offset)); + let range = TextRange::offset_len(range.start() + offset, TextUnit::from_usize(1)); + return Some((SynToken::Punch(curr, offset), range)); } - }) - .last() - .unwrap(); - if first_child.kind().is_trivia() { - return Some(tt::Subtree { token_trees: vec![], delimiter: None }); } - let last_child = successors(Some(last_child), |it| { - if it.kind().is_trivia() { - it.prev_sibling_or_token() - } else { - None - } - }) - .last() - .unwrap(); + self.current = curr.next_token(); - let (delimiter_kind, skip_first) = match (first_child.kind(), last_child.kind()) { - (T!['('], T![')']) => (Some(tt::DelimiterKind::Parenthesis), true), - (T!['{'], T!['}']) => (Some(tt::DelimiterKind::Brace), true), - (T!['['], T![']']) => (Some(tt::DelimiterKind::Bracket), true), - _ => (None, false), + let token = if curr.kind().is_punct() { + let range = curr.text_range(); + self.punct_offset = Some((curr.clone(), TextUnit::from_usize(0))); + (SynToken::Punch(curr, TextUnit::from_usize(0)), range) + } else { + self.punct_offset = None; + let range = curr.text_range(); + (SynToken::Ordiniary(curr), range) }; - let delimiter = delimiter_kind.map(|kind| tt::Delimiter { - kind, - id: self.id_alloc.delim(first_child.text_range(), last_child.text_range()), - }); - let mut token_trees = Vec::new(); - let mut child_iter = 
tt.children_with_tokens().skip(skip_first as usize).peekable(); + Some(token) + } - while let Some(child) = child_iter.next() { - if skip_first && (child == first_child || child == last_child) { - continue; - } - - match child { - NodeOrToken::Token(token) => { - if let Some(doc_tokens) = convert_doc_comment(&token) { - token_trees.extend(doc_tokens); - } else if token.kind().is_trivia() { - continue; - } else if token.kind().is_punct() { - // we need to pull apart joined punctuation tokens - let last_spacing = match child_iter.peek() { - Some(NodeOrToken::Token(token)) => { - if token.kind().is_punct() { - tt::Spacing::Joint - } else { - tt::Spacing::Alone - } - } - _ => tt::Spacing::Alone, - }; - let spacing_iter = std::iter::repeat(tt::Spacing::Joint) - .take(token.text().len() - 1) - .chain(std::iter::once(last_spacing)); - for (char, spacing) in token.text().chars().zip(spacing_iter) { - token_trees.push( - tt::Leaf::from(tt::Punct { - char, - spacing, - id: self.id_alloc.alloc(token.text_range()), - }) - .into(), - ); - } - } else { - macro_rules! 
make_leaf { - ($i:ident) => { - tt::$i { - id: self.id_alloc.alloc(token.text_range()), - text: token.text().clone(), - } - .into() - }; - } - - let child: tt::Leaf = match token.kind() { - T![true] | T![false] => make_leaf!(Literal), - IDENT | LIFETIME => make_leaf!(Ident), - k if k.is_keyword() => make_leaf!(Ident), - k if k.is_literal() => make_leaf!(Literal), - _ => return None, - }; - token_trees.push(child.into()); - } - } - NodeOrToken::Node(node) => { - let child_subtree = self.go(&node)?; - if child_subtree.delimiter.is_none() && node.kind() != SyntaxKind::TOKEN_TREE { - token_trees.extend(child_subtree.token_trees); - } else { - token_trees.push(child_subtree.into()); - } - } - }; + fn peek(&self) -> Option<Self::Token> { + let curr = self.current.clone()?; + if !curr.text_range().is_subrange(&self.range) { + return None; } - let res = tt::Subtree { delimiter, token_trees }; - Some(res) + if let Some((punct, mut offset)) = self.punct_offset.clone() { + offset = offset + TextUnit::from_usize(1); + if offset.to_usize() < punct.text().len() { + return Some(SynToken::Punch(punct, offset)); + } + } + + let token = if curr.kind().is_punct() { + SynToken::Punch(curr, TextUnit::from_usize(0)) + } else { + SynToken::Ordiniary(curr) + }; + Some(token) + } + + fn id_alloc(&mut self) -> &mut TokenIdAlloc { + &mut self.id_alloc } } diff --git a/crates/ra_mbe/src/tests.rs b/crates/ra_mbe/src/tests.rs index 966af1d125..a3f242e495 100644 --- a/crates/ra_mbe/src/tests.rs +++ b/crates/ra_mbe/src/tests.rs @@ -1449,8 +1449,8 @@ impl MacroFixture { let macro_invocation = source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); - let (invocation_tt, _) = - ast_to_token_tree(&macro_invocation.token_tree().unwrap()).unwrap(); + let (invocation_tt, _) = ast_to_token_tree(&macro_invocation.token_tree().unwrap()) + .ok_or_else(|| ExpandError::ConversionError)?; self.rules.expand(&invocation_tt).result() } @@ -1694,5 +1694,5 @@ fn test_expand_bad_literal() { macro_rules! 
foo { ($i:literal) => {}; } "#, ) - .assert_expand_err(r#"foo!(&k");"#, &ExpandError::BindingError("".to_string())); + .assert_expand_err(r#"foo!(&k");"#, &ExpandError::ConversionError); }