1312: Introduce TokenBuffer r=matklad a=edwin0cheng

As discussed in Zulip, this PR introduces `TokenBuffer`, a safe version of the `syn` crate's `TokenBuffer`, which supports cursor-based traversal of `tt::TokenTree`. This is the basis of the upcoming refactoring of the `TokenSource` iterator-based API.

This PR does the following things:

* Add TokenBuffer in `ra_tt` crate.
* Use this new API to refactor the `SubtreeSource`, to demonstrate its usage.




Co-authored-by: Edwin Cheng <edwin0cheng@gmail.com>
This commit is contained in:
bors[bot] 2019-05-23 14:31:26 +00:00
commit ef00b5af1c
5 changed files with 260 additions and 202 deletions

View file

@ -2,6 +2,7 @@ use crate::subtree_source::SubtreeTokenSource;
use ra_parser::{TokenSource, TreeSink};
use ra_syntax::{SyntaxKind};
use tt::buffer::TokenBuffer;
struct OffsetTokenSink {
token_pos: usize,
@ -69,7 +70,8 @@ impl<'a> Parser<'a> {
where
F: FnOnce(&dyn TokenSource, &mut dyn TreeSink),
{
let mut src = SubtreeTokenSource::new(&self.subtree.token_trees[*self.cur_pos..]);
let buffer = TokenBuffer::new(&self.subtree.token_trees[*self.cur_pos..]);
let mut src = SubtreeTokenSource::new(&buffer);
let mut sink = OffsetTokenSink { token_pos: 0, error: false };
f(&src, &mut sink);
@ -85,7 +87,7 @@ impl<'a> Parser<'a> {
let res = src.bump_n(parsed_token);
*self.cur_pos += res.len();
let res: Vec<_> = res.into_iter().cloned().collect();
let res: Vec<_> = res.into_iter().collect();
match res.len() {
0 => None,

View file

@ -1,50 +1,10 @@
use ra_parser::{TokenSource};
use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*, T};
use std::cell::{RefCell};
use std::cell::{RefCell, Cell};
use tt::buffer::{TokenBuffer, Cursor};
// A sequence of tokens
#[derive(Debug, Clone, Eq, PartialEq)]
pub(super) enum TokenSeq<'a> {
Subtree(&'a tt::Subtree),
Seq(&'a [tt::TokenTree]),
}
impl<'a> From<&'a tt::Subtree> for TokenSeq<'a> {
fn from(s: &'a tt::Subtree) -> TokenSeq<'a> {
TokenSeq::Subtree(s)
}
}
impl<'a> From<&'a [tt::TokenTree]> for TokenSeq<'a> {
fn from(s: &'a [tt::TokenTree]) -> TokenSeq<'a> {
TokenSeq::Seq(s)
}
}
#[derive(Debug)]
enum DelimToken<'a> {
Delim(&'a tt::Delimiter, bool),
Token(&'a tt::TokenTree),
End,
}
impl<'a> TokenSeq<'a> {
fn get(&self, pos: usize) -> DelimToken<'a> {
match self {
TokenSeq::Subtree(subtree) => {
let len = subtree.token_trees.len() + 2;
match pos {
p if p >= len => DelimToken::End,
p if p == len - 1 => DelimToken::Delim(&subtree.delimiter, true),
0 => DelimToken::Delim(&subtree.delimiter, false),
p => DelimToken::Token(&subtree.token_trees[p - 1]),
}
}
TokenSeq::Seq(tokens) => {
tokens.get(pos).map(DelimToken::Token).unwrap_or(DelimToken::End)
}
}
}
pub(crate) trait Querier {
fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr, bool);
}
#[derive(Debug, Clone, Eq, PartialEq)]
@ -54,183 +14,101 @@ struct TtToken {
pub text: SmolStr,
}
#[derive(Debug, Clone, Eq, PartialEq)]
enum WalkCursor {
Token(usize, TtToken),
Eof,
}
#[derive(Debug)]
struct SubTreeWalker<'a> {
pos: usize,
stack: Vec<(TokenSeq<'a>, usize)>,
cursor: WalkCursor,
ts: TokenSeq<'a>,
}
impl<'a> SubTreeWalker<'a> {
fn new(ts: TokenSeq<'a>) -> SubTreeWalker {
let mut res = SubTreeWalker { pos: 0, stack: vec![], cursor: WalkCursor::Eof, ts };
res.reset();
res
}
fn is_eof(&self) -> bool {
self.cursor == WalkCursor::Eof
}
fn reset(&mut self) {
self.pos = 0;
self.stack = vec![];
self.cursor = match self.ts.get(0) {
DelimToken::Token(token) => match token {
tt::TokenTree::Subtree(subtree) => {
let ts = TokenSeq::from(subtree);
self.stack.push((ts, 0));
WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
}
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(0, convert_leaf(leaf)),
},
DelimToken::Delim(delim, is_end) => {
assert!(!is_end);
WalkCursor::Token(0, convert_delim(*delim, false))
}
DelimToken::End => WalkCursor::Eof,
}
}
fn current(&self) -> Option<&TtToken> {
match &self.cursor {
WalkCursor::Token(_, t) => Some(t),
WalkCursor::Eof => None,
}
}
fn top(&self) -> &TokenSeq {
self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts)
}
/// Move cursor forward by 1 step
fn forward(&mut self) {
if self.is_eof() {
return;
}
self.pos += 1;
if let WalkCursor::Token(u, _) = self.cursor {
self.cursor = self.walk_token(u)
}
}
/// Traversal child token
fn walk_token(&mut self, pos: usize) -> WalkCursor {
let top = self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts);
let pos = pos + 1;
match top.get(pos) {
DelimToken::Token(token) => match token {
tt::TokenTree::Subtree(subtree) => {
let ts = TokenSeq::from(subtree);
self.stack.push((ts, pos));
WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
}
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(pos, convert_leaf(leaf)),
},
DelimToken::Delim(delim, is_end) => {
WalkCursor::Token(pos, convert_delim(*delim, is_end))
}
DelimToken::End => {
// it is the top level
if let Some((_, last_idx)) = self.stack.pop() {
self.walk_token(last_idx)
} else {
WalkCursor::Eof
}
}
}
}
}
pub(crate) trait Querier {
fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr, bool);
}
// A wrapper class for ref cell
#[derive(Debug)]
pub(crate) struct WalkerOwner<'a> {
walker: RefCell<SubTreeWalker<'a>>,
pub(crate) struct SubtreeWalk<'a> {
start: Cursor<'a>,
cursor: Cell<Cursor<'a>>,
cached: RefCell<Vec<Option<TtToken>>>,
}
impl<'a> WalkerOwner<'a> {
fn new<I: Into<TokenSeq<'a>>>(ts: I) -> Self {
WalkerOwner {
walker: RefCell::new(SubTreeWalker::new(ts.into())),
impl<'a> SubtreeWalk<'a> {
fn new(cursor: Cursor<'a>) -> Self {
SubtreeWalk {
start: cursor,
cursor: Cell::new(cursor),
cached: RefCell::new(Vec::with_capacity(10)),
}
}
fn get<'b>(&self, pos: usize) -> Option<TtToken> {
fn get(&self, pos: usize) -> Option<TtToken> {
let mut cached = self.cached.borrow_mut();
if pos < cached.len() {
return cached[pos].clone();
}
while pos >= cached.len() {
self.set_pos(cached.len());
let walker = self.walker.borrow();
cached.push(walker.current().cloned());
let cursor = self.cursor.get();
if cursor.eof() {
cached.push(None);
continue;
}
match cursor.token_tree() {
Some(tt::TokenTree::Leaf(leaf)) => {
cached.push(Some(convert_leaf(&leaf)));
self.cursor.set(cursor.bump());
}
Some(tt::TokenTree::Subtree(subtree)) => {
self.cursor.set(cursor.subtree().unwrap());
cached.push(Some(convert_delim(subtree.delimiter, false)));
}
None => {
if let Some(subtree) = cursor.end() {
cached.push(Some(convert_delim(subtree.delimiter, true)));
self.cursor.set(cursor.bump());
}
}
}
}
return cached[pos].clone();
}
fn set_pos(&self, pos: usize) {
let mut walker = self.walker.borrow_mut();
assert!(walker.pos <= pos);
while pos > walker.pos && !walker.is_eof() {
walker.forward();
}
}
fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> {
fn collect_token_trees(&mut self, n: usize) -> Vec<tt::TokenTree> {
let mut res = vec![];
let mut walker = self.walker.borrow_mut();
walker.reset();
while walker.pos < n {
if let WalkCursor::Token(u, _) = &walker.cursor {
// We only collect the topmost child
if walker.stack.len() == 0 {
if let DelimToken::Token(token) = walker.ts.get(*u) {
res.push(token);
let mut pos = 0;
let mut cursor = self.start;
let mut level = 0;
while pos < n {
if cursor.eof() {
break;
}
match cursor.token_tree() {
Some(tt::TokenTree::Leaf(leaf)) => {
if level == 0 {
res.push(leaf.into());
}
cursor = cursor.bump();
pos += 1;
}
// Check whether the second level is a subtree
// if so, collect its parent which is topmost child
else if walker.stack.len() == 1 {
if let DelimToken::Delim(_, is_end) = walker.top().get(*u) {
if !is_end {
let (_, last_idx) = &walker.stack[0];
if let DelimToken::Token(token) = walker.ts.get(*last_idx) {
res.push(token);
}
}
Some(tt::TokenTree::Subtree(subtree)) => {
if level == 0 {
res.push(subtree.into());
}
pos += 1;
level += 1;
cursor = cursor.subtree().unwrap();
}
None => {
if let Some(_) = cursor.end() {
level -= 1;
pos += 1;
cursor = cursor.bump();
}
}
}
walker.forward();
}
res
}
}
impl<'a> Querier for WalkerOwner<'a> {
impl<'a> Querier for SubtreeWalk<'a> {
fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr, bool) {
self.get(uidx)
.map(|tkn| (tkn.kind, tkn.text, tkn.is_joint_to_next))
@ -239,22 +117,22 @@ impl<'a> Querier for WalkerOwner<'a> {
}
pub(crate) struct SubtreeTokenSource<'a> {
walker: WalkerOwner<'a>,
walker: SubtreeWalk<'a>,
}
impl<'a> SubtreeTokenSource<'a> {
pub fn new<I: Into<TokenSeq<'a>>>(ts: I) -> SubtreeTokenSource<'a> {
SubtreeTokenSource { walker: WalkerOwner::new(ts) }
pub fn new(buffer: &'a TokenBuffer) -> SubtreeTokenSource<'a> {
SubtreeTokenSource { walker: SubtreeWalk::new(buffer.begin()) }
}
pub fn querier<'b>(&'a self) -> &'b WalkerOwner<'a>
pub fn querier<'b>(&'a self) -> &'b SubtreeWalk<'a>
where
'a: 'b,
{
&self.walker
}
pub(crate) fn bump_n(&mut self, parsed_tokens: usize) -> Vec<&tt::TokenTree> {
pub(crate) fn bump_n(&mut self, parsed_tokens: usize) -> Vec<tt::TokenTree> {
let res = self.walker.collect_token_trees(parsed_tokens);
res
}

View file

@ -47,7 +47,8 @@ pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, Toke
/// Parses the token tree (result of macro expansion) to an expression
pub fn token_tree_to_expr(tt: &tt::Subtree) -> Result<TreeArc<ast::Expr>, ExpandError> {
let token_source = SubtreeTokenSource::new(tt);
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
let token_source = SubtreeTokenSource::new(&buffer);
let mut tree_sink = TtTreeSink::new(token_source.querier());
ra_parser::parse_expr(&token_source, &mut tree_sink);
if tree_sink.roots.len() != 1 {
@ -62,7 +63,8 @@ pub fn token_tree_to_expr(tt: &tt::Subtree) -> Result<TreeArc<ast::Expr>, Expand
/// Parses the token tree (result of macro expansion) to a Pattern
pub fn token_tree_to_pat(tt: &tt::Subtree) -> Result<TreeArc<ast::Pat>, ExpandError> {
let token_source = SubtreeTokenSource::new(tt);
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
let token_source = SubtreeTokenSource::new(&buffer);
let mut tree_sink = TtTreeSink::new(token_source.querier());
ra_parser::parse_pat(&token_source, &mut tree_sink);
if tree_sink.roots.len() != 1 {
@ -75,7 +77,8 @@ pub fn token_tree_to_pat(tt: &tt::Subtree) -> Result<TreeArc<ast::Pat>, ExpandEr
/// Parses the token tree (result of macro expansion) to a Type
pub fn token_tree_to_ty(tt: &tt::Subtree) -> Result<TreeArc<ast::TypeRef>, ExpandError> {
let token_source = SubtreeTokenSource::new(tt);
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
let token_source = SubtreeTokenSource::new(&buffer);
let mut tree_sink = TtTreeSink::new(token_source.querier());
ra_parser::parse_ty(&token_source, &mut tree_sink);
if tree_sink.roots.len() != 1 {
@ -89,7 +92,8 @@ pub fn token_tree_to_ty(tt: &tt::Subtree) -> Result<TreeArc<ast::TypeRef>, Expan
pub fn token_tree_to_macro_stmts(
tt: &tt::Subtree,
) -> Result<TreeArc<ast::MacroStmts>, ExpandError> {
let token_source = SubtreeTokenSource::new(tt);
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
let token_source = SubtreeTokenSource::new(&buffer);
let mut tree_sink = TtTreeSink::new(token_source.querier());
ra_parser::parse_macro_stmts(&token_source, &mut tree_sink);
if tree_sink.roots.len() != 1 {
@ -103,7 +107,8 @@ pub fn token_tree_to_macro_stmts(
pub fn token_tree_to_macro_items(
tt: &tt::Subtree,
) -> Result<TreeArc<ast::MacroItems>, ExpandError> {
let token_source = SubtreeTokenSource::new(tt);
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
let token_source = SubtreeTokenSource::new(&buffer);
let mut tree_sink = TtTreeSink::new(token_source.querier());
ra_parser::parse_macro_items(&token_source, &mut tree_sink);
if tree_sink.roots.len() != 1 {
@ -115,7 +120,8 @@ pub fn token_tree_to_macro_items(
/// Parses the token tree (result of macro expansion) as a sequence of items
pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> {
let token_source = SubtreeTokenSource::new(tt);
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
let token_source = SubtreeTokenSource::new(&buffer);
let mut tree_sink = TtTreeSink::new(token_source.querier());
ra_parser::parse(&token_source, &mut tree_sink);
let syntax = tree_sink.inner.finish();
@ -381,7 +387,8 @@ mod tests {
"#,
);
let expansion = expand(&rules, "literals!(foo)");
let tt_src = SubtreeTokenSource::new(&expansion);
let buffer = tt::buffer::TokenBuffer::new(&[expansion.clone().into()]);
let tt_src = SubtreeTokenSource::new(&buffer);
let query = tt_src.querier();

169
crates/ra_tt/src/buffer.rs Normal file
View file

@ -0,0 +1,169 @@
use crate::{TokenTree, Subtree, Leaf};
/// Identifies one entry slice of a `TokenBuffer`: the wrapped value is used
/// as an index into `TokenBuffer::buffers`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct EntryId(usize);
/// Address of a single entry: the slice it lives in (`EntryId`) followed by
/// the index of the entry within that slice.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct EntryPtr(EntryId, usize);
/// Buffer-internal stand-in for `TokenTree`.
///
/// A `TokenBuffer` stores these instead of raw token trees so that every
/// nesting level can be walked as a flat slice. The variants mimic the types
/// from proc-macro.
#[derive(Debug)]
enum Entry {
    /// A nested subtree, together with the id of the entry slice that holds
    /// the subtree's children.
    Subtree(Subtree, EntryId),
    /// A single leaf token.
    Leaf(Leaf),
    /// Terminator of an entry slice. Holds a pointer to the entry in the
    /// containing token tree, or `None` if this is the outermost level.
    End(Option<EntryPtr>),
}
/// A buffer of token trees that can be traversed with a [`Cursor`].
///
/// This is the safe counterpart of the `syn` crate's
/// [`TokenBuffer`](https://github.com/dtolnay/syn/blob/6533607f91686545cb034d2838beea338d9d0742/src/buffer.rs#L41).
#[derive(Debug)]
pub struct TokenBuffer {
    /// One boxed slice of entries per nesting level instance: slot 0 holds
    /// the top-level sequence and every subtree's children get a slot of
    /// their own. Slots are addressed by `EntryId`.
    buffers: Vec<Box<[Entry]>>,
}
impl TokenBuffer {
pub fn new(tokens: &[TokenTree]) -> TokenBuffer {
let mut buffers = vec![];
let idx = TokenBuffer::new_inner(tokens, &mut buffers, None);
assert_eq!(idx, 0);
TokenBuffer { buffers }
}
fn new_inner(
tokens: &[TokenTree],
buffers: &mut Vec<Box<[Entry]>>,
next: Option<EntryPtr>,
) -> usize {
let mut entries = vec![];
let mut children = vec![];
for (idx, tt) in tokens.iter().cloned().enumerate() {
match tt {
TokenTree::Leaf(leaf) => {
entries.push(Entry::Leaf(leaf));
}
TokenTree::Subtree(subtree) => {
entries.push(Entry::End(None));
children.push((idx, subtree));
}
}
}
entries.push(Entry::End(next));
let res = buffers.len();
buffers.push(entries.into_boxed_slice());
for (child_idx, subtree) in children {
let idx = TokenBuffer::new_inner(
&subtree.token_trees,
buffers,
Some(EntryPtr(EntryId(res), child_idx + 1)),
);
buffers[res].as_mut()[child_idx] = Entry::Subtree(subtree, EntryId(idx));
}
res
}
/// Creates a cursor referencing the first token in the buffer and able to
/// traverse until the end of the buffer.
pub fn begin(&self) -> Cursor {
Cursor::create(self, EntryPtr(EntryId(0), 0))
}
fn entry(&self, ptr: &EntryPtr) -> Option<&Entry> {
let id = ptr.0;
self.buffers[id.0].get(ptr.1)
}
}
/// A cheaply copyable position inside a [`TokenBuffer`].
///
/// This is a safe version of `Cursor` from the `syn` crate:
/// <https://github.com/dtolnay/syn/blob/6533607f91686545cb034d2838beea338d9d0742/src/buffer.rs#L125>
#[derive(Copy, Clone, Debug)]
pub struct Cursor<'a> {
    /// The buffer being traversed.
    buffer: &'a TokenBuffer,
    /// The entry of `buffer` this cursor currently points at.
    ptr: EntryPtr,
}
impl<'a> PartialEq for Cursor<'a> {
    /// Two cursors are equal only when they point at the same entry position
    /// of the very same `TokenBuffer` instance (buffers are compared by
    /// address, not by content).
    fn eq(&self, other: &Cursor) -> bool {
        let same_buffer = std::ptr::eq(self.buffer, other.buffer);
        let same_position = self.ptr == other.ptr;
        same_buffer && same_position
    }
}
// Marker impl: cursor equality (same buffer address and same entry position)
// is a full equivalence relation.
impl<'a> Eq for Cursor<'a> {}
impl<'a> Cursor<'a> {
/// Check whether it is eof
pub fn eof(self) -> bool {
match self.buffer.entry(&self.ptr) {
None | Some(Entry::End(None)) => true,
_ => false,
}
}
/// If the cursor is pointing at the end of a subtree, returns
/// the parent subtree
pub fn end(self) -> Option<(&'a Subtree)> {
match self.entry() {
Some(Entry::End(Some(ptr))) => {
let idx = ptr.1;
if let Some(Entry::Subtree(subtree, _)) =
self.buffer.entry(&EntryPtr(ptr.0, idx - 1))
{
return Some(subtree);
}
None
}
_ => None,
}
}
fn entry(self) -> Option<(&'a Entry)> {
self.buffer.entry(&self.ptr)
}
/// If the cursor is pointing at a `Subtree`, returns
/// a cursor into that subtree
pub fn subtree(self) -> Option<Cursor<'a>> {
match self.entry() {
Some(Entry::Subtree(_, entry_id)) => {
Some(Cursor::create(self.buffer, EntryPtr(*entry_id, 0)))
}
_ => None,
}
}
/// If the cursor is pointing at a `TokenTree`, returns it
pub fn token_tree(self) -> Option<(TokenTree)> {
match self.entry() {
Some(Entry::Leaf(leaf)) => Some(leaf.clone().into()),
Some(Entry::Subtree(subtree, _)) => Some(subtree.clone().into()),
Some(Entry::End(_)) => None,
None => None,
}
}
fn create(buffer: &'a TokenBuffer, ptr: EntryPtr) -> Cursor<'a> {
Cursor { buffer, ptr }
}
/// Bump the cursor
pub fn bump(self) -> Cursor<'a> {
if let Some(Entry::End(exit)) = self.buffer.entry(&self.ptr) {
if let Some(exit) = exit {
Cursor::create(self.buffer, *exit)
} else {
self
}
} else {
Cursor::create(self.buffer, EntryPtr(self.ptr.0, self.ptr.1 + 1))
}
}
}

View file

@ -165,3 +165,5 @@ impl Subtree {
self.token_trees.len() + children_count
}
}
pub mod buffer;