69: Incremental reparsing for single tokens r=matklad a=darksv

Implement incremental reparsing for `WHITESPACE`, `COMMENT`, `DOC_COMMENT`, `IDENT`, `STRING` and `RAW_STRING` tokens. This makes it possible to avoid reparsing a whole block when an edit falls entirely within one of these tokens.
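
For illustration, a minimal sketch of driving the new fast path through the API visible in this diff (`File::parse`, `AtomEdit::replace`, `File::incremental_reparse`); the import paths and byte offsets are assumptions, not part of the change:

```rust
// Assumed imports: libsyntax2 re-exports `AtomEdit` (see the pub use below);
// `TextRange` comes from the text_unit crate used throughout this diff.
use libsyntax2::{AtomEdit, File};
use text_unit::TextRange;

fn main() {
    let file = File::parse("fn foo() {}");
    // Replace the identifier `foo` (offset 3, length 3) with `bar`.
    let edit = AtomEdit::replace(
        TextRange::offset_len(3.into(), 3.into()),
        "bar".to_string(),
    );
    // Fast path: only the edited IDENT leaf is re-lexed and swapped into
    // the green tree; `None` means the edit cannot be handled locally
    // (e.g. it spans several tokens), so a full parse is required.
    let _new_file = file
        .incremental_reparse(&edit)
        .unwrap_or_else(|| File::parse("fn bar() {}"));
}
```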

Co-authored-by: darksv <darek969-12@o2.pl>
bors[bot] 2018-09-15 20:57:06 +00:00
commit 2a56b5c4f0
4 changed files with 360 additions and 198 deletions


@@ -27,6 +27,10 @@ extern crate parking_lot;
extern crate smol_str;
extern crate text_unit;
#[cfg(test)]
#[macro_use]
extern crate test_utils;
pub mod algo;
pub mod ast;
mod lexer;
@@ -35,6 +39,7 @@ mod token_set;
mod parser_api;
mod grammar;
mod parser_impl;
mod reparsing;
mod syntax_kinds;
mod yellow;
@@ -49,12 +54,11 @@ pub use {
lexer::{tokenize, Token},
syntax_kinds::SyntaxKind,
yellow::{SyntaxNode, SyntaxNodeRef, OwnedRoot, RefRoot, TreeRoot, SyntaxError},
reparsing::AtomEdit,
};
use {
SyntaxKind::*,
yellow::{GreenNode, SyntaxRoot},
parser_api::Parser,
};
#[derive(Clone, Debug)]
@@ -82,25 +86,11 @@ impl File {
self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit))
}
pub fn incremental_reparse(&self, edit: &AtomEdit) -> Option<File> {
let (node, reparser) = find_reparsable_node(self.syntax(), edit.delete)?;
let text = replace_range(
node.text().to_string(),
edit.delete - node.range().start(),
&edit.insert,
);
let tokens = tokenize(&text);
if !is_balanced(&tokens) {
return None;
}
let (green, new_errors) = parser_impl::parse_with::<yellow::GreenBuilder>(
&text, &tokens, reparser,
);
let green_root = node.replace_with(green);
let errors = merge_errors(self.errors(), new_errors, node, edit);
Some(File::new(green_root, errors))
reparsing::incremental_reparse(self.syntax(), edit, self.errors())
.map(|(green_node, errors)| File::new(green_node, errors))
}
fn full_reparse(&self, edit: &AtomEdit) -> File {
let text = replace_range(self.syntax().text().to_string(), edit.delete, &edit.insert);
let text = text_utils::replace_range(self.syntax().text().to_string(), edit.delete, &edit.insert);
File::parse(&text)
}
pub fn ast(&self) -> ast::Root {
@@ -113,107 +103,3 @@ impl File {
self.syntax().root.syntax_root().errors.clone()
}
}
#[derive(Debug, Clone)]
pub struct AtomEdit {
pub delete: TextRange,
pub insert: String,
}
impl AtomEdit {
pub fn replace(range: TextRange, replace_with: String) -> AtomEdit {
AtomEdit { delete: range, insert: replace_with }
}
pub fn delete(range: TextRange) -> AtomEdit {
AtomEdit::replace(range, String::new())
}
pub fn insert(offset: TextUnit, text: String) -> AtomEdit {
AtomEdit::replace(TextRange::offset_len(offset, 0.into()), text)
}
}
fn find_reparsable_node(node: SyntaxNodeRef, range: TextRange) -> Option<(SyntaxNodeRef, fn(&mut Parser))> {
let node = algo::find_covering_node(node, range);
return algo::ancestors(node)
.filter_map(|node| reparser(node).map(|r| (node, r)))
.next();
fn reparser(node: SyntaxNodeRef) -> Option<fn(&mut Parser)> {
let res = match node.kind() {
BLOCK => grammar::block,
NAMED_FIELD_DEF_LIST => grammar::named_field_def_list,
NAMED_FIELD_LIST => grammar::named_field_list,
ENUM_VARIANT_LIST => grammar::enum_variant_list,
MATCH_ARM_LIST => grammar::match_arm_list,
USE_TREE_LIST => grammar::use_tree_list,
EXTERN_ITEM_LIST => grammar::extern_item_list,
TOKEN_TREE if node.first_child().unwrap().kind() == L_CURLY => grammar::token_tree,
ITEM_LIST => {
let parent = node.parent().unwrap();
match parent.kind() {
IMPL_ITEM => grammar::impl_item_list,
TRAIT_DEF => grammar::trait_item_list,
MODULE => grammar::mod_item_list,
_ => return None,
}
},
_ => return None,
};
Some(res)
}
}
pub /*(meh)*/ fn replace_range(mut text: String, range: TextRange, replace_with: &str) -> String {
let start = u32::from(range.start()) as usize;
let end = u32::from(range.end()) as usize;
text.replace_range(start..end, replace_with);
text
}
fn is_balanced(tokens: &[Token]) -> bool {
if tokens.len() == 0
|| tokens.first().unwrap().kind != L_CURLY
|| tokens.last().unwrap().kind != R_CURLY {
return false
}
let mut balance = 0usize;
for t in tokens.iter() {
match t.kind {
L_CURLY => balance += 1,
R_CURLY => balance = match balance.checked_sub(1) {
Some(b) => b,
None => return false,
},
_ => (),
}
}
balance == 0
}
fn merge_errors(
old_errors: Vec<SyntaxError>,
new_errors: Vec<SyntaxError>,
old_node: SyntaxNodeRef,
edit: &AtomEdit,
) -> Vec<SyntaxError> {
let mut res = Vec::new();
for e in old_errors {
if e.offset < old_node.range().start() {
res.push(e)
} else if e.offset > old_node.range().end() {
res.push(SyntaxError {
msg: e.msg,
offset: e.offset + TextUnit::of_str(&edit.insert) - edit.delete.len(),
})
}
}
for e in new_errors {
res.push(SyntaxError {
msg: e.msg,
offset: e.offset + old_node.range().start(),
})
}
res
}


@@ -0,0 +1,343 @@
use algo;
use grammar;
use lexer::{tokenize, Token};
use text_unit::{TextRange, TextUnit};
use yellow::{self, SyntaxNodeRef, GreenNode, SyntaxError};
use parser_impl;
use parser_api::Parser;
use {
SyntaxKind::*,
};
use text_utils::replace_range;
#[derive(Debug, Clone)]
pub struct AtomEdit {
pub delete: TextRange,
pub insert: String,
}
impl AtomEdit {
pub fn replace(range: TextRange, replace_with: String) -> AtomEdit {
AtomEdit { delete: range, insert: replace_with }
}
pub fn delete(range: TextRange) -> AtomEdit {
AtomEdit::replace(range, String::new())
}
pub fn insert(offset: TextUnit, text: String) -> AtomEdit {
AtomEdit::replace(TextRange::offset_len(offset, 0.into()), text)
}
}
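// Entry point: try the cheap single-leaf reparse first, and fall back to
// reparsing the smallest enclosing block-like node.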
pub(crate) fn incremental_reparse(
node: SyntaxNodeRef,
edit: &AtomEdit,
errors: Vec<SyntaxError>,
) -> Option<(GreenNode, Vec<SyntaxError>)> {
let (node, green, new_errors) =
reparse_leaf(node, &edit).or_else(|| reparse_block(node, &edit))?;
let green_root = node.replace_with(green);
let errors = merge_errors(errors, new_errors, node, edit);
Some((green_root, errors))
}
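// Fast path: if the edit is covered by a single reparsable token, re-lex just
// that token. This only succeeds when the edited text still lexes to exactly
// one token of the same kind.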
fn reparse_leaf<'node>(
node: SyntaxNodeRef<'node>,
edit: &AtomEdit,
) -> Option<(SyntaxNodeRef<'node>, GreenNode, Vec<SyntaxError>)> {
let node = algo::find_covering_node(node, edit.delete);
match node.kind() {
| WHITESPACE
| COMMENT
| DOC_COMMENT
| IDENT
| STRING
| RAW_STRING => {
let text = get_text_after_edit(node, &edit);
let tokens = tokenize(&text);
let token = match tokens[..] {
[token] if token.kind == node.kind() => token,
_ => return None,
};
if token.kind == IDENT && is_contextual_kw(&text) {
return None;
}
let green = GreenNode::new_leaf(node.kind(), &text);
let new_errors = vec![];
Some((node, green, new_errors))
}
_ => None,
}
}
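// Fallback: reparse the smallest ancestor with a dedicated grammar entry
// point, provided the edited text is still brace-balanced.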
fn reparse_block<'node>(
node: SyntaxNodeRef<'node>,
edit: &AtomEdit,
) -> Option<(SyntaxNodeRef<'node>, GreenNode, Vec<SyntaxError>)> {
let (node, reparser) = find_reparsable_node(node, edit.delete)?;
let text = get_text_after_edit(node, &edit);
let tokens = tokenize(&text);
if !is_balanced(&tokens) {
return None;
}
let (green, new_errors) =
parser_impl::parse_with::<yellow::GreenBuilder>(
&text, &tokens, reparser,
);
Some((node, green, new_errors))
}
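// Apply the edit to the node's own text, rebasing the edit range from file
// offsets to node-local offsets.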
fn get_text_after_edit(node: SyntaxNodeRef, edit: &AtomEdit) -> String {
replace_range(
node.text().to_string(),
edit.delete - node.range().start(),
&edit.insert,
)
}
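// Contextual keywords lex as IDENT but change how the surrounding code
// parses, so an IDENT edited into one of these must not take the fast path.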
fn is_contextual_kw(text: &str) -> bool {
match text {
| "auto"
| "default"
| "union" => true,
_ => false,
}
}
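// Walk up from the smallest node covering the edit to the first ancestor
// that has a dedicated reparser.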
fn find_reparsable_node<'node>(
node: SyntaxNodeRef<'node>,
range: TextRange,
) -> Option<(SyntaxNodeRef<'node>, fn(&mut Parser))> {
let node = algo::find_covering_node(node, range);
return algo::ancestors(node)
.filter_map(|node| reparser(node).map(|r| (node, r)))
.next();
fn reparser(node: SyntaxNodeRef) -> Option<fn(&mut Parser)> {
let res = match node.kind() {
BLOCK => grammar::block,
NAMED_FIELD_DEF_LIST => grammar::named_field_def_list,
NAMED_FIELD_LIST => grammar::named_field_list,
ENUM_VARIANT_LIST => grammar::enum_variant_list,
MATCH_ARM_LIST => grammar::match_arm_list,
USE_TREE_LIST => grammar::use_tree_list,
EXTERN_ITEM_LIST => grammar::extern_item_list,
TOKEN_TREE if node.first_child().unwrap().kind() == L_CURLY => grammar::token_tree,
ITEM_LIST => {
let parent = node.parent().unwrap();
match parent.kind() {
IMPL_ITEM => grammar::impl_item_list,
TRAIT_DEF => grammar::trait_item_list,
MODULE => grammar::mod_item_list,
_ => return None,
}
}
_ => return None,
};
Some(res)
}
}
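// A reparsable block must start with L_CURLY, end with R_CURLY, and keep its
// curly braces balanced in between.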
fn is_balanced(tokens: &[Token]) -> bool {
if tokens.len() == 0
|| tokens.first().unwrap().kind != L_CURLY
|| tokens.last().unwrap().kind != R_CURLY {
return false;
}
let mut balance = 0usize;
for t in tokens.iter() {
match t.kind {
L_CURLY => balance += 1,
R_CURLY => balance = match balance.checked_sub(1) {
Some(b) => b,
None => return false,
},
_ => (),
}
}
balance == 0
}
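// Keep old errors located outside the reparsed node, shifting the ones behind
// it by the edit's net change in length, and re-anchor the new errors
// relative to the node's start.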
fn merge_errors(
old_errors: Vec<SyntaxError>,
new_errors: Vec<SyntaxError>,
old_node: SyntaxNodeRef,
edit: &AtomEdit,
) -> Vec<SyntaxError> {
let mut res = Vec::new();
for e in old_errors {
if e.offset <= old_node.range().start() {
res.push(e)
} else if e.offset >= old_node.range().end() {
res.push(SyntaxError {
msg: e.msg,
offset: e.offset + TextUnit::of_str(&edit.insert) - edit.delete.len(),
})
}
}
for e in new_errors {
res.push(SyntaxError {
msg: e.msg,
offset: e.offset + old_node.range().start(),
})
}
res
}
#[cfg(test)]
mod tests {
use super::{
super::{
File,
test_utils::extract_range,
text_utils::replace_range,
utils::dump_tree,
},
reparse_leaf, reparse_block, AtomEdit, GreenNode, SyntaxError, SyntaxNodeRef,
};
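// Harness: apply the same edit via a full reparse and via the given
// incremental reparser, then require identical syntax trees.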
fn do_check<F>(
before: &str,
replace_with: &str,
reparser: F,
) where
for<'a> F: Fn(
SyntaxNodeRef<'a>,
&AtomEdit,
) -> Option<(SyntaxNodeRef<'a>, GreenNode, Vec<SyntaxError>)>
{
let (range, before) = extract_range(before);
let after = replace_range(before.clone(), range, replace_with);
let fully_reparsed = File::parse(&after);
let incrementally_reparsed = {
let f = File::parse(&before);
let edit = AtomEdit { delete: range, insert: replace_with.to_string() };
let (node, green, new_errors) =
reparser(f.syntax(), &edit).expect("cannot incrementally reparse");
let green_root = node.replace_with(green);
let errors = super::merge_errors(f.errors(), new_errors, node, &edit);
File::new(green_root, errors)
};
assert_eq_text!(
&dump_tree(fully_reparsed.syntax()),
&dump_tree(incrementally_reparsed.syntax()),
)
}
#[test]
fn reparse_block_tests() {
let do_check = |before, replace_to|
do_check(before, replace_to, reparse_block);
do_check(r"
fn foo() {
let x = foo + <|>bar<|>
}
", "baz");
do_check(r"
fn foo() {
let x = foo<|> + bar<|>
}
", "baz");
do_check(r"
struct Foo {
f: foo<|><|>
}
", ",\n g: (),");
do_check(r"
fn foo {
let;
1 + 1;
<|>92<|>;
}
", "62");
do_check(r"
mod foo {
fn <|><|>
}
", "bar");
do_check(r"
trait Foo {
type <|>Foo<|>;
}
", "Output");
do_check(r"
impl IntoIterator<Item=i32> for Foo {
f<|><|>
}
", "n next(");
do_check(r"
use a::b::{foo,<|>,bar<|>};
", "baz");
do_check(r"
pub enum A {
Foo<|><|>
}
", "\nBar;\n");
do_check(r"
foo!{a, b<|><|> d}
", ", c[3]");
do_check(r"
fn foo() {
vec![<|><|>]
}
", "123");
do_check(r"
extern {
fn<|>;<|>
}
", " exit(code: c_int)");
}
#[test]
fn reparse_leaf_tests() {
let do_check = |before, replace_to|
do_check(before, replace_to, reparse_leaf);
do_check(r"<|><|>
fn foo() -> i32 { 1 }
", "\n\n\n \n");
do_check(r"
fn foo() -> <|><|> {}
", " \n");
do_check(r"
fn <|>foo<|>() -> i32 { 1 }
", "bar");
do_check(r"
fn foo<|><|>foo() { }
", "bar");
do_check(r"
fn foo /* <|><|> */ () {}
", "some comment");
do_check(r"
fn baz <|><|> () {}
", " \t\t\n\n");
do_check(r"
fn baz <|><|> () {}
", " \t\t\n\n");
do_check(r"
/// foo <|><|>omment
mod { }
", "c");
do_check(r#"
fn -> &str { "Hello<|><|>" }
"#, ", world");
do_check(r#"
fn -> &str { // "Hello<|><|>"
"#, ", world");
do_check(r##"
fn -> &str { r#"Hello<|><|>"#
"##, ", world");
do_check(r"
#[derive(<|>Copy<|>)]
enum Foo {
}
", "Clone");
}
}


@@ -17,3 +17,10 @@ pub fn intersect(r1: TextRange, r2: TextRange) -> Option<TextRange> {
None
}
}
pub fn replace_range(mut text: String, range: TextRange, replace_with: &str) -> String {
let start = u32::from(range.start()) as usize;
let end = u32::from(range.end()) as usize;
text.replace_range(start..end, replace_with);
text
}


@@ -9,9 +9,8 @@ use std::{
fmt::Write,
};
use test_utils::extract_range;
use libsyntax2::{
File, AtomEdit,
File,
utils::{dump_tree, check_fuzz_invariants},
};
@@ -23,79 +22,6 @@ fn lexer_tests() {
})
}
#[test]
fn reparse_test() {
fn do_check(before: &str, replace_with: &str) {
let (range, before) = extract_range(before);
let after = libsyntax2::replace_range(before.clone(), range, replace_with);
let fully_reparsed = File::parse(&after);
let incrementally_reparsed = {
let f = File::parse(&before);
let edit = AtomEdit { delete: range, insert: replace_with.to_string() };
f.incremental_reparse(&edit).unwrap()
};
assert_eq_text!(
&dump_tree(fully_reparsed.syntax()),
&dump_tree(incrementally_reparsed.syntax()),
)
}
do_check(r"
fn foo() {
let x = foo + <|>bar<|>
}
", "baz");
do_check(r"
struct Foo {
f: foo<|><|>
}
", ",\n g: (),");
do_check(r"
fn foo {
let;
1 + 1;
<|>92<|>;
}
", "62");
do_check(r"
mod foo {
fn <|><|>
}
", "bar");
do_check(r"
trait Foo {
type <|>Foo<|>;
}
", "Output");
do_check(r"
impl IntoIterator<Item=i32> for Foo {
f<|><|>
}
", "n next(");
do_check(r"
use a::b::{foo,<|>,bar<|>};
", "baz");
do_check(r"
pub enum A {
Foo<|><|>
}
", "\nBar;\n");
do_check(r"
foo!{a, b<|><|> d}
", ", c[3]");
do_check(r"
fn foo() {
vec![<|><|>]
}
", "123");
do_check(r"
extern {
fn<|>;<|>
}
", " exit(code: c_int)");
}
#[test]
fn parser_tests() {
dir_tests(&["parser/inline", "parser/ok", "parser/err"], |text| {