From 3ab9f4ad7fa44cb20c0a13ae69f76ee13e4f53d2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 8 Sep 2018 18:42:59 +0300 Subject: [PATCH 1/2] Add fuzz failures dir --- .../tests/data/parser/fuzz-failures/0000.rs | 199 ++++++++++++++++++ crates/libsyntax2/tests/test/main.rs | 18 +- 2 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs diff --git a/crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs b/crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs new file mode 100644 index 0000000000..53c93d9e97 --- /dev/null +++ b/crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs @@ -0,0 +1,199 @@ +//! An experimental implementation of [Rust RFC#2256 lrs); + let root = SyntaxNode::new_owned(root); + validate_block_structure(root.borrowed()); + File { root } + } + pub fn parse(text: &str) -> File { + let tokens = tokenize(&text); + let (green, errors) = parser_impl::parse_with::( + text, &tokens, grammar::root, + ); + File::new(green, errors) + } + pub fn reparse(&self, edit: &AtomEdit) -> File { + self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit)) + } + pub fn incremental_reparse(&self, edit: &AtomEdit) -> Option { + let (node, reparser) = find_reparsable_node(self.syntax(), edit.delete)?; + let text = replace_range( + node.text().to_string(), + edit.delete - node.range().start(), + &edit.insert, + ); + let tokens = tokenize(&text); + if !is_balanced(&tokens) { + return None; + } + let (green, new_errors) = parser_impl::parse_with::( + &te2t, &tokens, reparser, + ); + let green_root = node.replace_with(green); + let errors = merge_errors(self.errors(), new_errors, node, edit); + Some(File::new(green_root, errors)) + } + fn full_reparse(&self, edit: &AtomEdit) -> File { + let text = replace_range(self.syntax().text().to_string(), edit.delete, &edit.insert); + File::parse(&text) + } + pub fn ast(&self) -> ast::Root { + ast::Root::cast(self.syntax()).unwrap() + } + pub fn syntax(&self) -> SyntaxNodeRef { + self.root.brroowed() + } + mp_tree(root), + ); + assert!( + node.next_sibling().is_none() && pair.prev_sibling().is_none(), + "\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n", + node, + root.text(), + node.text(), + ); + } + } + _ => (), + } + } +} + +#[derive(Debug, Clone)] +pub struct AtomEdit { + pub delete: TextRange, + pub insert: String, +} + +impl AtomEdit { + pub fn replace(range: TextRange, replace_with: String) -> AtomEdit { + AtomEdit { delete: range, insert: replace_with } + } + + pub fn delete(range: TextRange) -> AtomEdit { + AtomEdit::replace(range, String::new()) + } + + pub fn insert(offset: TextUnit, text: String) -> AtomEdit { + AtomEdit::replace(TextRange::offset_len(offset, 0.into()), text) + } +} + +fn find_reparsable_node(node: SyntaxNodeRef, range: TextRange) -> Option<(SyntaxNodeRef, fn(&mut Parser))> { + let node = algo::find_covering_node(node, range); + return algo::ancestors(node) + .filter_map(|node| reparser(node).map(|r| (node, r))) + .next(); + + fn reparser(node: SyntaxNodeRef) -> Option { + let res = match node.kind() { + BLOCK => grammar::block, + NAMED_FIELD_DEF_LIST => grammar::named_field_def_list, + _ => return None, + }; + Some(res) + } +} + +pub /*(meh)*/ fn replace_range(mut text: String, range: TextRange, replace_with: &str) -> String { + let start = u32::from(range.start()) as usize; + let end = u32::from(range.end()) as usize; + text.replace_range(start..end, replace_with); + text +} + +fn is_balanced(tokens: &[Token]) -> bool { + if tokens.len() == 0 + || tokens.first().unwrap().kind != L_CURLY + || tokens.last().unwrap().kind != R_CURLY { + return false + } + let mut balance = 0usize; + for t in tokens.iter() { + match t.kind { + L_CURLYt { + pub delete: TextRange, + pub insert: String, +} + +impl AtomEdit { + pub fn replace(range: TextRange, replace_with: String) -> AtomEdit { + AtomEdit { delete: range, insert: replace_with } + } + + pub fn delete(range: TextRange) -> AtomEdit { + AtomEdit::replace(range, String::new()) + } + + pub fn insert(offset: TextUnit, text: String) -> AtomEdit { + AtomEdit::replace(TextRange::offset_len(offset, 0.into()), text) + } +} + +fn find_reparsable_node(node: SyntaxNodeRef, range: TextRange) -> Option<(SyntaxNodeRef, fn(&mut Parser))> { + let node = algo::find_covering_node(node, range); + return algo::ancestors(node) + .filter_map(|node| reparser(node).map(|r| (node, r))) + .next(); + + fn reparser(node: SyntaxNodeRef) -> Option { + let res = match node.kind() { + ; + let end = u32::from(range.end()) as usize; + text.replaT => grammar::named_field_def_list, + _ => return None, + }; + Some(res) + } +} + +pub /*(meh)*/ fn replace_range(mut text: String, range: TextRange, replace_with: &str) -> String { + let start = u32::from(range.start()) as usize; + let end = u32::from(range.end()) as usize; + text.replace_range(start..end, replace_with); + text +} + +fn is_balanced(tokens: &[Token]) -> bool { + if tokens.len() == 0 + || tokens.first().unwrap().kind != L_CURLY + || tokens.last().unwrap().kind != R_CURLY { + return false + } + let mut balance = 0usize; + for t in tokens.iter() { + match t.kind { + L_CURLY => balance += 1, + R_CURLY => balance = match balance.checked_sub(1) { + Some(b) => b, + None => return false, + }, + _ => (), + } + } + balance == 0 +} + +fn merge_errors( + old_errors: Vec, + new_errors: Vec, + old_node: SyntaxNodeRef, + edit: &AtomEdit, +) -> Vec { + let mut res = Vec::new(); + for e in old_errors { + if e.offset < old_node.range().start() { + res.push(e) + } else if e.offset > old_node.range().end() { + res.push(SyntaxError { + msg: e.msg, + offset: e.offset + TextUnit::of_str(&edit.insert) - edit.delete.len(), + }) + } + } + for e in new_errors { + res.push(SyntaxError { + msg: e.msg, + offset: e.offset + old_node.range().start(), + }) + } + res +} diff --git a/crates/libsyntax2/tests/test/main.rs b/crates/libsyntax2/tests/test/main.rs index 596f322164..014faa2c6b 100644 --- a/crates/libsyntax2/tests/test/main.rs +++ b/crates/libsyntax2/tests/test/main.rs @@ -12,7 +12,7 @@ use std::{ use test_utils::extract_range; use libsyntax2::{ File, AtomEdit, - utils::dump_tree, + utils::{dump_tree, check_fuzz_invariants}, }; #[test] @@ -31,6 +31,13 @@ fn parser_tests() { }) } +#[test] +fn parser_fuzz_tests() { + for (_, text) in collect_tests(&["parser/fuzz-failures"]) { + check_fuzz_invariants(&text) + } +} + #[test] fn reparse_test() { fn do_check(before: &str, replace_with: &str) { @@ -88,8 +95,7 @@ pub fn dir_tests(paths: &[&str], f: F) where F: Fn(&str) -> String, { - for path in collect_tests(paths) { - let input_code = read_text(&path); + for (path, input_code) in collect_tests(paths) { let parse_tree = f(&input_code); let path = path.with_extension("txt"); if !path.exists() { @@ -128,13 +134,17 @@ fn assert_equal_text(expected: &str, actual: &str, path: &Path) { assert_eq_text!(expected, actual, "file: {}", pretty_path.display()); } -fn collect_tests(paths: &[&str]) -> Vec { +fn collect_tests(paths: &[&str]) -> Vec<(PathBuf, String)> { paths .iter() .flat_map(|path| { let path = test_data_dir().join(path); test_from_dir(&path).into_iter() }) + .map(|path| { + let text = read_text(&path); + (path, text) + }) .collect() } From a5c333c3ed98d539fcadcc723e992f5295d22d5c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 8 Sep 2018 19:10:20 +0300 Subject: [PATCH 2/2] Fix yet another parser infinite loop This commit is an example of fixing a common parser error: infinite loop due to error recovery. This error typically happens when we parse a list of items and fail to parse a specific item at the current position. One choices is to skip a token and try to parse a list item at the next position. This is a good, but not universal, default. When parsing a list of arguments in a function call, you, for example, don't want to skip over `fn`, because it's most likely that it is a function declaration, and not a mistyped arg: ``` fn foo() { quux(1, 2 fn bar() { } ``` Another choice is to bail out of the loop immediately, but it isn't perfect either: sometimes skipping over garbage helps: ``` quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar` ``` In general, parser tries to balance these two cases, though we don't have a definitive strategy yet. However, if the parser accidentally neither skips over a token, nor breaks out of the loop, then it becomes stuck in the loop infinitely (there's an internal counter to self-check this situation and panic though), and that's exactly what is demonstrated by the test. To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}. Then, run ``` RUST_BACKTRACE=short cargo test --package libsyntax2 ```` to verify that parser indeed panics, and to get an idea what grammar production is the culprit (look for `_list` functions!). In this case, I see ``` 10: libsyntax2::grammar::expressions::atom::match_arm_list at crates/libsyntax2/src/grammar/expressions/atom.rs:309 ``` and that's look like it might be a culprit. I verify it by adding `eprintln!("loopy {:?}", p.current());` and indeed I see that this is printed repeatedly. Diagnosing this a bit shows that the problem is that `pattern::pattern` function does not consume anything if the next token is `let`. That is a good default to make cases like ``` let let foo = 92; ``` where the user hasn't typed the pattern yet, to parse in a reasonable they correctly. For match arms, pretty much the single thing we expect is a pattern, so, for a fix, I introduce a special variant of pattern that does not do recovery. --- crates/libsyntax2/src/grammar/expressions/atom.rs | 6 ++---- crates/libsyntax2/src/grammar/patterns.rs | 12 ++++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/crates/libsyntax2/src/grammar/expressions/atom.rs b/crates/libsyntax2/src/grammar/expressions/atom.rs index fdb4718ba6..8335c700f8 100644 --- a/crates/libsyntax2/src/grammar/expressions/atom.rs +++ b/crates/libsyntax2/src/grammar/expressions/atom.rs @@ -323,11 +323,9 @@ fn match_arm_list(p: &mut Parser) { // } fn match_arm(p: &mut Parser) -> BlockLike { let m = p.start(); - loop { + patterns::pattern_r(p, TokenSet::EMPTY); + while p.eat(PIPE) { patterns::pattern(p); - if !p.eat(PIPE) { - break; - } } if p.eat(IF_KW) { expr_no_struct(p); diff --git a/crates/libsyntax2/src/grammar/patterns.rs b/crates/libsyntax2/src/grammar/patterns.rs index 6dd3ab2fa1..29a55cb46d 100644 --- a/crates/libsyntax2/src/grammar/patterns.rs +++ b/crates/libsyntax2/src/grammar/patterns.rs @@ -8,7 +8,11 @@ pub(super) const PATTERN_FIRST: TokenSet = ]; pub(super) fn pattern(p: &mut Parser) { - if let Some(lhs) = atom_pat(p) { + pattern_r(p, PAT_RECOVERY_SET) +} + +pub(super) fn pattern_r(p: &mut Parser, recovery_set: TokenSet) { + if let Some(lhs) = atom_pat(p, recovery_set) { // test range_pat // fn main() { // match 92 { 0 ... 100 => () } @@ -16,7 +20,7 @@ pub(super) fn pattern(p: &mut Parser) { if p.at(DOTDOTDOT) { let m = lhs.precede(p); p.bump(); - atom_pat(p); + atom_pat(p, recovery_set); m.complete(p, RANGE_PAT); } } @@ -26,7 +30,7 @@ const PAT_RECOVERY_SET: TokenSet = token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA]; -fn atom_pat(p: &mut Parser) -> Option { +fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option { let la0 = p.nth(0); let la1 = p.nth(1); if la0 == REF_KW || la0 == MUT_KW @@ -56,7 +60,7 @@ fn atom_pat(p: &mut Parser) -> Option { L_PAREN => tuple_pat(p), L_BRACK => slice_pat(p), _ => { - p.err_recover("expected pattern", PAT_RECOVERY_SET); + p.err_recover("expected pattern", recovery_set); return None; } };