From ad9595d729eb19cffb09d96b56affcf568d0ca29 Mon Sep 17 00:00:00 2001 From: DropDemBits Date: Sat, 27 Jul 2024 17:09:53 -0400 Subject: [PATCH] wip: new syntax tree editor --- crates/syntax/src/lib.rs | 1 + crates/syntax/src/syntax_editor.rs | 333 +++++++++++++++++++ crates/syntax/src/syntax_editor/edit_algo.rs | 215 ++++++++++++ crates/syntax/src/syntax_editor/mapping.rs | 174 ++++++++++ 4 files changed, 723 insertions(+) create mode 100644 crates/syntax/src/syntax_editor.rs create mode 100644 crates/syntax/src/syntax_editor/edit_algo.rs create mode 100644 crates/syntax/src/syntax_editor/mapping.rs diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index b68374848b..dbbb290b4f 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -40,6 +40,7 @@ pub mod ast; #[doc(hidden)] pub mod fuzz; pub mod hacks; +pub mod syntax_editor; pub mod ted; pub mod utils; diff --git a/crates/syntax/src/syntax_editor.rs b/crates/syntax/src/syntax_editor.rs new file mode 100644 index 0000000000..3ec5abcaea --- /dev/null +++ b/crates/syntax/src/syntax_editor.rs @@ -0,0 +1,333 @@ +//! Syntax Tree editor +//! +//! Inspired by Roslyn's [`SyntaxEditor`], but is temporarily built upon mutable syntax tree editing. +//! +//! [`SyntaxEditor`]: https://github.com/dotnet/roslyn/blob/43b0b05cc4f492fd5de00f6f6717409091df8daa/src/Workspaces/Core/Portable/Editing/SyntaxEditor.cs + +use std::{ + num::NonZeroU32, + sync::atomic::{AtomicU32, Ordering}, +}; + +use rowan::TextRange; +use rustc_hash::FxHashMap; + +use crate::{SyntaxElement, SyntaxNode, SyntaxToken}; + +mod edit_algo; +mod mapping; + +pub use mapping::{SyntaxMapping, SyntaxMappingBuilder}; + +#[derive(Debug)] +pub struct SyntaxEditor { + root: SyntaxNode, + changes: Vec, + mappings: SyntaxMapping, + annotations: Vec<(SyntaxElement, SyntaxAnnotation)>, +} + +impl SyntaxEditor { + /// Creates a syntax editor to start editing from `root` + pub fn new(root: SyntaxNode) -> Self { + Self { root, changes: vec![], mappings: SyntaxMapping::new(), annotations: vec![] } + } + + pub fn add_annotation(&mut self, element: impl Element, annotation: SyntaxAnnotation) { + self.annotations.push((element.syntax_element(), annotation)) + } + + pub fn combine(&mut self, other: SyntaxEditor) { + todo!() + } + + pub fn delete(&mut self, element: impl Element) { + self.changes.push(Change::Replace(element.syntax_element(), None)); + } + + pub fn replace(&mut self, old: impl Element, new: impl Element) { + self.changes.push(Change::Replace(old.syntax_element(), Some(new.syntax_element()))); + } + + pub fn finish(self) -> SyntaxEdit { + edit_algo::apply_edits(self) + } +} + +pub struct SyntaxEdit { + root: SyntaxNode, + changed_elements: Vec, + annotations: FxHashMap>, +} + +impl SyntaxEdit { + pub fn root(&self) -> &SyntaxNode { + &self.root + } + + pub fn changed_elements(&self) -> &[SyntaxElement] { + self.changed_elements.as_slice() + } + + pub fn find_annotation(&self, annotation: SyntaxAnnotation) -> Option<&[SyntaxElement]> { + self.annotations.get(&annotation).as_ref().map(|it| it.as_slice()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +pub struct SyntaxAnnotation(NonZeroU32); + +impl SyntaxAnnotation { + /// Creates a unique syntax annotation to attach data to. + pub fn new() -> Self { + static COUNTER: AtomicU32 = AtomicU32::new(1); + + // We want the id to be unique across threads, but we don't want to + // tie it to other `SeqCst` operations. + let id = COUNTER.fetch_add(1, Ordering::AcqRel); + + Self(NonZeroU32::new(id).expect("syntax annotation id overflow")) + } +} + +/// Position describing where to insert elements +#[derive(Debug)] +pub struct Position { + repr: PositionRepr, +} + +#[derive(Debug)] +enum PositionRepr { + FirstChild(SyntaxNode), + After(SyntaxElement), +} + +impl Position { + pub fn after(elem: impl Element) -> Position { + let repr = PositionRepr::After(elem.syntax_element()); + Position { repr } + } + + pub fn before(elem: impl Element) -> Position { + let elem = elem.syntax_element(); + let repr = match elem.prev_sibling_or_token() { + Some(it) => PositionRepr::After(it), + None => PositionRepr::FirstChild(elem.parent().unwrap()), + }; + Position { repr } + } + + pub fn first_child_of(node: &(impl Into + Clone)) -> Position { + let repr = PositionRepr::FirstChild(node.clone().into()); + Position { repr } + } + + pub fn last_child_of(node: &(impl Into + Clone)) -> Position { + let node = node.clone().into(); + let repr = match node.last_child_or_token() { + Some(it) => PositionRepr::After(it), + None => PositionRepr::FirstChild(node), + }; + Position { repr } + } +} + +#[derive(Debug)] +enum Change { + /// Represents both a replace single element and a delete element operation. + Replace(SyntaxElement, Option), +} + +impl Change { + fn target_range(&self) -> TextRange { + match self { + Change::Replace(target, _) => target.text_range(), + } + } + + fn target_parent(&self) -> SyntaxNode { + match self { + Change::Replace(target, _) => target.parent().unwrap(), + } + } + + fn change_kind(&self) -> ChangeKind { + match self { + Change::Replace(_, _) => ChangeKind::Replace, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum ChangeKind { + Insert, + // TODO: deal with replace spans + Replace, +} + +/// Utility trait to allow calling syntax editor functions with references or owned +/// nodes. Do not use outside of this module. +pub trait Element { + fn syntax_element(self) -> SyntaxElement; +} + +impl Element for &'_ E { + fn syntax_element(self) -> SyntaxElement { + self.clone().syntax_element() + } +} + +impl Element for SyntaxElement { + fn syntax_element(self) -> SyntaxElement { + self + } +} + +impl Element for SyntaxNode { + fn syntax_element(self) -> SyntaxElement { + self.into() + } +} + +impl Element for SyntaxToken { + fn syntax_element(self) -> SyntaxElement { + self.into() + } +} + +#[cfg(test)] +mod tests { + use expect_test::expect; + use itertools::Itertools; + + use crate::{ + ast::{self, make, HasName}, + AstNode, + }; + + use super::*; + + fn make_ident_pat( + editor: Option<&mut SyntaxEditor>, + ref_: bool, + mut_: bool, + name: ast::Name, + ) -> ast::IdentPat { + let ast = make::ident_pat(ref_, mut_, name.clone()).clone_for_update(); + + if let Some(editor) = editor { + let mut mapping = SyntaxMappingBuilder::new(ast.syntax().clone()); + mapping.map_node(name.syntax().clone(), ast.name().unwrap().syntax().clone()); + mapping.finish(editor); + } + + ast + } + + fn make_let_stmt( + editor: Option<&mut SyntaxEditor>, + pattern: ast::Pat, + ty: Option, + initializer: Option, + ) -> ast::LetStmt { + let ast = + make::let_stmt(pattern.clone(), ty.clone(), initializer.clone()).clone_for_update(); + + if let Some(editor) = editor { + let mut mapping = SyntaxMappingBuilder::new(ast.syntax().clone()); + mapping.map_node(pattern.syntax().clone(), ast.pat().unwrap().syntax().clone()); + if let Some(input) = ty { + mapping.map_node(input.syntax().clone(), ast.ty().unwrap().syntax().clone()); + } + if let Some(input) = initializer { + mapping + .map_node(input.syntax().clone(), ast.initializer().unwrap().syntax().clone()); + } + mapping.finish(editor); + } + + ast + } + + fn make_block_expr( + editor: Option<&mut SyntaxEditor>, + stmts: impl IntoIterator, + tail_expr: Option, + ) -> ast::BlockExpr { + let stmts = stmts.into_iter().collect_vec(); + let input = stmts.iter().map(|it| it.syntax().clone()).collect_vec(); + + let ast = make::block_expr(stmts, tail_expr.clone()).clone_for_update(); + + if let Some((editor, stmt_list)) = editor.zip(ast.stmt_list()) { + let mut mapping = SyntaxMappingBuilder::new(stmt_list.syntax().clone()); + + mapping.map_children( + input.into_iter(), + stmt_list.statements().map(|it| it.syntax().clone()), + ); + + if let Some((input, output)) = tail_expr.zip(stmt_list.tail_expr()) { + mapping.map_node(input.syntax().clone(), output.syntax().clone()); + } + + mapping.finish(editor); + } + + ast + } + + #[test] + fn it() { + let root = make::match_arm( + [make::wildcard_pat().into()], + None, + make::expr_tuple([ + make::expr_bin_op( + make::expr_literal("2").into(), + ast::BinaryOp::ArithOp(ast::ArithOp::Add), + make::expr_literal("2").into(), + ), + make::expr_literal("true").into(), + ]), + ); + + let to_wrap = root.syntax().descendants().find_map(ast::TupleExpr::cast).unwrap(); + let to_replace = root.syntax().descendants().find_map(ast::BinExpr::cast).unwrap(); + + let mut editor = SyntaxEditor::new(root.syntax().clone()); + + let name = make::name("var_name"); + let name_ref = make::name_ref("var_name").clone_for_update(); + + let placeholder_snippet = SyntaxAnnotation::new(); + editor.add_annotation(name.syntax(), placeholder_snippet); + editor.add_annotation(name_ref.syntax(), placeholder_snippet); + + let make_ident_pat = make_ident_pat(Some(&mut editor), false, false, name); + let make_let_stmt = make_let_stmt( + Some(&mut editor), + make_ident_pat.into(), + None, + Some(to_replace.clone().into()), + ); + let new_block = make_block_expr( + Some(&mut editor), + [make_let_stmt.into()], + Some(to_wrap.clone().into()), + ); + + // should die: + editor.replace(to_replace.syntax(), name_ref.syntax()); + editor.replace(to_wrap.syntax(), new_block.syntax()); + // editor.replace(to_replace.syntax(), name_ref.syntax()); + + // dbg!(&editor.mappings); + let edit = editor.finish(); + + let expect = expect![]; + expect.assert_eq(&edit.root.to_string()); + assert_eq!(edit.find_annotation(placeholder_snippet).map(|it| it.len()), Some(2)); + } +} diff --git a/crates/syntax/src/syntax_editor/edit_algo.rs b/crates/syntax/src/syntax_editor/edit_algo.rs new file mode 100644 index 0000000000..f8354c5eb6 --- /dev/null +++ b/crates/syntax/src/syntax_editor/edit_algo.rs @@ -0,0 +1,215 @@ +use std::{collections::VecDeque, ops::RangeInclusive}; + +use rowan::TextRange; + +use crate::{ + syntax_editor::{Change, ChangeKind}, + ted, SyntaxElement, SyntaxNode, SyntaxNodePtr, +}; + +use super::{SyntaxEdit, SyntaxEditor}; + +pub(super) fn apply_edits(editor: SyntaxEditor) -> SyntaxEdit { + // Algorithm overview: + // + // - Sort changes by (range, type) + // - Ensures that parent edits are before child edits + // - Ensures that inserts will be guaranteed to be inserted at the right range + // - Validate changes + // - Checking for invalid changes is easy since the changes will be sorted by range + // - Fixup change targets + // - standalone change? map to original syntax tree + // - dependent change? + // - try to map to parent change (either independent or another dependent) + // - note: need to keep track of a parent change stack, since a change can be a parent of multiple changes + // - Apply changes + // - find changes to apply to real tree by applying nested changes first + // - changed nodes become part of the changed node set (useful for the formatter to only change those parts) + // - Propagate annotations + + let SyntaxEditor { root, mut changes, mappings, annotations } = editor; + + dbg!(("initial: ", &root)); + dbg!(&changes); + + // Sort changes by range then change kind, so that we can: + // - ensure that parent edits are ordered before child edits + // - ensure that inserts will be guaranteed to be inserted at the right range + // - easily check for disjoint replace ranges + changes.sort_by(|a, b| { + a.target_range() + .start() + .cmp(&b.target_range().start()) + .then(a.change_kind().cmp(&b.change_kind())) + }); + + let disjoint_replaces_ranges = changes.iter().zip(changes.iter().skip(1)).all(|(l, r)| { + l.change_kind() == ChangeKind::Replace + && r.change_kind() == ChangeKind::Replace + && (l.target_parent() != r.target_parent() + || l.target_range().intersect(r.target_range()).is_none()) + }); + + if stdx::never!( + !disjoint_replaces_ranges, + "some replace change ranges intersect: {:?}", + changes + ) { + return SyntaxEdit { root, annotations: Default::default(), changed_elements: vec![] }; + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + struct DependentChange { + parent: u32, + child: u32, + } + + // Build change tree + let mut changed_ancestors: VecDeque = VecDeque::new(); + let mut dependent_changes = vec![]; + let mut independent_changes = vec![]; + + for (change_index, change) in changes.iter().enumerate() { + // Check if this change is dependent on another change (i.e. it's contained within another range) + if let Some(index) = changed_ancestors + .iter() + .rev() + .position(|ancestor| ancestor.affected_range().contains_range(change.target_range())) + { + // Pop off any ancestors that aren't applicable + changed_ancestors.drain((index + 1)..); + + let ancestor = &changed_ancestors[index]; + + dependent_changes.push(DependentChange { + parent: ancestor.change_index as u32, + child: change_index as u32, + }); + } else { + // This change is independent of any other change + + // Drain the changed ancestors since we're no longer in a set of dependent changes + changed_ancestors.drain(..); + + independent_changes.push(change_index as u32); + } + + // Add to changed ancestors, if applicable + match change { + Change::Replace(target, _) => { + changed_ancestors.push_back(ChangedAncestor::single(target, change_index)) + } + } + } + + dbg!(("before: ", &changes, &dependent_changes, &independent_changes)); + + // Map change targets to the correct syntax nodes + let tree_mutator = TreeMutator::new(&root); + + for index in independent_changes { + match &mut changes[index as usize] { + Change::Replace(target, _) => { + *target = tree_mutator.make_element_mut(target); + } + } + } + + for DependentChange { parent, child } in dependent_changes.into_iter() { + let (input_ancestor, output_ancestor) = match &changes[parent as usize] { + // insert? unreachable + Change::Replace(target, Some(new_target)) => { + (to_owning_node(target), to_owning_node(new_target)) + } + Change::Replace(_, None) => continue, // silently drop outdated change + }; + + match &mut changes[child as usize] { + Change::Replace(target, _) => { + *target = mappings.upmap_child_element(target, &input_ancestor, output_ancestor) + } + } + } + + dbg!(("after: ", &changes)); + + // Apply changes + for change in changes { + match change { + Change::Replace(target, None) => ted::remove(target), + Change::Replace(target, Some(new_target)) => ted::replace(target, new_target), + } + } + + dbg!(("modified:", tree_mutator.mutable_clone)); + + todo!("draw the rest of the owl") +} + +fn to_owning_node(element: &SyntaxElement) -> SyntaxNode { + match element { + SyntaxElement::Node(node) => node.clone(), + SyntaxElement::Token(token) => token.parent().unwrap().clone(), + } +} + +struct ChangedAncestor { + kind: ChangedAncestorKind, + change_index: usize, +} + +enum ChangedAncestorKind { + Single { node: SyntaxNode }, + Range { changed_nodes: RangeInclusive, in_parent: SyntaxNode }, +} + +impl ChangedAncestor { + fn single(element: &SyntaxElement, change_index: usize) -> Self { + let kind = match element { + SyntaxElement::Node(node) => ChangedAncestorKind::Single { node: node.clone() }, + SyntaxElement::Token(token) => { + ChangedAncestorKind::Single { node: token.parent().unwrap() } + } + }; + + Self { kind, change_index } + } + + fn affected_range(&self) -> TextRange { + match &self.kind { + ChangedAncestorKind::Single { node } => node.text_range(), + ChangedAncestorKind::Range { changed_nodes, in_parent: _ } => TextRange::new( + changed_nodes.start().text_range().start(), + changed_nodes.end().text_range().end(), + ), + } + } +} + +struct TreeMutator { + immutable: SyntaxNode, + mutable_clone: SyntaxNode, +} + +impl TreeMutator { + fn new(immutable: &SyntaxNode) -> TreeMutator { + let immutable = immutable.clone(); + let mutable_clone = immutable.clone_for_update(); + TreeMutator { immutable, mutable_clone } + } + + fn make_element_mut(&self, element: &SyntaxElement) -> SyntaxElement { + match element { + SyntaxElement::Node(node) => SyntaxElement::Node(self.make_syntax_mut(&node)), + SyntaxElement::Token(token) => { + let parent = self.make_syntax_mut(&token.parent().unwrap()); + parent.children_with_tokens().nth(token.index()).unwrap() + } + } + } + + fn make_syntax_mut(&self, node: &SyntaxNode) -> SyntaxNode { + let ptr = SyntaxNodePtr::new(node); + ptr.to_node(&self.mutable_clone) + } +} diff --git a/crates/syntax/src/syntax_editor/mapping.rs b/crates/syntax/src/syntax_editor/mapping.rs new file mode 100644 index 0000000000..11c7b395b3 --- /dev/null +++ b/crates/syntax/src/syntax_editor/mapping.rs @@ -0,0 +1,174 @@ +use itertools::Itertools; +use rustc_hash::FxHashMap; + +use crate::{SyntaxElement, SyntaxNode}; + +use super::SyntaxEditor; + +#[derive(Debug, Default)] +pub struct SyntaxMapping { + // important information to keep track of: + // node -> node + // token -> token (implicit in mappings) + // input parent -> output parent (for deep lookups) + + // mappings -> parents + entry_parents: Vec, + node_mappings: FxHashMap, +} + +impl SyntaxMapping { + pub fn new() -> Self { + Self::default() + } + + pub fn upmap_child_element( + &self, + child: &SyntaxElement, + input_ancestor: &SyntaxNode, + output_ancestor: SyntaxNode, + ) -> SyntaxElement { + match child { + SyntaxElement::Node(node) => { + SyntaxElement::Node(self.upmap_child(node, input_ancestor, output_ancestor)) + } + SyntaxElement::Token(token) => { + let upmap_parent = + self.upmap_child(&token.parent().unwrap(), input_ancestor, output_ancestor); + + let element = upmap_parent.children_with_tokens().nth(token.index()).unwrap(); + debug_assert!( + element.as_token().is_some_and(|it| it.kind() == token.kind()), + "token upmapping mapped to the wrong node ({token:?} -> {element:?})" + ); + + element + } + } + } + + pub fn upmap_child( + &self, + child: &SyntaxNode, + input_ancestor: &SyntaxNode, + output_ancestor: SyntaxNode, + ) -> SyntaxNode { + debug_assert!(child.ancestors().any(|ancestor| &ancestor == input_ancestor)); + + // Build a list mapping up to the first mappable ancestor + let to_first_upmap = + std::iter::successors(Some((child.index(), child.clone())), |(_, current)| { + let parent = current.parent().unwrap(); + + if &parent == input_ancestor { + return None; + } + + Some((parent.index(), parent)) + }) + .map(|(i, _)| i) + .collect::>(); + + // Progressively up-map the input ancestor until we get to the output ancestor + let to_output_ancestor = if input_ancestor != &output_ancestor { + std::iter::successors(Some((input_ancestor.index(), self.upmap_node(input_ancestor).unwrap_or_else(|| input_ancestor.clone()))), |(_, current)| { + let Some(parent) = current.parent() else { + unreachable!("no mappings exist between {current:?} (ancestor of {input_ancestor:?}) and {output_ancestor:?}") + }; + + if &parent == &output_ancestor { + return None; + } + + if let Some(next) = self.upmap_node(&parent) { + Some((parent.index(), next)) + } else { + Some((parent.index(), parent)) + } + }).map(|(i, _)| i).collect::>() + } else { + vec![] + }; + + let to_map_down = + to_output_ancestor.into_iter().rev().chain(to_first_upmap.into_iter().rev()); + + let mut target = output_ancestor; + + for index in to_map_down { + target = target + .children_with_tokens() + .nth(index) + .and_then(|it| it.into_node()) + .expect("yep"); + } + + debug_assert_eq!(child.kind(), target.kind()); + + target + } + + pub fn upmap_node(&self, input: &SyntaxNode) -> Option { + let (parent, child_slot) = self.node_mappings.get(input)?; + + let output = self.entry_parents[*parent as usize] + .children_with_tokens() + .nth(*child_slot as usize) + .and_then(SyntaxElement::into_node) + .unwrap(); + + debug_assert_eq!(input.kind(), output.kind()); + Some(output) + } + + fn add_mapping(&mut self, syntax_mapping: SyntaxMappingBuilder) { + let SyntaxMappingBuilder { parent_node, node_mappings } = syntax_mapping; + + let parent_entry: u32 = self.entry_parents.len() as u32; + self.entry_parents.push(parent_node); + + let node_entries = + node_mappings.into_iter().map(|(node, slot)| (node, (parent_entry, slot))); + + self.node_mappings.extend(node_entries); + } +} + +#[derive(Debug)] +pub struct SyntaxMappingBuilder { + parent_node: SyntaxNode, + node_mappings: Vec<(SyntaxNode, u32)>, +} + +impl SyntaxMappingBuilder { + pub fn new(parent_node: SyntaxNode) -> Self { + Self { parent_node, node_mappings: vec![] } + } + + pub fn map_node(&mut self, input: SyntaxNode, output: SyntaxNode) { + debug_assert_eq!(output.parent().as_ref(), Some(&self.parent_node)); + self.node_mappings.push((input, output.index() as u32)); + } + + pub fn map_children( + &mut self, + input: impl Iterator, + output: impl Iterator, + ) { + for pairs in input.zip_longest(output) { + let (input, output) = match pairs { + itertools::EitherOrBoth::Both(l, r) => (l, r), + itertools::EitherOrBoth::Left(_) => { + unreachable!("mapping more input nodes than there are output nodes") + } + itertools::EitherOrBoth::Right(_) => break, + }; + + self.map_node(input, output); + } + } + + pub fn finish(self, editor: &mut SyntaxEditor) { + editor.mappings.add_mapping(self); + } +}