wip: new syntax tree editor

This commit is contained in:
DropDemBits 2024-07-27 17:09:53 -04:00
parent 779d9eee2e
commit ad9595d729
No known key found for this signature in database
GPG key ID: 7FE02A6C1EDFA075
4 changed files with 723 additions and 0 deletions

View file

@ -40,6 +40,7 @@ pub mod ast;
#[doc(hidden)]
pub mod fuzz;
pub mod hacks;
pub mod syntax_editor;
pub mod ted;
pub mod utils;

View file

@ -0,0 +1,333 @@
//! Syntax Tree editor
//!
//! Inspired by Roslyn's [`SyntaxEditor`], but is temporarily built upon mutable syntax tree editing.
//!
//! [`SyntaxEditor`]: https://github.com/dotnet/roslyn/blob/43b0b05cc4f492fd5de00f6f6717409091df8daa/src/Workspaces/Core/Portable/Editing/SyntaxEditor.cs
use std::{
num::NonZeroU32,
sync::atomic::{AtomicU32, Ordering},
};
use rowan::TextRange;
use rustc_hash::FxHashMap;
use crate::{SyntaxElement, SyntaxNode, SyntaxToken};
mod edit_algo;
mod mapping;
pub use mapping::{SyntaxMapping, SyntaxMappingBuilder};
/// Collects edits to a syntax tree so they can be applied in one go.
///
/// Every method on the editor only queues work; the tree is left alone until
/// [`SyntaxEditor::finish`] hands the queued state over to the edit algorithm.
#[derive(Debug)]
pub struct SyntaxEditor {
    /// Root of the tree the queued changes refer to.
    root: SyntaxNode,
    /// Queued changes, in the order they were requested.
    changes: Vec<Change>,
    /// Input-node to output-node mappings registered via `SyntaxMappingBuilder::finish`.
    mappings: SyntaxMapping,
    /// Elements paired with the annotation that was attached to them.
    annotations: Vec<(SyntaxElement, SyntaxAnnotation)>,
}

impl SyntaxEditor {
    /// Creates a syntax editor to start editing from `root`.
    pub fn new(root: SyntaxNode) -> Self {
        Self {
            root,
            changes: Vec::new(),
            mappings: SyntaxMapping::new(),
            annotations: Vec::new(),
        }
    }

    /// Records that `annotation` is attached to `element`.
    pub fn add_annotation(&mut self, element: impl Element, annotation: SyntaxAnnotation) {
        let element = element.syntax_element();
        self.annotations.push((element, annotation));
    }

    /// Not implemented yet; calling this panics via `todo!`.
    pub fn combine(&mut self, other: SyntaxEditor) {
        todo!()
    }

    /// Queues the removal of `element`, recorded as a replace with no replacement.
    pub fn delete(&mut self, element: impl Element) {
        let target = element.syntax_element();
        self.changes.push(Change::Replace(target, None));
    }

    /// Queues the replacement of `old` with `new`.
    pub fn replace(&mut self, old: impl Element, new: impl Element) {
        let target = old.syntax_element();
        let replacement = new.syntax_element();
        self.changes.push(Change::Replace(target, Some(replacement)));
    }

    /// Consumes the editor and runs the edit algorithm over everything queued so far.
    pub fn finish(self) -> SyntaxEdit {
        edit_algo::apply_edits(self)
    }
}
/// Outcome of [`SyntaxEditor::finish`].
pub struct SyntaxEdit {
    /// Root node carried by this edit.
    root: SyntaxNode,
    /// Elements reported as changed by the edit.
    changed_elements: Vec<SyntaxElement>,
    /// Elements grouped by the annotation attached to them.
    annotations: FxHashMap<SyntaxAnnotation, Vec<SyntaxElement>>,
}

impl SyntaxEdit {
    /// Root node carried by this edit.
    pub fn root(&self) -> &SyntaxNode {
        let Self { root, .. } = self;
        root
    }

    /// Elements reported as changed by the edit.
    pub fn changed_elements(&self) -> &[SyntaxElement] {
        &self.changed_elements
    }

    /// Elements recorded under `annotation`, or `None` when the annotation has no entry.
    pub fn find_annotation(&self, annotation: SyntaxAnnotation) -> Option<&[SyntaxElement]> {
        let elements = self.annotations.get(&annotation)?;
        Some(elements.as_slice())
    }
}
/// Opaque identifier used to tag syntax elements.
///
/// Two annotations compare equal only when one is a copy of the other; every
/// call to [`SyntaxAnnotation::new`] produces a distinct value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct SyntaxAnnotation(NonZeroU32);

impl SyntaxAnnotation {
    /// Creates a unique syntax annotation to attach data to.
    ///
    /// # Panics
    ///
    /// Panics once the id counter is exhausted, and keeps panicking on every
    /// later call.
    pub fn new() -> Self {
        static COUNTER: AtomicU32 = AtomicU32::new(1);

        // We want the id to be unique across threads, but we don't want to
        // tie it to other `SeqCst` operations.
        //
        // The increment is checked so that an exhausted counter stays put:
        // a plain `fetch_add` would wrap to 0 and, after a single panic, start
        // handing out ids that are already in use.
        let id = COUNTER
            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |next| next.checked_add(1))
            .expect("syntax annotation id overflow");

        Self(NonZeroU32::new(id).expect("syntax annotation ids start at 1"))
    }
}

impl Default for SyntaxAnnotation {
    /// Same as [`SyntaxAnnotation::new`]: every call yields a fresh annotation.
    fn default() -> Self {
        Self::new()
    }
}
/// Position describing where to insert elements
#[derive(Debug)]
pub struct Position {
    repr: PositionRepr,
}

/// Internal encoding of a [`Position`]: either the first slot of a node, or
/// the slot right after an existing element.
#[derive(Debug)]
enum PositionRepr {
    FirstChild(SyntaxNode),
    After(SyntaxElement),
}

impl Position {
    /// Position directly after `elem`.
    pub fn after(elem: impl Element) -> Position {
        Position { repr: PositionRepr::After(elem.syntax_element()) }
    }

    /// Position directly before `elem`.
    ///
    /// Encoded as "after the previous sibling", falling back to "first child
    /// of the parent" when `elem` has no previous sibling.
    ///
    /// # Panics
    ///
    /// Panics if `elem` has neither a previous sibling nor a parent.
    pub fn before(elem: impl Element) -> Position {
        let elem = elem.syntax_element();
        let repr = if let Some(prev) = elem.prev_sibling_or_token() {
            PositionRepr::After(prev)
        } else {
            PositionRepr::FirstChild(elem.parent().unwrap())
        };
        Position { repr }
    }

    /// Position of the first child slot of `node`.
    pub fn first_child_of(node: &(impl Into<SyntaxNode> + Clone)) -> Position {
        let node: SyntaxNode = node.clone().into();
        Position { repr: PositionRepr::FirstChild(node) }
    }

    /// Position after the current last child of `node`, or its first child
    /// slot when `node` has no children yet.
    pub fn last_child_of(node: &(impl Into<SyntaxNode> + Clone)) -> Position {
        let node: SyntaxNode = node.clone().into();
        let repr = if let Some(last) = node.last_child_or_token() {
            PositionRepr::After(last)
        } else {
            PositionRepr::FirstChild(node)
        };
        Position { repr }
    }
}
/// A single queued modification of the syntax tree.
#[derive(Debug)]
enum Change {
    /// Represents both a replace single element and a delete element operation.
    Replace(SyntaxElement, Option<SyntaxElement>),
}

impl Change {
    /// Element the change is anchored to.
    fn target(&self) -> &SyntaxElement {
        match self {
            Change::Replace(target, _) => target,
        }
    }

    /// Text range of the targeted element.
    fn target_range(&self) -> TextRange {
        self.target().text_range()
    }

    /// Parent node of the targeted element; panics if the target has no parent.
    fn target_parent(&self) -> SyntaxNode {
        self.target().parent().unwrap()
    }

    /// Coarse classification of the change.
    fn change_kind(&self) -> ChangeKind {
        match self {
            Change::Replace(..) => ChangeKind::Replace,
        }
    }
}

/// Classification of a [`Change`].
///
/// The derived ordering follows declaration order, so `Insert` compares less
/// than `Replace`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum ChangeKind {
    Insert,
    // TODO: deal with replace spans
    Replace,
}
/// Utility trait to allow calling syntax editor functions with references or owned
/// nodes. Do not use outside of this module.
pub trait Element {
fn syntax_element(self) -> SyntaxElement;
}
impl<E: Element + Clone> Element for &'_ E {
fn syntax_element(self) -> SyntaxElement {
self.clone().syntax_element()
}
}
impl Element for SyntaxElement {
fn syntax_element(self) -> SyntaxElement {
self
}
}
impl Element for SyntaxNode {
fn syntax_element(self) -> SyntaxElement {
self.into()
}
}
impl Element for SyntaxToken {
fn syntax_element(self) -> SyntaxElement {
self.into()
}
}
#[cfg(test)]
mod tests {
use expect_test::expect;
use itertools::Itertools;
use crate::{
ast::{self, make, HasName},
AstNode,
};
use super::*;
/// Builds an identifier pattern with `make::ident_pat` and turns it into a
/// mutable tree with `clone_for_update`.
///
/// When an editor is supplied, a mapping from the input `name` node to the
/// name node inside the freshly built pattern is registered with it.
fn make_ident_pat(
editor: Option<&mut SyntaxEditor>,
ref_: bool,
mut_: bool,
name: ast::Name,
) -> ast::IdentPat {
let ast = make::ident_pat(ref_, mut_, name.clone()).clone_for_update();
if let Some(editor) = editor {
let mut mapping = SyntaxMappingBuilder::new(ast.syntax().clone());
mapping.map_node(name.syntax().clone(), ast.name().unwrap().syntax().clone());
mapping.finish(editor);
}
ast
}
/// Builds a `let` statement with `make::let_stmt` and turns it into a mutable
/// tree with `clone_for_update`.
///
/// When an editor is supplied, the input pattern, type and initializer (the
/// latter two only if present) are mapped to their counterparts in the new
/// statement.
fn make_let_stmt(
editor: Option<&mut SyntaxEditor>,
pattern: ast::Pat,
ty: Option<ast::Type>,
initializer: Option<ast::Expr>,
) -> ast::LetStmt {
let ast =
make::let_stmt(pattern.clone(), ty.clone(), initializer.clone()).clone_for_update();
if let Some(editor) = editor {
let mut mapping = SyntaxMappingBuilder::new(ast.syntax().clone());
mapping.map_node(pattern.syntax().clone(), ast.pat().unwrap().syntax().clone());
if let Some(input) = ty {
mapping.map_node(input.syntax().clone(), ast.ty().unwrap().syntax().clone());
}
if let Some(input) = initializer {
mapping
.map_node(input.syntax().clone(), ast.initializer().unwrap().syntax().clone());
}
mapping.finish(editor);
}
ast
}
/// Builds a block expression with `make::block_expr` and turns it into a
/// mutable tree with `clone_for_update`.
///
/// When an editor is supplied and the block has a statement list, the input
/// statements and the tail expression are mapped to their counterparts in
/// that statement list.
fn make_block_expr(
editor: Option<&mut SyntaxEditor>,
stmts: impl IntoIterator<Item = ast::Stmt>,
tail_expr: Option<ast::Expr>,
) -> ast::BlockExpr {
let stmts = stmts.into_iter().collect_vec();
// Remember the input statement nodes before `stmts` is moved into `make::block_expr`.
let input = stmts.iter().map(|it| it.syntax().clone()).collect_vec();
let ast = make::block_expr(stmts, tail_expr.clone()).clone_for_update();
if let Some((editor, stmt_list)) = editor.zip(ast.stmt_list()) {
let mut mapping = SyntaxMappingBuilder::new(stmt_list.syntax().clone());
mapping.map_children(
input.into_iter(),
stmt_list.statements().map(|it| it.syntax().clone()),
);
if let Some((input, output)) = tail_expr.zip(stmt_list.tail_expr()) {
mapping.map_node(input.syntax().clone(), output.syntax().clone());
}
mapping.finish(editor);
}
ast
}
/// Queues two nested replacements on a match arm whose expression is the
/// tuple of `2 + 2` and `true`: the binary expression is replaced by a name
/// reference, and the enclosing tuple is replaced by a block that binds the
/// binary expression to that name and has the tuple as its tail.
///
/// The expected output is still empty (`expect![]`).
#[test]
fn it() {
let root = make::match_arm(
[make::wildcard_pat().into()],
None,
make::expr_tuple([
make::expr_bin_op(
make::expr_literal("2").into(),
ast::BinaryOp::ArithOp(ast::ArithOp::Add),
make::expr_literal("2").into(),
),
make::expr_literal("true").into(),
]),
);
// First tuple expression and first binary expression found below the root.
let to_wrap = root.syntax().descendants().find_map(ast::TupleExpr::cast).unwrap();
let to_replace = root.syntax().descendants().find_map(ast::BinExpr::cast).unwrap();
let mut editor = SyntaxEditor::new(root.syntax().clone());
let name = make::name("var_name");
let name_ref = make::name_ref("var_name").clone_for_update();
// The same annotation is attached to both the definition and the use of the name.
let placeholder_snippet = SyntaxAnnotation::new();
editor.add_annotation(name.syntax(), placeholder_snippet);
editor.add_annotation(name_ref.syntax(), placeholder_snippet);
let make_ident_pat = make_ident_pat(Some(&mut editor), false, false, name);
let make_let_stmt = make_let_stmt(
Some(&mut editor),
make_ident_pat.into(),
None,
Some(to_replace.clone().into()),
);
let new_block = make_block_expr(
Some(&mut editor),
[make_let_stmt.into()],
Some(to_wrap.clone().into()),
);
// should die:
editor.replace(to_replace.syntax(), name_ref.syntax());
editor.replace(to_wrap.syntax(), new_block.syntax());
// editor.replace(to_replace.syntax(), name_ref.syntax());
// dbg!(&editor.mappings);
let edit = editor.finish();
let expect = expect![];
expect.assert_eq(&edit.root.to_string());
// Both annotated elements are expected to be reported for the annotation.
assert_eq!(edit.find_annotation(placeholder_snippet).map(|it| it.len()), Some(2));
}
}

View file

@ -0,0 +1,215 @@
use std::{collections::VecDeque, ops::RangeInclusive};
use rowan::TextRange;
use crate::{
syntax_editor::{Change, ChangeKind},
ted, SyntaxElement, SyntaxNode, SyntaxNodePtr,
};
use super::{SyntaxEdit, SyntaxEditor};
/// Applies the changes queued in `editor` to a mutable clone of its root.
///
/// When `stdx::never!` reports that replace ranges intersect, an edit carrying
/// the untouched `root`, no annotations and no changed elements is returned.
///
/// NOTE: the implementation is unfinished. After the changes have been applied
/// the function hits `todo!`, so the non-error path currently panics. The
/// `dbg!` calls are debugging output for that work in progress.
pub(super) fn apply_edits(editor: SyntaxEditor) -> SyntaxEdit {
    // Algorithm overview:
    //
    // - Sort changes by (range, type)
    //   - Ensures that parent edits are before child edits
    //   - Ensures that inserts will be guaranteed to be inserted at the right range
    // - Validate changes
    //   - Checking for invalid changes is easy since the changes will be sorted by range
    // - Fixup change targets
    //   - standalone change? map to original syntax tree
    //   - dependent change?
    //     - try to map to parent change (either independent or another dependent)
    //     - note: need to keep track of a parent change stack, since a change can be a parent of multiple changes
    // - Apply changes
    //   - find changes to apply to real tree by applying nested changes first
    //   - changed nodes become part of the changed node set (useful for the formatter to only change those parts)
    // - Propagate annotations

    let SyntaxEditor { root, mut changes, mappings, annotations } = editor;

    dbg!(("initial: ", &root));
    dbg!(&changes);

    // Sort changes by range then change kind, so that we can:
    // - ensure that parent edits are ordered before child edits
    // - ensure that inserts will be guaranteed to be inserted at the right range
    // - easily check for disjoint replace ranges
    changes.sort_by(|a, b| {
        a.target_range()
            .start()
            .cmp(&b.target_range().start())
            .then(a.change_kind().cmp(&b.change_kind()))
    });

    // Neighbouring changes must either target different parents or cover
    // non-intersecting ranges.
    let disjoint_replaces_ranges = changes.iter().zip(changes.iter().skip(1)).all(|(l, r)| {
        l.change_kind() == ChangeKind::Replace
            && r.change_kind() == ChangeKind::Replace
            && (l.target_parent() != r.target_parent()
                || l.target_range().intersect(r.target_range()).is_none())
    });

    if stdx::never!(
        !disjoint_replaces_ranges,
        "some replace change ranges intersect: {:?}",
        changes
    ) {
        return SyntaxEdit { root, annotations: Default::default(), changed_elements: vec![] };
    }

    /// Link between a change and the change whose affected range contains it.
    /// Both fields are indices into `changes`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    struct DependentChange {
        parent: u32,
        child: u32,
    }

    // Build change tree
    //
    // `changed_ancestors` is used as a stack: the back holds the most recently
    // pushed (innermost) change.
    let mut changed_ancestors: VecDeque<ChangedAncestor> = VecDeque::new();
    let mut dependent_changes = vec![];
    let mut independent_changes = vec![];

    for (change_index, change) in changes.iter().enumerate() {
        // Check if this change is dependent on another change (i.e. it's contained within another range)
        //
        // Search from the back so the innermost containing ancestor wins.
        // `rposition` reports the index counted from the front, which is what
        // `truncate` and indexing below expect; `.rev().position(..)` would
        // report the offset from the back and select the wrong entry as soon
        // as the stack holds more than one ancestor.
        if let Some(index) = changed_ancestors
            .iter()
            .rposition(|ancestor| ancestor.affected_range().contains_range(change.target_range()))
        {
            // Pop off any ancestors that aren't applicable
            changed_ancestors.truncate(index + 1);

            let ancestor = &changed_ancestors[index];

            dependent_changes.push(DependentChange {
                parent: ancestor.change_index as u32,
                child: change_index as u32,
            });
        } else {
            // This change is independent of any other change

            // Drain the changed ancestors since we're no longer in a set of dependent changes
            changed_ancestors.clear();

            independent_changes.push(change_index as u32);
        }

        // Add to changed ancestors, if applicable
        match change {
            Change::Replace(target, _) => {
                changed_ancestors.push_back(ChangedAncestor::single(target, change_index))
            }
        }
    }

    dbg!(("before: ", &changes, &dependent_changes, &independent_changes));

    // Map change targets to the correct syntax nodes
    let tree_mutator = TreeMutator::new(&root);

    // Independent changes are re-targeted onto the mutable clone of the tree.
    for index in independent_changes {
        match &mut changes[index as usize] {
            Change::Replace(target, _) => {
                *target = tree_mutator.make_element_mut(target);
            }
        }
    }

    // Dependent changes are re-targeted into the replacement of their parent change.
    for DependentChange { parent, child } in dependent_changes {
        let (input_ancestor, output_ancestor) = match &changes[parent as usize] {
            // insert? unreachable
            Change::Replace(target, Some(new_target)) => {
                (to_owning_node(target), to_owning_node(new_target))
            }
            Change::Replace(_, None) => continue, // silently drop outdated change
        };

        match &mut changes[child as usize] {
            Change::Replace(target, _) => {
                *target = mappings.upmap_child_element(target, &input_ancestor, output_ancestor)
            }
        }
    }

    dbg!(("after: ", &changes));

    // Apply changes
    for change in changes {
        match change {
            Change::Replace(target, None) => ted::remove(target),
            Change::Replace(target, Some(new_target)) => ted::replace(target, new_target),
        }
    }

    dbg!(("modified:", tree_mutator.mutable_clone));

    todo!("draw the rest of the owl")
}
/// Returns the node that owns `element`: the node itself for a node element,
/// or the parent node for a token element.
///
/// Panics if a token element has no parent.
fn to_owning_node(element: &SyntaxElement) -> SyntaxNode {
    let token = match element {
        SyntaxElement::Node(node) => return node.clone(),
        SyntaxElement::Token(token) => token,
    };
    token.parent().unwrap()
}
/// A change that later changes may be nested in.
struct ChangedAncestor {
    kind: ChangedAncestorKind,
    /// Index of the originating change in the sorted change list.
    change_index: usize,
}

/// What a [`ChangedAncestor`] covers: one node, or an inclusive run of nodes
/// inside `in_parent`.
enum ChangedAncestorKind {
    Single { node: SyntaxNode },
    Range { changed_nodes: RangeInclusive<SyntaxNode>, in_parent: SyntaxNode },
}

impl ChangedAncestor {
    /// Ancestor entry for a change that targets a single element.
    ///
    /// A token target is represented by its parent node; panics if the token
    /// has no parent.
    fn single(element: &SyntaxElement, change_index: usize) -> Self {
        let node = match element {
            SyntaxElement::Node(node) => node.clone(),
            SyntaxElement::Token(token) => token.parent().unwrap(),
        };
        Self { kind: ChangedAncestorKind::Single { node }, change_index }
    }

    /// Text range covered by this ancestor.
    fn affected_range(&self) -> TextRange {
        match &self.kind {
            ChangedAncestorKind::Single { node } => node.text_range(),
            ChangedAncestorKind::Range { changed_nodes, .. } => {
                // From the start of the first node to the end of the last one.
                let start = changed_nodes.start().text_range().start();
                let end = changed_nodes.end().text_range().end();
                TextRange::new(start, end)
            }
        }
    }
}
/// Pairs a syntax tree with a `clone_for_update` copy of it and translates
/// nodes of the former into the latter.
struct TreeMutator {
    immutable: SyntaxNode,
    mutable_clone: SyntaxNode,
}

impl TreeMutator {
    /// Keeps a handle to `immutable` and creates its mutable counterpart.
    fn new(immutable: &SyntaxNode) -> TreeMutator {
        let mutable_clone = immutable.clone_for_update();
        TreeMutator { immutable: immutable.clone(), mutable_clone }
    }

    /// Finds the counterpart of `element` in the mutable clone.
    ///
    /// Tokens are located through their parent node: the parent is translated
    /// first, then the child at the token's index is picked. Panics if the
    /// token has no parent or the index is out of range.
    fn make_element_mut(&self, element: &SyntaxElement) -> SyntaxElement {
        match element {
            SyntaxElement::Node(node) => {
                let node = self.make_syntax_mut(node);
                SyntaxElement::Node(node)
            }
            SyntaxElement::Token(token) => {
                let parent = token.parent().unwrap();
                let parent = self.make_syntax_mut(&parent);
                parent.children_with_tokens().nth(token.index()).unwrap()
            }
        }
    }

    /// Resolves a pointer to `node` against the mutable clone.
    fn make_syntax_mut(&self, node: &SyntaxNode) -> SyntaxNode {
        SyntaxNodePtr::new(node).to_node(&self.mutable_clone)
    }
}

View file

@ -0,0 +1,174 @@
use itertools::Itertools;
use rustc_hash::FxHashMap;
use crate::{SyntaxElement, SyntaxNode};
use super::SyntaxEditor;
/// Lookup table from input nodes to the nodes that stand in for them in newly
/// built syntax trees.
#[derive(Debug, Default)]
pub struct SyntaxMapping {
// important information to keep track of:
// node -> node
// token -> token (implicit in mappings)
// input parent -> output parent (for deep lookups)
// mappings -> parents
/// Output parent nodes, one per mapping batch added through `add_mapping`.
entry_parents: Vec<SyntaxNode>,
/// Input node -> (index into `entry_parents`, child slot inside that parent).
node_mappings: FxHashMap<SyntaxNode, (u32, u32)>,
}
impl SyntaxMapping {
/// Creates an empty mapping.
pub fn new() -> Self {
Self::default()
}
/// Element-level variant of [`SyntaxMapping::upmap_child`].
///
/// Nodes are forwarded to `upmap_child`. For a token, its parent node is
/// up-mapped and the element at the token's index inside the mapped parent is
/// returned; a `debug_assert!` checks that this element is a token of the
/// same kind.
///
/// Panics if the token has no parent or the mapped parent has no child at
/// the token's index.
pub fn upmap_child_element(
&self,
child: &SyntaxElement,
input_ancestor: &SyntaxNode,
output_ancestor: SyntaxNode,
) -> SyntaxElement {
match child {
SyntaxElement::Node(node) => {
SyntaxElement::Node(self.upmap_child(node, input_ancestor, output_ancestor))
}
SyntaxElement::Token(token) => {
let upmap_parent =
self.upmap_child(&token.parent().unwrap(), input_ancestor, output_ancestor);
let element = upmap_parent.children_with_tokens().nth(token.index()).unwrap();
debug_assert!(
element.as_token().is_some_and(|it| it.kind() == token.kind()),
"token upmapping mapped to the wrong node ({token:?} -> {element:?})"
);
element
}
}
}
/// Finds the node that corresponds to `child` underneath `output_ancestor`.
///
/// `child` is expected to be a descendant of `input_ancestor` (checked by a
/// `debug_assert!`). Child indices are recorded while walking up, then
/// replayed from `output_ancestor` to walk back down.
///
/// Panics if a replayed index does not land on a node, or if the upward walk
/// runs out of parents before reaching `output_ancestor`.
pub fn upmap_child(
&self,
child: &SyntaxNode,
input_ancestor: &SyntaxNode,
output_ancestor: SyntaxNode,
) -> SyntaxNode {
debug_assert!(child.ancestors().any(|ancestor| &ancestor == input_ancestor));
// Build a list mapping up to the first mappable ancestor
// (indices from `child` up to, but excluding, `input_ancestor`).
let to_first_upmap =
std::iter::successors(Some((child.index(), child.clone())), |(_, current)| {
let parent = current.parent().unwrap();
if &parent == input_ancestor {
return None;
}
Some((parent.index(), parent))
})
.map(|(i, _)| i)
.collect::<Vec<_>>();
// Progressively up-map the input ancestor until we get to the output ancestor
//
// NOTE(review): the first recorded index is `input_ancestor.index()`, i.e. its
// slot in the input tree, whereas the replay below walks the output tree.
// Confirm the two slots are meant to coincide.
let to_output_ancestor = if input_ancestor != &output_ancestor {
std::iter::successors(Some((input_ancestor.index(), self.upmap_node(input_ancestor).unwrap_or_else(|| input_ancestor.clone()))), |(_, current)| {
let Some(parent) = current.parent() else {
unreachable!("no mappings exist between {current:?} (ancestor of {input_ancestor:?}) and {output_ancestor:?}")
};
if &parent == &output_ancestor {
return None;
}
// Continue from the mapped parent when a mapping exists, otherwise
// from the parent itself.
if let Some(next) = self.upmap_node(&parent) {
Some((parent.index(), next))
} else {
Some((parent.index(), parent))
}
}).map(|(i, _)| i).collect::<Vec<_>>()
} else {
vec![]
};
// Both lists were collected bottom-up, so reverse them to walk top-down.
let to_map_down =
to_output_ancestor.into_iter().rev().chain(to_first_upmap.into_iter().rev());
let mut target = output_ancestor;
for index in to_map_down {
target = target
.children_with_tokens()
.nth(index)
.and_then(|it| it.into_node())
.expect("yep");
}
debug_assert_eq!(child.kind(), target.kind());
target
}
/// Looks up the output node registered for `input`.
///
/// Returns `None` when `input` has no mapping. Panics if the recorded child
/// slot of the recorded parent does not hold a node.
pub fn upmap_node(&self, input: &SyntaxNode) -> Option<SyntaxNode> {
let (parent, child_slot) = self.node_mappings.get(input)?;
let output = self.entry_parents[*parent as usize]
.children_with_tokens()
.nth(*child_slot as usize)
.and_then(SyntaxElement::into_node)
.unwrap();
debug_assert_eq!(input.kind(), output.kind());
Some(output)
}
/// Registers every mapping collected by `syntax_mapping`, all of them keyed
/// to one new `entry_parents` entry for the builder's parent node.
fn add_mapping(&mut self, syntax_mapping: SyntaxMappingBuilder) {
let SyntaxMappingBuilder { parent_node, node_mappings } = syntax_mapping;
// The index the parent is about to occupy in `entry_parents`.
let parent_entry: u32 = self.entry_parents.len() as u32;
self.entry_parents.push(parent_node);
let node_entries =
node_mappings.into_iter().map(|(node, slot)| (node, (parent_entry, slot)));
self.node_mappings.extend(node_entries);
}
}
/// Collects input-to-output node mappings for the children of one output
/// parent node before handing them to a [`SyntaxEditor`].
#[derive(Debug)]
pub struct SyntaxMappingBuilder {
    /// Output node whose children the mappings point into.
    parent_node: SyntaxNode,
    /// Input node paired with the child slot of its output node in `parent_node`.
    node_mappings: Vec<(SyntaxNode, u32)>,
}

impl SyntaxMappingBuilder {
    /// Starts an empty set of mappings for children of `parent_node`.
    pub fn new(parent_node: SyntaxNode) -> Self {
        Self { parent_node, node_mappings: Vec::new() }
    }

    /// Maps `input` to `output`, remembering `output` by its child index.
    ///
    /// `output` is expected to be a direct child of the builder's parent node
    /// (checked by a `debug_assert_eq!`).
    pub fn map_node(&mut self, input: SyntaxNode, output: SyntaxNode) {
        debug_assert_eq!(output.parent().as_ref(), Some(&self.parent_node));
        let slot = output.index() as u32;
        self.node_mappings.push((input, slot));
    }

    /// Maps input nodes to output nodes pairwise, in iteration order.
    ///
    /// Stops at the first output node that has no matching input node.
    ///
    /// # Panics
    ///
    /// Panics if there are more input nodes than output nodes.
    pub fn map_children(
        &mut self,
        input: impl Iterator<Item = SyntaxNode>,
        output: impl Iterator<Item = SyntaxNode>,
    ) {
        for pair in input.zip_longest(output) {
            match pair {
                itertools::EitherOrBoth::Both(input, output) => self.map_node(input, output),
                itertools::EitherOrBoth::Left(_) => {
                    unreachable!("mapping more input nodes than there are output nodes")
                }
                itertools::EitherOrBoth::Right(_) => break,
            }
        }
    }

    /// Registers the collected mappings with `editor`.
    pub fn finish(self, editor: &mut SyntaxEditor) {
        editor.mappings.add_mapping(self);
    }
}