From 68196ccc10c60de52bb771d295879456f73ede95 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 3 Apr 2020 21:12:08 +0200 Subject: [PATCH] Add AstElement trait, generate tokens, support tokens in enums - Adds a new AstElement trait that is implemented by all generated node, token and enum structs - Overhauls the code generators to code-generate all tokens, and also enhances enums to support including tokens, nodes, and nested enums --- crates/ra_syntax/src/ast.rs | 114 ++++++++++- crates/ra_syntax/src/ast/tokens.rs | 62 +----- xtask/src/codegen/gen_syntax.rs | 298 ++++++++++++++++++++++++++--- 3 files changed, 380 insertions(+), 94 deletions(-) diff --git a/crates/ra_syntax/src/ast.rs b/crates/ra_syntax/src/ast.rs index 26fafb469a..1ac0201b81 100644 --- a/crates/ra_syntax/src/ast.rs +++ b/crates/ra_syntax/src/ast.rs @@ -11,7 +11,10 @@ pub mod make; use std::marker::PhantomData; use crate::{ - syntax_node::{SyntaxNode, SyntaxNodeChildren, SyntaxToken}, + syntax_node::{ + NodeOrToken, SyntaxElement, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren, + SyntaxToken, + }, SmolStr, SyntaxKind, }; @@ -30,16 +33,24 @@ pub use self::{ /// conversion itself has zero runtime cost: ast and syntax nodes have exactly /// the same representation: a pointer to the tree root and a pointer to the /// node itself. -pub trait AstNode: std::fmt::Display { +pub trait AstNode: AstElement { fn can_cast(kind: SyntaxKind) -> bool where Self: Sized; + fn cast_or_return(syntax: SyntaxNode) -> Result + where + Self: Sized; + fn cast(syntax: SyntaxNode) -> Option where - Self: Sized; + Self: Sized, + { + ::cast_or_return(syntax).ok() + } fn syntax(&self) -> &SyntaxNode; + fn into_syntax(self) -> SyntaxNode; } #[test] @@ -48,16 +59,51 @@ fn assert_ast_is_object_safe() { } /// Like `AstNode`, but wraps tokens rather than interior nodes. 
-pub trait AstToken { - fn cast(token: SyntaxToken) -> Option +pub trait AstToken: AstElement { + fn can_cast(token: SyntaxKind) -> bool where Self: Sized; + + fn cast_or_return(syntax: SyntaxToken) -> Result + where + Self: Sized; + + fn cast(syntax: SyntaxToken) -> Option + where + Self: Sized, + { + ::cast_or_return(syntax).ok() + } + fn syntax(&self) -> &SyntaxToken; + fn into_syntax(self) -> SyntaxToken; + fn text(&self) -> &SmolStr { self.syntax().text() } } +/// Like `AstNode`, but wraps either a node or a token rather than only an interior node. +pub trait AstElement: std::fmt::Display { + fn can_cast_element(kind: SyntaxKind) -> bool + where + Self: Sized; + + fn cast_or_return_element(syntax: SyntaxElement) -> Result + where + Self: Sized; + + fn cast_element(syntax: SyntaxElement) -> Option + where + Self: Sized, + { + ::cast_or_return_element(syntax).ok() + } + + fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken>; + fn into_syntax_element(self) -> SyntaxElement; +} + /// An iterator over `SyntaxNode` children of a particular AST type. #[derive(Debug, Clone)] pub struct AstChildren { @@ -86,6 +132,64 @@ fn children(parent: &P) -> AstChildren { AstChildren::new(parent.syntax()) } +/// An iterator over `SyntaxToken` children of a particular AST type. +#[derive(Debug, Clone)] +pub struct AstChildTokens { + inner: SyntaxElementChildren, + ph: PhantomData, +} + +impl AstChildTokens { + fn new(parent: &SyntaxNode) -> Self { + AstChildTokens { inner: parent.children_with_tokens(), ph: PhantomData } + } +} + +impl Iterator for AstChildTokens { + type Item = N; + fn next(&mut self) -> Option { + self.inner.by_ref().filter_map(|x| x.into_token()).find_map(N::cast) + } +} + +fn child_token_opt(parent: &P) -> Option { + child_tokens(parent).next() +} + +fn child_tokens(parent: &P) -> AstChildTokens { + AstChildTokens::new(parent.syntax()) +} + +/// An iterator over `SyntaxElement` children of a particular AST type. 
+#[derive(Debug, Clone)] +pub struct AstChildElements { + inner: SyntaxElementChildren, + ph: PhantomData, +} + +impl AstChildElements { + fn new(parent: &SyntaxNode) -> Self { + AstChildElements { inner: parent.children_with_tokens(), ph: PhantomData } + } +} + +impl Iterator for AstChildElements { + type Item = N; + fn next(&mut self) -> Option { + self.inner.by_ref().find_map(N::cast_element) + } +} + +#[allow(dead_code)] +fn child_element_opt(parent: &P) -> Option { + child_elements(parent).next() +} + +#[allow(dead_code)] +fn child_elements(parent: &P) -> AstChildElements { + AstChildElements::new(parent.syntax()) +} + #[test] fn test_doc_comment_none() { let file = SourceFile::parse( diff --git a/crates/ra_syntax/src/ast/tokens.rs b/crates/ra_syntax/src/ast/tokens.rs index 1a51b8d3b9..e8320b57ed 100644 --- a/crates/ra_syntax/src/ast/tokens.rs +++ b/crates/ra_syntax/src/ast/tokens.rs @@ -1,26 +1,10 @@ //! There are many AstNodes, but only a few tokens, so we hand-write them here. use crate::{ - ast::AstToken, - SyntaxKind::{COMMENT, RAW_STRING, STRING, WHITESPACE}, - SyntaxToken, TextRange, TextUnit, + ast::{AstToken, Comment, RawString, String, Whitespace}, + TextRange, TextUnit, }; -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Comment(SyntaxToken); - -impl AstToken for Comment { - fn cast(token: SyntaxToken) -> Option { - match token.kind() { - COMMENT => Some(Comment(token)), - _ => None, - } - } - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - impl Comment { pub fn kind(&self) -> CommentKind { kind_by_prefix(self.text()) @@ -89,20 +73,6 @@ fn prefix_by_kind(kind: CommentKind) -> &'static str { unreachable!() } -pub struct Whitespace(SyntaxToken); - -impl AstToken for Whitespace { - fn cast(token: SyntaxToken) -> Option { - match token.kind() { - WHITESPACE => Some(Whitespace(token)), - _ => None, - } - } - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - impl Whitespace { pub fn spans_multiple_lines(&self) -> bool { let text = 
self.text(); @@ -168,20 +138,6 @@ pub trait HasStringValue: HasQuotes { fn value(&self) -> Option; } -pub struct String(SyntaxToken); - -impl AstToken for String { - fn cast(token: SyntaxToken) -> Option { - match token.kind() { - STRING => Some(String(token)), - _ => None, - } - } - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - impl HasStringValue for String { fn value(&self) -> Option { let text = self.text().as_str(); @@ -201,20 +157,6 @@ impl HasStringValue for String { } } -pub struct RawString(SyntaxToken); - -impl AstToken for RawString { - fn cast(token: SyntaxToken) -> Option { - match token.kind() { - RAW_STRING => Some(RawString(token)), - _ => None, - } - } - fn syntax(&self) -> &SyntaxToken { - &self.0 - } -} - impl HasStringValue for RawString { fn value(&self) -> Option { let text = self.text().as_str(); diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 32afd47bc3..c730c75eed 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -5,6 +5,8 @@ use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; +use std::borrow::Cow; +use std::collections::{BTreeSet, HashMap, HashSet}; use crate::{ ast_src::{AstSrc, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, @@ -18,13 +20,125 @@ pub fn generate_syntax(mode: Mode) -> Result<()> { update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; let ast_file = project_root().join(codegen::AST); - let ast = generate_ast(AST_SRC)?; + let ast = generate_ast(KINDS_SRC, AST_SRC)?; update(ast_file.as_path(), &ast, mode)?; Ok(()) } -fn generate_ast(grammar: AstSrc<'_>) -> Result { +#[derive(Debug, Default, Clone)] +struct ElementKinds { + kinds: BTreeSet, + has_nodes: bool, + has_tokens: bool, +} + +fn generate_ast(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result { + let all_token_kinds: Vec<_> = kinds + .punct + .into_iter() + .map(|(_, kind)| kind) + .copied() + .map(|x| x.into()) + .chain( + kinds + .keywords + .into_iter() + 
.chain(kinds.contextual_keywords.into_iter()) + .map(|name| Cow::Owned(format!("{}_KW", to_upper_snake_case(&name)))), + ) + .chain(kinds.literals.into_iter().copied().map(|x| x.into())) + .chain(kinds.tokens.into_iter().copied().map(|x| x.into())) + .collect(); + + let mut element_kinds_map = HashMap::new(); + for kind in &all_token_kinds { + let kind = &**kind; + let name = to_pascal_case(kind); + element_kinds_map.insert( + name, + ElementKinds { + kinds: Some(format_ident!("{}", kind)).into_iter().collect(), + has_nodes: false, + has_tokens: true, + }, + ); + } + + for kind in kinds.nodes { + let name = to_pascal_case(kind); + element_kinds_map.insert( + name, + ElementKinds { + kinds: Some(format_ident!("{}", *kind)).into_iter().collect(), + has_nodes: true, + has_tokens: false, + }, + ); + } + + for en in grammar.enums { + let mut element_kinds: ElementKinds = Default::default(); + for variant in en.variants { + if let Some(variant_element_kinds) = element_kinds_map.get(*variant) { + element_kinds.kinds.extend(variant_element_kinds.kinds.iter().cloned()); + element_kinds.has_tokens |= variant_element_kinds.has_tokens; + element_kinds.has_nodes |= variant_element_kinds.has_nodes; + } else { + panic!("Enum variant has type that does not exist or was not declared before the enum: {}", *variant); + } + } + element_kinds_map.insert(en.name.to_string(), element_kinds); + } + + let tokens = all_token_kinds.iter().map(|kind_str| { + let kind_str = &**kind_str; + let kind = format_ident!("{}", kind_str); + let name = format_ident!("{}", to_pascal_case(kind_str)); + quote! 
{ + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct #name(SyntaxToken); + + impl std::fmt::Display for #name { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } + } + + impl AstToken for #name { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + #kind => true, + _ => false, + } + } + fn cast_or_return(syntax: SyntaxToken) -> Result { + if Self::can_cast(syntax.kind()) { Ok(Self(syntax)) } else { Err(syntax) } + } + fn syntax(&self) -> &SyntaxToken { &self.0 } + fn into_syntax(self) -> SyntaxToken { self.0 } + } + + impl AstElement for #name { + fn can_cast_element(kind: SyntaxKind) -> bool { + match kind { + #kind => true, + _ => false, + } + } + fn cast_or_return_element(syntax: SyntaxElement) -> Result { + if Self::can_cast_element(syntax.kind()) { Ok(Self(syntax.into_token().unwrap())) } else { Err(syntax) } + } + fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken> { + NodeOrToken::Token(&self.0) + } + fn into_syntax_element(self) -> SyntaxElement { + NodeOrToken::Token(self.0) + } + } + } + }); + let nodes = grammar.nodes.iter().map(|node| { let name = format_ident!("{}", node.name); let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); @@ -42,20 +156,28 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result { FieldSrc::Optional(ty) | FieldSrc::Many(ty) => ty, FieldSrc::Shorthand => name, }; + let element_kinds = &element_kinds_map.get(*ty).unwrap_or_else(|| panic!("type not found: {}", *ty)); + let iter = if !element_kinds.has_tokens { + format_ident!("AstChildren") + } else if !element_kinds.has_nodes { + format_ident!("AstChildTokens") + } else { + format_ident!("AstChildElements") + }; let ty = format_ident!("{}", ty); match field { FieldSrc::Many(_) => { quote! 
{ - pub fn #method_name(&self) -> AstChildren<#ty> { - AstChildren::new(&self.syntax) + pub fn #method_name(&self) -> #iter<#ty> { + #iter::new(&self.syntax) } } } FieldSrc::Optional(_) | FieldSrc::Shorthand => { quote! { pub fn #method_name(&self) -> Option<#ty> { - AstChildren::new(&self.syntax).next() + #iter::new(&self.syntax).next() } } } @@ -81,11 +203,31 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result { _ => false, } } - fn cast(syntax: SyntaxNode) -> Option { - if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } + fn cast_or_return(syntax: SyntaxNode) -> Result { + if Self::can_cast(syntax.kind()) { Ok(Self { syntax }) } else { Err(syntax) } } fn syntax(&self) -> &SyntaxNode { &self.syntax } + fn into_syntax(self) -> SyntaxNode { self.syntax } } + + impl AstElement for #name { + fn can_cast_element(kind: SyntaxKind) -> bool { + match kind { + #kind => true, + _ => false, + } + } + fn cast_or_return_element(syntax: SyntaxElement) -> Result { + if Self::can_cast_element(syntax.kind()) { Ok(Self { syntax: syntax.into_node().unwrap() }) } else { Err(syntax) } + } + fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken> { + NodeOrToken::Node(&self.syntax) + } + fn into_syntax_element(self) -> SyntaxElement { + NodeOrToken::Node(self.syntax) + } + } + #(#traits)* impl #name { @@ -96,16 +238,71 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result { let enums = grammar.enums.iter().map(|en| { let variants = en.variants.iter().map(|var| format_ident!("{}", var)).collect::>(); + let element_kinds = &element_kinds_map[&en.name.to_string()]; let name = format_ident!("{}", en.name); - let kinds = variants + let kinds = en.variants .iter() - .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string()))) + .map(|name| { + element_kinds_map[*name].kinds.iter().collect::>() + }) .collect::>(); let traits = en.traits.iter().map(|trait_name| { let trait_name = format_ident!("{}", trait_name); quote!(impl ast::#trait_name for #name {}) 
}); + let all_kinds = &element_kinds.kinds; + + let specific_ast_trait = if element_kinds.has_nodes != element_kinds.has_tokens { + let (ast_trait, syntax_type) = if element_kinds.has_tokens { + ( + quote!(AstToken), + quote!(SyntaxToken), + ) + } else { + ( + quote!(AstNode), + quote!(SyntaxNode), + ) + }; + + quote! { + impl #ast_trait for #name { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + #(#all_kinds)|* => true, + _ => false, + } + } + #[allow(unreachable_patterns)] + fn cast_or_return(syntax: #syntax_type) -> Result { + match syntax.kind() { + #( + #(#kinds)|* => #variants::cast_or_return(syntax).map(|x| #name::#variants(x)), + )* + _ => Err(syntax), + } + } + fn syntax(&self) -> &#syntax_type { + match self { + #( + #name::#variants(it) => it.syntax(), + )* + } + } + fn into_syntax(self) -> #syntax_type { + match self { + #( + #name::#variants(it) => it.into_syntax(), + )* + } + } + } + } + } else { + Default::default() + }; + quote! { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum #name { @@ -122,44 +319,71 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result { impl std::fmt::Display for #name { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - std::fmt::Display::fmt(self.syntax(), f) + match self { + #( + #name::#variants(it) => std::fmt::Display::fmt(it, f), + )* + } } } - impl AstNode for #name { - fn can_cast(kind: SyntaxKind) -> bool { + #specific_ast_trait + + impl AstElement for #name { + fn can_cast_element(kind: SyntaxKind) -> bool { match kind { - #(#kinds)|* => true, + #(#all_kinds)|* => true, _ => false, } } - fn cast(syntax: SyntaxNode) -> Option { - let res = match syntax.kind() { + #[allow(unreachable_patterns)] + fn cast_or_return_element(syntax: SyntaxElement) -> Result { + match syntax.kind() { #( - #kinds => #name::#variants(#variants { syntax }), + #(#kinds)|* => #variants::cast_or_return_element(syntax).map(|x| #name::#variants(x)), )* - _ => return None, - }; - Some(res) + _ => Err(syntax), + } } - 
fn syntax(&self) -> &SyntaxNode { + fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken> { match self { #( - #name::#variants(it) => &it.syntax, + #name::#variants(it) => it.syntax_element(), + )* + } + } + fn into_syntax_element(self) -> SyntaxElement { + match self { + #( + #name::#variants(it) => it.into_syntax_element(), )* } } } + #(#traits)* } }); + let defined_nodes: HashSet<_> = grammar.nodes.iter().map(|node| node.name).collect(); + + for node in kinds + .nodes + .iter() + .map(|kind| to_pascal_case(*kind)) + .filter(|name| !defined_nodes.contains(&**name)) + { + eprintln!("Warning: node {} not defined in ast source", node); + } + let ast = quote! { + #[allow(unused_imports)] use crate::{ - SyntaxNode, SyntaxKind::{self, *}, - ast::{self, AstNode, AstChildren}, + SyntaxNode, SyntaxToken, SyntaxElement, NodeOrToken, SyntaxKind::{self, *}, + ast::{self, AstNode, AstToken, AstElement, AstChildren, AstChildTokens, AstChildElements}, }; + #(#tokens)* #(#nodes)* #(#enums)* }; @@ -282,12 +506,12 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result { fn to_upper_snake_case(s: &str) -> String { let mut buf = String::with_capacity(s.len()); - let mut prev_is_upper = None; + let mut prev = false; for c in s.chars() { - if c.is_ascii_uppercase() && prev_is_upper == Some(false) { + if c.is_ascii_uppercase() && prev { buf.push('_') } - prev_is_upper = Some(c.is_ascii_uppercase()); + prev = true; buf.push(c.to_ascii_uppercase()); } @@ -296,14 +520,30 @@ fn to_upper_snake_case(s: &str) -> String { fn to_lower_snake_case(s: &str) -> String { let mut buf = String::with_capacity(s.len()); - let mut prev_is_upper = None; + let mut prev = false; for c in s.chars() { - if c.is_ascii_uppercase() && prev_is_upper == Some(false) { + if c.is_ascii_uppercase() && prev { buf.push('_') } - prev_is_upper = Some(c.is_ascii_uppercase()); + prev = true; buf.push(c.to_ascii_lowercase()); } buf } + +fn to_pascal_case(s: &str) -> String { + let mut buf = 
String::with_capacity(s.len()); + let mut prev_is_underscore = true; + for c in s.chars() { + if c == '_' { + prev_is_underscore = true; + } else if prev_is_underscore { + buf.push(c.to_ascii_uppercase()); + prev_is_underscore = false; + } else { + buf.push(c.to_ascii_lowercase()); + } + } + buf +}