xtask: move codegen to a module

2024-12-25 20:43:21 +00:00 · 2019-10-23 18:13:40 +03:00 · 2019-10-23 18:13:40 +03:00 · b5f13d8d51
commit b5f13d8d51
parent edf4d8e555
9 changed files with 569 additions and 533 deletions
--- a/crates/ra_assists/src/assists/early_return.rs
+++ b/crates/ra_assists/src/assists/early_return.rs
@ -2,7 +2,7 @@
 //!
 //! Replace a large conditional with a guarded return.
 //!
-//! ```notrust
+//! ```text
 //! fn <|>main() {
 //!     if cond {
 //!         foo();
@ -11,7 +11,7 @@
 //! }
 //! ```
 //! ->
-//! ```notrust
+//! ```text
 //! fn main() {
 //!     if !cond {
 //!         return;
--- a/xtask/src/bin/pre-commit.rs
+++ b/xtask/src/bin/pre-commit.rs
@ -2,10 +2,10 @@

 use std::process::Command;

-use xtask::{project_root, run, run_rustfmt, Overwrite, Result};
+use xtask::{codegen::Mode, project_root, run, run_rustfmt, Result};

 fn main() -> Result<()> {
-    run_rustfmt(Overwrite)?;
+    run_rustfmt(Mode::Overwrite)?;
    update_staged()
 }

--- a/xtask/src/boilerplate_gen.rs
+++ b/xtask/src/boilerplate_gen.rs
@ -1,348 +0,0 @@
-//! FIXME: write short doc here
-
-use std::{
-    collections::BTreeMap,
-    fs,
-    io::Write,
-    process::{Command, Stdio},
-};
-
-use proc_macro2::{Punct, Spacing};
-use quote::{format_ident, quote};
-use ron;
-use serde::Deserialize;
-
-use crate::{project_root, update, Mode, Result, AST, GRAMMAR, SYNTAX_KINDS};
-
-pub fn generate_boilerplate(mode: Mode) -> Result<()> {
-    let grammar = project_root().join(GRAMMAR);
-    let grammar: Grammar = {
-        let text = fs::read_to_string(grammar)?;
-        ron::de::from_str(&text)?
-    };
-
-    let syntax_kinds_file = project_root().join(SYNTAX_KINDS);
-    let syntax_kinds = generate_syntax_kinds(&grammar)?;
-    update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
-
-    let ast_file = project_root().join(AST);
-    let ast = generate_ast(&grammar)?;
-    update(ast_file.as_path(), &ast, mode)?;
-
-    Ok(())
-}
-
-fn generate_ast(grammar: &Grammar) -> Result<String> {
-    let nodes = grammar.ast.iter().map(|(name, ast_node)| {
-        let variants =
-            ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
-        let name = format_ident!("{}", name);
-
-        let adt = if variants.is_empty() {
-            let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
-            quote! {
-                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-                pub struct #name {
-                    pub(crate) syntax: SyntaxNode,
-                }
-
-                impl AstNode for #name {
-                    fn can_cast(kind: SyntaxKind) -> bool {
-                        match kind {
-                            #kind => true,
-                            _ => false,
-                        }
-                    }
-                    fn cast(syntax: SyntaxNode) -> Option<Self> {
-                        if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
-                    }
-                    fn syntax(&self) -> &SyntaxNode { &self.syntax }
-                }
-            }
-        } else {
-            let kinds = variants
-                .iter()
-                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
-                .collect::<Vec<_>>();
-
-            quote! {
-                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-                pub enum #name {
-                    #(#variants(#variants),)*
-                }
-
-                #(
-                impl From<#variants> for #name {
-                    fn from(node: #variants) -> #name {
-                        #name::#variants(node)
-                    }
-                }
-                )*
-
-                impl AstNode for #name {
-                    fn can_cast(kind: SyntaxKind) -> bool {
-                        match kind {
-                            #(#kinds)|* => true,
-                            _ => false,
-                        }
-                    }
-                    fn cast(syntax: SyntaxNode) -> Option<Self> {
-                        let res = match syntax.kind() {
-                            #(
-                            #kinds => #name::#variants(#variants { syntax }),
-                            )*
-                            _ => return None,
-                        };
-                        Some(res)
-                    }
-                    fn syntax(&self) -> &SyntaxNode {
-                        match self {
-                            #(
-                            #name::#variants(it) => &it.syntax,
-                            )*
-                        }
-                    }
-                }
-            }
-        };
-
-        let traits = ast_node.traits.iter().map(|trait_name| {
-            let trait_name = format_ident!("{}", trait_name);
-            quote!(impl ast::#trait_name for #name {})
-        });
-
-        let collections = ast_node.collections.iter().map(|(name, kind)| {
-            let method_name = format_ident!("{}", name);
-            let kind = format_ident!("{}", kind);
-            quote! {
-                pub fn #method_name(&self) -> AstChildren<#kind> {
-                    AstChildren::new(&self.syntax)
-                }
-            }
-        });
-
-        let options = ast_node.options.iter().map(|attr| {
-            let method_name = match attr {
-                Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
-                Attr::NameType(n, _) => format_ident!("{}", n),
-            };
-            let ty = match attr {
-                Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
-            };
-            quote! {
-                pub fn #method_name(&self) -> Option<#ty> {
-                    AstChildren::new(&self.syntax).next()
-                }
-            }
-        });
-
-        quote! {
-            #adt
-
-            #(#traits)*
-
-            impl #name {
-                #(#collections)*
-                #(#options)*
-            }
-        }
-    });
-
-    let ast = quote! {
-        use crate::{
-            SyntaxNode, SyntaxKind::{self, *},
-            ast::{self, AstNode, AstChildren},
-        };
-
-        #(#nodes)*
-    };
-
-    let pretty = reformat(ast)?;
-    Ok(pretty)
-}
-
-fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
-    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
-        .punct
-        .iter()
-        .filter(|(token, _name)| token.len() == 1)
-        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
-        .unzip();
-
-    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
-        if "{}[]()".contains(token) {
-            let c = token.chars().next().unwrap();
-            quote! { #c }
-        } else {
-            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
-            quote! { #(#cs)* }
-        }
-    });
-    let punctuation =
-        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
-
-    let full_keywords_values = &grammar.keywords;
-    let full_keywords =
-        full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
-
-    let all_keywords_values =
-        grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
-    let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
-    let all_keywords = all_keywords_values
-        .iter()
-        .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
-        .collect::<Vec<_>>();
-
-    let literals =
-        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
-
-    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
-
-    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
-
-    let ast = quote! {
-        #![allow(bad_style, missing_docs, unreachable_pub)]
-        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
-        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
-        #[repr(u16)]
-        pub enum SyntaxKind {
-            // Technical SyntaxKinds: they appear temporally during parsing,
-            // but never end up in the final tree
-            #[doc(hidden)]
-            TOMBSTONE,
-            #[doc(hidden)]
-            EOF,
-            #(#punctuation,)*
-            #(#all_keywords,)*
-            #(#literals,)*
-            #(#tokens,)*
-            #(#nodes,)*
-
-            // Technical kind so that we can cast from u16 safely
-            #[doc(hidden)]
-            __LAST,
-        }
-        use self::SyntaxKind::*;
-
-        impl SyntaxKind {
-            pub fn is_keyword(self) -> bool {
-                match self {
-                    #(#all_keywords)|* => true,
-                    _ => false,
-                }
-            }
-
-            pub fn is_punct(self) -> bool {
-                match self {
-                    #(#punctuation)|* => true,
-                    _ => false,
-                }
-            }
-
-            pub fn is_literal(self) -> bool {
-                match self {
-                    #(#literals)|* => true,
-                    _ => false,
-                }
-            }
-
-            pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
-                let kw = match ident {
-                    #(#full_keywords_values => #full_keywords,)*
-                    _ => return None,
-                };
-                Some(kw)
-            }
-
-            pub fn from_char(c: char) -> Option<SyntaxKind> {
-                let tok = match c {
-                    #(#single_byte_tokens_values => #single_byte_tokens,)*
-                    _ => return None,
-                };
-                Some(tok)
-            }
-        }
-
-        #[macro_export]
-        macro_rules! T {
-            #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
-            #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
-        }
-    };
-
-    reformat(ast)
-}
-
-fn reformat(text: impl std::fmt::Display) -> Result<String> {
-    let mut rustfmt = Command::new("rustfmt")
-        .arg("--config-path")
-        .arg(project_root().join("rustfmt.toml"))
-        .stdin(Stdio::piped())
-        .stdout(Stdio::piped())
-        .spawn()?;
-    write!(rustfmt.stdin.take().unwrap(), "{}", text)?;
-    let output = rustfmt.wait_with_output()?;
-    let stdout = String::from_utf8(output.stdout)?;
-    let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`";
-    Ok(format!("//! {}\n\n{}", preamble, stdout))
-}
-
-#[derive(Deserialize, Debug)]
-struct Grammar {
-    punct: Vec<(String, String)>,
-    keywords: Vec<String>,
-    contextual_keywords: Vec<String>,
-    literals: Vec<String>,
-    tokens: Vec<String>,
-    nodes: Vec<String>,
-    ast: BTreeMap<String, AstNode>,
-}
-
-#[derive(Deserialize, Debug)]
-struct AstNode {
-    #[serde(default)]
-    #[serde(rename = "enum")]
-    variants: Vec<String>,
-
-    #[serde(default)]
-    traits: Vec<String>,
-    #[serde(default)]
-    collections: Vec<(String, String)>,
-    #[serde(default)]
-    options: Vec<Attr>,
-}
-
-#[derive(Deserialize, Debug)]
-#[serde(untagged)]
-enum Attr {
-    Type(String),
-    NameType(String, String),
-}
-
-fn to_upper_snake_case(s: &str) -> String {
-    let mut buf = String::with_capacity(s.len());
-    let mut prev_is_upper = None;
-    for c in s.chars() {
-        if c.is_ascii_uppercase() && prev_is_upper == Some(false) {
-            buf.push('_')
-        }
-        prev_is_upper = Some(c.is_ascii_uppercase());
-
-        buf.push(c.to_ascii_uppercase());
-    }
-    buf
-}
-
-fn to_lower_snake_case(s: &str) -> String {
-    let mut buf = String::with_capacity(s.len());
-    let mut prev_is_upper = None;
-    for c in s.chars() {
-        if c.is_ascii_uppercase() && prev_is_upper == Some(false) {
-            buf.push('_')
-        }
-        prev_is_upper = Some(c.is_ascii_uppercase());
-
-        buf.push(c.to_ascii_lowercase());
-    }
-    buf
-}
--- a/xtask/src/codegen.rs
+++ b/xtask/src/codegen.rs
@ -0,0 +1,46 @@
+//! We use code generation heavily in rust-analyzer.
+//!
+//! Rather than doing it via proc-macros, we use the old-school way of just dumping
+//! the source code.
+//!
+//! This module's submodules define specific bits that we generate.
+
+mod gen_syntax;
+mod gen_parser_tests;
+
+use std::{fs, path::Path};
+
+use crate::Result;
+
+pub use self::{gen_parser_tests::generate_parser_tests, gen_syntax::generate_syntax};
+
+pub const GRAMMAR: &str = "crates/ra_syntax/src/grammar.ron";
+const GRAMMAR_DIR: &str = "crates/ra_parser/src/grammar";
+const OK_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/ok";
+const ERR_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/err";
+
+pub const SYNTAX_KINDS: &str = "crates/ra_parser/src/syntax_kind/generated.rs";
+pub const AST: &str = "crates/ra_syntax/src/ast/generated.rs";
+
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub enum Mode {
+    Overwrite,
+    Verify,
+}
+
+/// A helper to update a file on disk if it has changed.
+/// With `Mode::Verify`, a stale file is reported as an error instead of being rewritten.
+pub fn update(path: &Path, contents: &str, mode: Mode) -> Result<()> {
+    match fs::read_to_string(path) {
+        Ok(ref old_contents) if old_contents == contents => {
+            return Ok(());
+        }
+        _ => (),
+    }
+    if mode == Mode::Verify {
+        Err(format!("`{}` is not up-to-date", path.display()))?;
+    }
+    eprintln!("updating {}", path.display());
+    fs::write(path, contents)?;
+    Ok(())
+}
--- a/xtask/src/codegen/gen_parser_tests.rs
+++ b/xtask/src/codegen/gen_parser_tests.rs
@ -0,0 +1,150 @@
+//! This module greps parser's code for specially formatted comments and turns
+//! them into tests.
+
+use std::{
+    collections::HashMap,
+    fs,
+    path::{Path, PathBuf},
+};
+
+use itertools::Itertools;
+
+use crate::{
+    codegen::{self, update, Mode},
+    project_root, Result,
+};
+
+pub fn generate_parser_tests(mode: Mode) -> Result<()> {
+    let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?;
+    fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> {
+        let tests_dir = project_root().join(into);
+        if !tests_dir.is_dir() {
+            fs::create_dir_all(&tests_dir)?;
+        }
+        // ok is never actually read, but it needs to be specified to create a Test in existing_tests
+        let existing = existing_tests(&tests_dir, true)?;
+        for t in existing.keys().filter(|&t| !tests.contains_key(t)) {
+            panic!("Test is deleted: {}", t);
+        }
+
+        let mut new_idx = existing.len() + 1;
+        for (name, test) in tests {
+            let path = match existing.get(name) {
+                Some((path, _test)) => path.clone(),
+                None => {
+                    let file_name = format!("{:04}_{}.rs", new_idx, name);
+                    new_idx += 1;
+                    tests_dir.join(file_name)
+                }
+            };
+            update(&path, &test.text, mode)?;
+        }
+        Ok(())
+    }
+    install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?;
+    install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode)
+}
+
+#[derive(Debug)]
+struct Test {
+    pub name: String,
+    pub text: String,
+    pub ok: bool,
+}
+
+#[derive(Default, Debug)]
+struct Tests {
+    pub ok: HashMap<String, Test>,
+    pub err: HashMap<String, Test>,
+}
+
+fn collect_tests(s: &str) -> Vec<(usize, Test)> {
+    let mut res = vec![];
+    let prefix = "// ";
+    let comment_blocks = s
+        .lines()
+        .map(str::trim_start)
+        .enumerate()
+        .group_by(|(_idx, line)| line.starts_with(prefix));
+
+    'outer: for (is_comment, block) in comment_blocks.into_iter() {
+        if !is_comment {
+            continue;
+        }
+        let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..]));
+
+        let mut ok = true;
+        let (start_line, name) = loop {
+            match block.next() {
+                Some((idx, line)) if line.starts_with("test ") => {
+                    break (idx, line["test ".len()..].to_string());
+                }
+                Some((idx, line)) if line.starts_with("test_err ") => {
+                    ok = false;
+                    break (idx, line["test_err ".len()..].to_string());
+                }
+                Some(_) => (),
+                None => continue 'outer,
+            }
+        };
+        let text: String =
+            itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n");
+        assert!(!text.trim().is_empty() && text.ends_with('\n'));
+        res.push((start_line, Test { name, text, ok }))
+    }
+    res
+}
+
+fn tests_from_dir(dir: &Path) -> Result<Tests> {
+    let mut res = Tests::default();
+    for entry in ::walkdir::WalkDir::new(dir) {
+        let entry = entry.unwrap();
+        if !entry.file_type().is_file() {
+            continue;
+        }
+        if entry.path().extension().unwrap_or_default() != "rs" {
+            continue;
+        }
+        process_file(&mut res, entry.path())?;
+    }
+    let grammar_rs = dir.parent().unwrap().join("grammar.rs");
+    process_file(&mut res, &grammar_rs)?;
+    return Ok(res);
+    fn process_file(res: &mut Tests, path: &Path) -> Result<()> {
+        let text = fs::read_to_string(path)?;
+
+        for (_, test) in collect_tests(&text) {
+            if test.ok {
+                if let Some(old_test) = res.ok.insert(test.name.clone(), test) {
+                    Err(format!("Duplicate test: {}", old_test.name))?
+                }
+            } else {
+                if let Some(old_test) = res.err.insert(test.name.clone(), test) {
+                    Err(format!("Duplicate test: {}", old_test.name))?
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> {
+    let mut res = HashMap::new();
+    for file in fs::read_dir(dir)? {
+        let file = file?;
+        let path = file.path();
+        if path.extension().unwrap_or_default() != "rs" {
+            continue;
+        }
+        let name = {
+            let file_name = path.file_name().unwrap().to_str().unwrap();
+            file_name[5..file_name.len() - 3].to_string()
+        };
+        let text = fs::read_to_string(&path)?;
+        let test = Test { name: name.clone(), text, ok };
+        if let Some(old) = res.insert(name, (path, test)) {
+            println!("Duplicate test: {:?}", old);
+        }
+    }
+    Ok(res)
+}
--- a/xtask/src/codegen/gen_syntax.rs
+++ b/xtask/src/codegen/gen_syntax.rs
@ -0,0 +1,354 @@
+//! This module generates the AST datatypes used by rust-analyzer.
+//!
+//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
+//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
+
+use std::{
+    collections::BTreeMap,
+    fs,
+    io::Write,
+    process::{Command, Stdio},
+};
+
+use proc_macro2::{Punct, Spacing};
+use quote::{format_ident, quote};
+use ron;
+use serde::Deserialize;
+
+use crate::{
+    codegen::{self, update, Mode},
+    project_root, Result,
+};
+
+pub fn generate_syntax(mode: Mode) -> Result<()> {
+    let grammar = project_root().join(codegen::GRAMMAR);
+    let grammar: Grammar = {
+        let text = fs::read_to_string(grammar)?;
+        ron::de::from_str(&text)?
+    };
+
+    let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
+    let syntax_kinds = generate_syntax_kinds(&grammar)?;
+    update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
+
+    let ast_file = project_root().join(codegen::AST);
+    let ast = generate_ast(&grammar)?;
+    update(ast_file.as_path(), &ast, mode)?;
+
+    Ok(())
+}
+
+fn generate_ast(grammar: &Grammar) -> Result<String> {
+    let nodes = grammar.ast.iter().map(|(name, ast_node)| {
+        let variants =
+            ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
+        let name = format_ident!("{}", name);
+
+        let adt = if variants.is_empty() {
+            let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
+            quote! {
+                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+                pub struct #name {
+                    pub(crate) syntax: SyntaxNode,
+                }
+
+                impl AstNode for #name {
+                    fn can_cast(kind: SyntaxKind) -> bool {
+                        match kind {
+                            #kind => true,
+                            _ => false,
+                        }
+                    }
+                    fn cast(syntax: SyntaxNode) -> Option<Self> {
+                        if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
+                    }
+                    fn syntax(&self) -> &SyntaxNode { &self.syntax }
+                }
+            }
+        } else {
+            let kinds = variants
+                .iter()
+                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
+                .collect::<Vec<_>>();
+
+            quote! {
+                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+                pub enum #name {
+                    #(#variants(#variants),)*
+                }
+
+                #(
+                impl From<#variants> for #name {
+                    fn from(node: #variants) -> #name {
+                        #name::#variants(node)
+                    }
+                }
+                )*
+
+                impl AstNode for #name {
+                    fn can_cast(kind: SyntaxKind) -> bool {
+                        match kind {
+                            #(#kinds)|* => true,
+                            _ => false,
+                        }
+                    }
+                    fn cast(syntax: SyntaxNode) -> Option<Self> {
+                        let res = match syntax.kind() {
+                            #(
+                            #kinds => #name::#variants(#variants { syntax }),
+                            )*
+                            _ => return None,
+                        };
+                        Some(res)
+                    }
+                    fn syntax(&self) -> &SyntaxNode {
+                        match self {
+                            #(
+                            #name::#variants(it) => &it.syntax,
+                            )*
+                        }
+                    }
+                }
+            }
+        };
+
+        let traits = ast_node.traits.iter().map(|trait_name| {
+            let trait_name = format_ident!("{}", trait_name);
+            quote!(impl ast::#trait_name for #name {})
+        });
+
+        let collections = ast_node.collections.iter().map(|(name, kind)| {
+            let method_name = format_ident!("{}", name);
+            let kind = format_ident!("{}", kind);
+            quote! {
+                pub fn #method_name(&self) -> AstChildren<#kind> {
+                    AstChildren::new(&self.syntax)
+                }
+            }
+        });
+
+        let options = ast_node.options.iter().map(|attr| {
+            let method_name = match attr {
+                Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
+                Attr::NameType(n, _) => format_ident!("{}", n),
+            };
+            let ty = match attr {
+                Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
+            };
+            quote! {
+                pub fn #method_name(&self) -> Option<#ty> {
+                    AstChildren::new(&self.syntax).next()
+                }
+            }
+        });
+
+        quote! {
+            #adt
+
+            #(#traits)*
+
+            impl #name {
+                #(#collections)*
+                #(#options)*
+            }
+        }
+    });
+
+    let ast = quote! {
+        use crate::{
+            SyntaxNode, SyntaxKind::{self, *},
+            ast::{self, AstNode, AstChildren},
+        };
+
+        #(#nodes)*
+    };
+
+    let pretty = reformat(ast)?;
+    Ok(pretty)
+}
+
+fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
+    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
+        .punct
+        .iter()
+        .filter(|(token, _name)| token.len() == 1)
+        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
+        .unzip();
+
+    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
+        if "{}[]()".contains(token) {
+            let c = token.chars().next().unwrap();
+            quote! { #c }
+        } else {
+            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
+            quote! { #(#cs)* }
+        }
+    });
+    let punctuation =
+        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let full_keywords_values = &grammar.keywords;
+    let full_keywords =
+        full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
+
+    let all_keywords_values =
+        grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
+    let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
+    let all_keywords = all_keywords_values
+        .iter()
+        .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
+        .collect::<Vec<_>>();
+
+    let literals =
+        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let ast = quote! {
+        #![allow(bad_style, missing_docs, unreachable_pub)]
+        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
+        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+        #[repr(u16)]
+        pub enum SyntaxKind {
+            // Technical SyntaxKinds: they appear temporally during parsing,
+            // but never end up in the final tree
+            #[doc(hidden)]
+            TOMBSTONE,
+            #[doc(hidden)]
+            EOF,
+            #(#punctuation,)*
+            #(#all_keywords,)*
+            #(#literals,)*
+            #(#tokens,)*
+            #(#nodes,)*
+
+            // Technical kind so that we can cast from u16 safely
+            #[doc(hidden)]
+            __LAST,
+        }
+        use self::SyntaxKind::*;
+
+        impl SyntaxKind {
+            pub fn is_keyword(self) -> bool {
+                match self {
+                    #(#all_keywords)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn is_punct(self) -> bool {
+                match self {
+                    #(#punctuation)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn is_literal(self) -> bool {
+                match self {
+                    #(#literals)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
+                let kw = match ident {
+                    #(#full_keywords_values => #full_keywords,)*
+                    _ => return None,
+                };
+                Some(kw)
+            }
+
+            pub fn from_char(c: char) -> Option<SyntaxKind> {
+                let tok = match c {
+                    #(#single_byte_tokens_values => #single_byte_tokens,)*
+                    _ => return None,
+                };
+                Some(tok)
+            }
+        }
+
+        #[macro_export]
+        macro_rules! T {
+            #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
+            #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
+        }
+    };
+
+    reformat(ast)
+}
+
+fn reformat(text: impl std::fmt::Display) -> Result<String> {
+    let mut rustfmt = Command::new("rustfmt")
+        .arg("--config-path")
+        .arg(project_root().join("rustfmt.toml"))
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()?;
+    write!(rustfmt.stdin.take().unwrap(), "{}", text)?;
+    let output = rustfmt.wait_with_output()?;
+    let stdout = String::from_utf8(output.stdout)?;
+    let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`";
+    Ok(format!("//! {}\n\n{}", preamble, stdout))
+}
+
+#[derive(Deserialize, Debug)]
+struct Grammar {
+    punct: Vec<(String, String)>,
+    keywords: Vec<String>,
+    contextual_keywords: Vec<String>,
+    literals: Vec<String>,
+    tokens: Vec<String>,
+    nodes: Vec<String>,
+    ast: BTreeMap<String, AstNode>,
+}
+
+#[derive(Deserialize, Debug)]
+struct AstNode {
+    #[serde(default)]
+    #[serde(rename = "enum")]
+    variants: Vec<String>,
+
+    #[serde(default)]
+    traits: Vec<String>,
+    #[serde(default)]
+    collections: Vec<(String, String)>,
+    #[serde(default)]
+    options: Vec<Attr>,
+}
+
+#[derive(Deserialize, Debug)]
+#[serde(untagged)]
+enum Attr {
+    Type(String),
+    NameType(String, String),
+}
+
+fn to_upper_snake_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev_is_upper = None;
+    for c in s.chars() {
+        if c.is_ascii_uppercase() && prev_is_upper == Some(false) {
+            buf.push('_')
+        }
+        prev_is_upper = Some(c.is_ascii_uppercase());
+
+        buf.push(c.to_ascii_uppercase());
+    }
+    buf
+}
+
+fn to_lower_snake_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev_is_upper = None;
+    for c in s.chars() {
+        if c.is_ascii_uppercase() && prev_is_upper == Some(false) {
+            buf.push('_')
+        }
+        prev_is_upper = Some(c.is_ascii_uppercase());
+
+        buf.push(c.to_ascii_lowercase());
+    }
+    buf
+}
--- a/xtask/src/lib.rs
+++ b/xtask/src/lib.rs
@ -1,9 +1,8 @@
 //! FIXME: write short doc here

-mod boilerplate_gen;
+pub mod codegen;

 use std::{
-    collections::HashMap,
    error::Error,
    fs,
    io::{Error as IoError, ErrorKind},
@ -11,72 +10,12 @@ use std::{
    process::{Command, Output, Stdio},
 };

-use itertools::Itertools;
-
-pub use self::boilerplate_gen::generate_boilerplate;
+use crate::codegen::Mode;

 pub type Result<T> = std::result::Result<T, Box<dyn Error>>;

-pub const GRAMMAR: &str = "crates/ra_syntax/src/grammar.ron";
-const GRAMMAR_DIR: &str = "crates/ra_parser/src/grammar";
-const OK_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/ok";
-const ERR_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/err";
-
-pub const SYNTAX_KINDS: &str = "crates/ra_parser/src/syntax_kind/generated.rs";
-pub const AST: &str = "crates/ra_syntax/src/ast/generated.rs";
 const TOOLCHAIN: &str = "stable";

-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-pub enum Mode {
-    Overwrite,
-    Verify,
-}
-pub use Mode::*;
-
-#[derive(Debug)]
-pub struct Test {
-    pub name: String,
-    pub text: String,
-    pub ok: bool,
-}
-
-pub fn collect_tests(s: &str) -> Vec<(usize, Test)> {
-    let mut res = vec![];
-    let prefix = "// ";
-    let comment_blocks = s
-        .lines()
-        .map(str::trim_start)
-        .enumerate()
-        .group_by(|(_idx, line)| line.starts_with(prefix));
-
-    'outer: for (is_comment, block) in comment_blocks.into_iter() {
-        if !is_comment {
-            continue;
-        }
-        let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..]));
-
-        let mut ok = true;
-        let (start_line, name) = loop {
-            match block.next() {
-                Some((idx, line)) if line.starts_with("test ") => {
-                    break (idx, line["test ".len()..].to_string());
-                }
-                Some((idx, line)) if line.starts_with("test_err ") => {
-                    ok = false;
-                    break (idx, line["test_err ".len()..].to_string());
-                }
-                Some(_) => (),
-                None => continue 'outer,
-            }
-        };
-        let text: String =
-            itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n");
-        assert!(!text.trim().is_empty() && text.ends_with('\n'));
-        res.push((start_line, Test { name, text, ok }))
-    }
-    res
-}
-
 pub fn project_root() -> PathBuf {
    Path::new(&env!("CARGO_MANIFEST_DIR")).ancestors().nth(1).unwrap().to_path_buf()
 }
@ -126,7 +65,7 @@ pub fn run_rustfmt(mode: Mode) -> Result<()> {
        _ => install_rustfmt()?,
    };

-    if mode == Verify {
+    if mode == Mode::Verify {
        run(&format!("rustup run {} -- cargo fmt -- --check", TOOLCHAIN), ".")?;
    } else {
        run(&format!("rustup run {} -- cargo fmt", TOOLCHAIN), ".")?;
@ -206,37 +145,6 @@ pub fn run_fuzzer() -> Result<()> {
    run("rustup run nightly -- cargo fuzz run parser", "./crates/ra_syntax")
 }

-pub fn gen_tests(mode: Mode) -> Result<()> {
-    let tests = tests_from_dir(&project_root().join(Path::new(GRAMMAR_DIR)))?;
-    fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> {
-        let tests_dir = project_root().join(into);
-        if !tests_dir.is_dir() {
-            fs::create_dir_all(&tests_dir)?;
-        }
-        // ok is never actually read, but it needs to be specified to create a Test in existing_tests
-        let existing = existing_tests(&tests_dir, true)?;
-        for t in existing.keys().filter(|&t| !tests.contains_key(t)) {
-            panic!("Test is deleted: {}", t);
-        }
-
-        let mut new_idx = existing.len() + 1;
-        for (name, test) in tests {
-            let path = match existing.get(name) {
-                Some((path, _test)) => path.clone(),
-                None => {
-                    let file_name = format!("{:04}_{}.rs", new_idx, name);
-                    new_idx += 1;
-                    tests_dir.join(file_name)
-                }
-            };
-            update(&path, &test.text, mode)?;
-        }
-        Ok(())
-    }
-    install_tests(&tests.ok, OK_INLINE_TESTS_DIR, mode)?;
-    install_tests(&tests.err, ERR_INLINE_TESTS_DIR, mode)
-}
-
 fn do_run<F>(cmdline: &str, dir: &str, mut f: F) -> Result<Output>
 where
    F: FnMut(&mut Command),
@ -253,80 +161,3 @@ where
    }
    Ok(output)
 }
-
-#[derive(Default, Debug)]
-struct Tests {
-    pub ok: HashMap<String, Test>,
-    pub err: HashMap<String, Test>,
-}
-
-fn tests_from_dir(dir: &Path) -> Result<Tests> {
-    let mut res = Tests::default();
-    for entry in ::walkdir::WalkDir::new(dir) {
-        let entry = entry.unwrap();
-        if !entry.file_type().is_file() {
-            continue;
-        }
-        if entry.path().extension().unwrap_or_default() != "rs" {
-            continue;
-        }
-        process_file(&mut res, entry.path())?;
-    }
-    let grammar_rs = dir.parent().unwrap().join("grammar.rs");
-    process_file(&mut res, &grammar_rs)?;
-    return Ok(res);
-    fn process_file(res: &mut Tests, path: &Path) -> Result<()> {
-        let text = fs::read_to_string(path)?;
-
-        for (_, test) in collect_tests(&text) {
-            if test.ok {
-                if let Some(old_test) = res.ok.insert(test.name.clone(), test) {
-                    Err(format!("Duplicate test: {}", old_test.name))?
-                }
-            } else {
-                if let Some(old_test) = res.err.insert(test.name.clone(), test) {
-                    Err(format!("Duplicate test: {}", old_test.name))?
-                }
-            }
-        }
-        Ok(())
-    }
-}
-
-fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> {
-    let mut res = HashMap::new();
-    for file in fs::read_dir(dir)? {
-        let file = file?;
-        let path = file.path();
-        if path.extension().unwrap_or_default() != "rs" {
-            continue;
-        }
-        let name = {
-            let file_name = path.file_name().unwrap().to_str().unwrap();
-            file_name[5..file_name.len() - 3].to_string()
-        };
-        let text = fs::read_to_string(&path)?;
-        let test = Test { name: name.clone(), text, ok };
-        if let Some(old) = res.insert(name, (path, test)) {
-            println!("Duplicate test: {:?}", old);
-        }
-    }
-    Ok(res)
-}
-
-/// A helper to update file on disk if it has changed.
-/// With verify = false,
-pub fn update(path: &Path, contents: &str, mode: Mode) -> Result<()> {
-    match fs::read_to_string(path) {
-        Ok(ref old_contents) if old_contents == contents => {
-            return Ok(());
-        }
-        _ => (),
-    }
-    if mode == Verify {
-        Err(format!("`{}` is not up-to-date", path.display()))?;
-    }
-    eprintln!("updating {}", path.display());
-    fs::write(path, contents)?;
-    Ok(())
-}
--- a/xtask/src/main.rs
+++ b/xtask/src/main.rs
@ -7,8 +7,8 @@ use core::str;
 use pico_args::Arguments;
 use std::{env, path::PathBuf};
 use xtask::{
-    gen_tests, generate_boilerplate, install_format_hook, run, run_clippy, run_fuzzer, run_rustfmt,
-    run_with_output, Cmd, Overwrite, Result,
+    codegen::{self, Mode},
+    install_format_hook, run, run_clippy, run_fuzzer, run_rustfmt, run_with_output, Cmd, Result,
 };

 // Latest stable, feel free to send a PR if this lags behind.
@ -62,21 +62,21 @@ fn main() -> Result<()> {
                help::print_no_param_subcommand_help(&subcommand);
                return Ok(());
            }
-            gen_tests(Overwrite)?
+            codegen::generate_parser_tests(Mode::Overwrite)?
        }
        "codegen" => {
            if matches.contains(["-h", "--help"]) {
                help::print_no_param_subcommand_help(&subcommand);
                return Ok(());
            }
-            generate_boilerplate(Overwrite)?
+            codegen::generate_syntax(Mode::Overwrite)?
        }
        "format" => {
            if matches.contains(["-h", "--help"]) {
                help::print_no_param_subcommand_help(&subcommand);
                return Ok(());
            }
-            run_rustfmt(Overwrite)?
+            run_rustfmt(Mode::Overwrite)?
        }
        "format-hook" => {
            if matches.contains(["-h", "--help"]) {
--- a/xtask/tests/tidy-tests/cli.rs
+++ b/xtask/tests/tidy-tests/cli.rs
@ -1,23 +1,26 @@
 use walkdir::WalkDir;
-use xtask::{gen_tests, generate_boilerplate, project_root, run_rustfmt, Verify};
+use xtask::{
+    codegen::{self, Mode},
+    project_root, run_rustfmt,
+};

 #[test]
 fn generated_grammar_is_fresh() {
-    if let Err(error) = generate_boilerplate(Verify) {
+    if let Err(error) = codegen::generate_syntax(Mode::Verify) {
        panic!("{}. Please update it by running `cargo xtask codegen`", error);
    }
 }

 #[test]
 fn generated_tests_are_fresh() {
-    if let Err(error) = gen_tests(Verify) {
+    if let Err(error) = codegen::generate_parser_tests(Mode::Verify) {
        panic!("{}. Please update tests by running `cargo xtask gen-tests`", error);
    }
 }

 #[test]
 fn check_code_formatting() {
-    if let Err(error) = run_rustfmt(Verify) {
+    if let Err(error) = run_rustfmt(Mode::Verify) {
        panic!("{}. Please format the code by running `cargo format`", error);
    }
 }