From b5f13d8d51ef9107363a60b894a741ab596921ce Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 23 Oct 2019 18:13:40 +0300 Subject: [PATCH] xtask: move codegen to a module --- crates/ra_assists/src/assists/early_return.rs | 4 +- xtask/src/bin/pre-commit.rs | 4 +- xtask/src/boilerplate_gen.rs | 348 ----------------- xtask/src/codegen.rs | 46 +++ xtask/src/codegen/gen_parser_tests.rs | 150 ++++++++ xtask/src/codegen/gen_syntax.rs | 354 ++++++++++++++++++ xtask/src/lib.rs | 175 +-------- xtask/src/main.rs | 10 +- xtask/tests/tidy-tests/cli.rs | 11 +- 9 files changed, 569 insertions(+), 533 deletions(-) create mode 100644 xtask/src/codegen.rs create mode 100644 xtask/src/codegen/gen_parser_tests.rs create mode 100644 xtask/src/codegen/gen_syntax.rs diff --git a/crates/ra_assists/src/assists/early_return.rs b/crates/ra_assists/src/assists/early_return.rs index 9c95adc53c..f7d7e12e73 100644 --- a/crates/ra_assists/src/assists/early_return.rs +++ b/crates/ra_assists/src/assists/early_return.rs @@ -2,7 +2,7 @@ //! //! Replace a large conditional with a guarded return. //! -//! ```notrust +//! ```text //! fn <|>main() { //! if cond { //! foo(); @@ -11,7 +11,7 @@ //! } //! ``` //! -> -//! ```notrust +//! ```text //! fn main() { //! if !cond { //! return; diff --git a/xtask/src/bin/pre-commit.rs b/xtask/src/bin/pre-commit.rs index 4ee864756d..cc6ccb25ee 100644 --- a/xtask/src/bin/pre-commit.rs +++ b/xtask/src/bin/pre-commit.rs @@ -2,10 +2,10 @@ use std::process::Command; -use xtask::{project_root, run, run_rustfmt, Overwrite, Result}; +use xtask::{codegen::Mode, project_root, run, run_rustfmt, Result}; fn main() -> Result<()> { - run_rustfmt(Overwrite)?; + run_rustfmt(Mode::Overwrite)?; update_staged() } diff --git a/xtask/src/boilerplate_gen.rs b/xtask/src/boilerplate_gen.rs index 39f1cae665..e69de29bb2 100644 --- a/xtask/src/boilerplate_gen.rs +++ b/xtask/src/boilerplate_gen.rs @@ -1,348 +0,0 @@ -//! 
FIXME: write short doc here - -use std::{ - collections::BTreeMap, - fs, - io::Write, - process::{Command, Stdio}, -}; - -use proc_macro2::{Punct, Spacing}; -use quote::{format_ident, quote}; -use ron; -use serde::Deserialize; - -use crate::{project_root, update, Mode, Result, AST, GRAMMAR, SYNTAX_KINDS}; - -pub fn generate_boilerplate(mode: Mode) -> Result<()> { - let grammar = project_root().join(GRAMMAR); - let grammar: Grammar = { - let text = fs::read_to_string(grammar)?; - ron::de::from_str(&text)? - }; - - let syntax_kinds_file = project_root().join(SYNTAX_KINDS); - let syntax_kinds = generate_syntax_kinds(&grammar)?; - update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; - - let ast_file = project_root().join(AST); - let ast = generate_ast(&grammar)?; - update(ast_file.as_path(), &ast, mode)?; - - Ok(()) -} - -fn generate_ast(grammar: &Grammar) -> Result { - let nodes = grammar.ast.iter().map(|(name, ast_node)| { - let variants = - ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::>(); - let name = format_ident!("{}", name); - - let adt = if variants.is_empty() { - let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); - quote! { - #[derive(Debug, Clone, PartialEq, Eq, Hash)] - pub struct #name { - pub(crate) syntax: SyntaxNode, - } - - impl AstNode for #name { - fn can_cast(kind: SyntaxKind) -> bool { - match kind { - #kind => true, - _ => false, - } - } - fn cast(syntax: SyntaxNode) -> Option { - if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } - } - fn syntax(&self) -> &SyntaxNode { &self.syntax } - } - } - } else { - let kinds = variants - .iter() - .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string()))) - .collect::>(); - - quote! 
{ - #[derive(Debug, Clone, PartialEq, Eq, Hash)] - pub enum #name { - #(#variants(#variants),)* - } - - #( - impl From<#variants> for #name { - fn from(node: #variants) -> #name { - #name::#variants(node) - } - } - )* - - impl AstNode for #name { - fn can_cast(kind: SyntaxKind) -> bool { - match kind { - #(#kinds)|* => true, - _ => false, - } - } - fn cast(syntax: SyntaxNode) -> Option { - let res = match syntax.kind() { - #( - #kinds => #name::#variants(#variants { syntax }), - )* - _ => return None, - }; - Some(res) - } - fn syntax(&self) -> &SyntaxNode { - match self { - #( - #name::#variants(it) => &it.syntax, - )* - } - } - } - } - }; - - let traits = ast_node.traits.iter().map(|trait_name| { - let trait_name = format_ident!("{}", trait_name); - quote!(impl ast::#trait_name for #name {}) - }); - - let collections = ast_node.collections.iter().map(|(name, kind)| { - let method_name = format_ident!("{}", name); - let kind = format_ident!("{}", kind); - quote! { - pub fn #method_name(&self) -> AstChildren<#kind> { - AstChildren::new(&self.syntax) - } - } - }); - - let options = ast_node.options.iter().map(|attr| { - let method_name = match attr { - Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)), - Attr::NameType(n, _) => format_ident!("{}", n), - }; - let ty = match attr { - Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t), - }; - quote! { - pub fn #method_name(&self) -> Option<#ty> { - AstChildren::new(&self.syntax).next() - } - } - }); - - quote! { - #adt - - #(#traits)* - - impl #name { - #(#collections)* - #(#options)* - } - } - }); - - let ast = quote! 
{ - use crate::{ - SyntaxNode, SyntaxKind::{self, *}, - ast::{self, AstNode, AstChildren}, - }; - - #(#nodes)* - }; - - let pretty = reformat(ast)?; - Ok(pretty) -} - -fn generate_syntax_kinds(grammar: &Grammar) -> Result { - let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar - .punct - .iter() - .filter(|(token, _name)| token.len() == 1) - .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name))) - .unzip(); - - let punctuation_values = grammar.punct.iter().map(|(token, _name)| { - if "{}[]()".contains(token) { - let c = token.chars().next().unwrap(); - quote! { #c } - } else { - let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint)); - quote! { #(#cs)* } - } - }); - let punctuation = - grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::>(); - - let full_keywords_values = &grammar.keywords; - let full_keywords = - full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw))); - - let all_keywords_values = - grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::>(); - let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw)); - let all_keywords = all_keywords_values - .iter() - .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name))) - .collect::>(); - - let literals = - grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::>(); - - let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::>(); - - let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::>(); - - let ast = quote! { - #![allow(bad_style, missing_docs, unreachable_pub)] - /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 
- #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] - #[repr(u16)] - pub enum SyntaxKind { - // Technical SyntaxKinds: they appear temporally during parsing, - // but never end up in the final tree - #[doc(hidden)] - TOMBSTONE, - #[doc(hidden)] - EOF, - #(#punctuation,)* - #(#all_keywords,)* - #(#literals,)* - #(#tokens,)* - #(#nodes,)* - - // Technical kind so that we can cast from u16 safely - #[doc(hidden)] - __LAST, - } - use self::SyntaxKind::*; - - impl SyntaxKind { - pub fn is_keyword(self) -> bool { - match self { - #(#all_keywords)|* => true, - _ => false, - } - } - - pub fn is_punct(self) -> bool { - match self { - #(#punctuation)|* => true, - _ => false, - } - } - - pub fn is_literal(self) -> bool { - match self { - #(#literals)|* => true, - _ => false, - } - } - - pub fn from_keyword(ident: &str) -> Option { - let kw = match ident { - #(#full_keywords_values => #full_keywords,)* - _ => return None, - }; - Some(kw) - } - - pub fn from_char(c: char) -> Option { - let tok = match c { - #(#single_byte_tokens_values => #single_byte_tokens,)* - _ => return None, - }; - Some(tok) - } - } - - #[macro_export] - macro_rules! T { - #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)* - #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)* - } - }; - - reformat(ast) -} - -fn reformat(text: impl std::fmt::Display) -> Result { - let mut rustfmt = Command::new("rustfmt") - .arg("--config-path") - .arg(project_root().join("rustfmt.toml")) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn()?; - write!(rustfmt.stdin.take().unwrap(), "{}", text)?; - let output = rustfmt.wait_with_output()?; - let stdout = String::from_utf8(output.stdout)?; - let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`"; - Ok(format!("//! 
{}\n\n{}", preamble, stdout)) -} - -#[derive(Deserialize, Debug)] -struct Grammar { - punct: Vec<(String, String)>, - keywords: Vec, - contextual_keywords: Vec, - literals: Vec, - tokens: Vec, - nodes: Vec, - ast: BTreeMap, -} - -#[derive(Deserialize, Debug)] -struct AstNode { - #[serde(default)] - #[serde(rename = "enum")] - variants: Vec, - - #[serde(default)] - traits: Vec, - #[serde(default)] - collections: Vec<(String, String)>, - #[serde(default)] - options: Vec, -} - -#[derive(Deserialize, Debug)] -#[serde(untagged)] -enum Attr { - Type(String), - NameType(String, String), -} - -fn to_upper_snake_case(s: &str) -> String { - let mut buf = String::with_capacity(s.len()); - let mut prev_is_upper = None; - for c in s.chars() { - if c.is_ascii_uppercase() && prev_is_upper == Some(false) { - buf.push('_') - } - prev_is_upper = Some(c.is_ascii_uppercase()); - - buf.push(c.to_ascii_uppercase()); - } - buf -} - -fn to_lower_snake_case(s: &str) -> String { - let mut buf = String::with_capacity(s.len()); - let mut prev_is_upper = None; - for c in s.chars() { - if c.is_ascii_uppercase() && prev_is_upper == Some(false) { - buf.push('_') - } - prev_is_upper = Some(c.is_ascii_uppercase()); - - buf.push(c.to_ascii_lowercase()); - } - buf -} diff --git a/xtask/src/codegen.rs b/xtask/src/codegen.rs new file mode 100644 index 0000000000..948b867192 --- /dev/null +++ b/xtask/src/codegen.rs @@ -0,0 +1,46 @@ +//! We use code generation heavily in rust-analyzer. +//! +//! Rather than doing it via proc-macros, we use the old-school way of just dumping +//! the source code. +//! +//! This module's submodules define specific bits that we generate.
+ +mod gen_syntax; +mod gen_parser_tests; + +use std::{fs, path::Path}; + +use crate::Result; + +pub use self::{gen_parser_tests::generate_parser_tests, gen_syntax::generate_syntax}; + +pub const GRAMMAR: &str = "crates/ra_syntax/src/grammar.ron"; +const GRAMMAR_DIR: &str = "crates/ra_parser/src/grammar"; +const OK_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/ok"; +const ERR_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/err"; + +pub const SYNTAX_KINDS: &str = "crates/ra_parser/src/syntax_kind/generated.rs"; +pub const AST: &str = "crates/ra_syntax/src/ast/generated.rs"; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Mode { + Overwrite, + Verify, +} + +/// A helper to update a file on disk if it has changed. +/// In `Mode::Verify`, returns an error instead of writing when the contents differ. +pub fn update(path: &Path, contents: &str, mode: Mode) -> Result<()> { + match fs::read_to_string(path) { + Ok(ref old_contents) if old_contents == contents => { + return Ok(()); + } + _ => (), + } + if mode == Mode::Verify { + Err(format!("`{}` is not up-to-date", path.display()))?; + } + eprintln!("updating {}", path.display()); + fs::write(path, contents)?; + Ok(()) +} diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs new file mode 100644 index 0000000000..e09b6fcfec --- /dev/null +++ b/xtask/src/codegen/gen_parser_tests.rs @@ -0,0 +1,150 @@ +//! This module greps parser's code for specially formatted comments and turns +//! them into tests.
+ +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use itertools::Itertools; + +use crate::{ + codegen::{self, update, Mode}, + project_root, Result, +}; + +pub fn generate_parser_tests(mode: Mode) -> Result<()> { + let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?; + fn install_tests(tests: &HashMap, into: &str, mode: Mode) -> Result<()> { + let tests_dir = project_root().join(into); + if !tests_dir.is_dir() { + fs::create_dir_all(&tests_dir)?; + } + // ok is never actually read, but it needs to be specified to create a Test in existing_tests + let existing = existing_tests(&tests_dir, true)?; + for t in existing.keys().filter(|&t| !tests.contains_key(t)) { + panic!("Test is deleted: {}", t); + } + + let mut new_idx = existing.len() + 1; + for (name, test) in tests { + let path = match existing.get(name) { + Some((path, _test)) => path.clone(), + None => { + let file_name = format!("{:04}_{}.rs", new_idx, name); + new_idx += 1; + tests_dir.join(file_name) + } + }; + update(&path, &test.text, mode)?; + } + Ok(()) + } + install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?; + install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode) +} + +#[derive(Debug)] +struct Test { + pub name: String, + pub text: String, + pub ok: bool, +} + +#[derive(Default, Debug)] +struct Tests { + pub ok: HashMap, + pub err: HashMap, +} + +fn collect_tests(s: &str) -> Vec<(usize, Test)> { + let mut res = vec![]; + let prefix = "// "; + let comment_blocks = s + .lines() + .map(str::trim_start) + .enumerate() + .group_by(|(_idx, line)| line.starts_with(prefix)); + + 'outer: for (is_comment, block) in comment_blocks.into_iter() { + if !is_comment { + continue; + } + let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..])); + + let mut ok = true; + let (start_line, name) = loop { + match block.next() { + Some((idx, line)) if line.starts_with("test ") => { + break (idx, line["test ".len()..].to_string()); + } + 
Some((idx, line)) if line.starts_with("test_err ") => { + ok = false; + break (idx, line["test_err ".len()..].to_string()); + } + Some(_) => (), + None => continue 'outer, + } + }; + let text: String = + itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n"); + assert!(!text.trim().is_empty() && text.ends_with('\n')); + res.push((start_line, Test { name, text, ok })) + } + res +} + +fn tests_from_dir(dir: &Path) -> Result { + let mut res = Tests::default(); + for entry in ::walkdir::WalkDir::new(dir) { + let entry = entry.unwrap(); + if !entry.file_type().is_file() { + continue; + } + if entry.path().extension().unwrap_or_default() != "rs" { + continue; + } + process_file(&mut res, entry.path())?; + } + let grammar_rs = dir.parent().unwrap().join("grammar.rs"); + process_file(&mut res, &grammar_rs)?; + return Ok(res); + fn process_file(res: &mut Tests, path: &Path) -> Result<()> { + let text = fs::read_to_string(path)?; + + for (_, test) in collect_tests(&text) { + if test.ok { + if let Some(old_test) = res.ok.insert(test.name.clone(), test) { + Err(format!("Duplicate test: {}", old_test.name))? + } + } else { + if let Some(old_test) = res.err.insert(test.name.clone(), test) { + Err(format!("Duplicate test: {}", old_test.name))? + } + } + } + Ok(()) + } +} + +fn existing_tests(dir: &Path, ok: bool) -> Result> { + let mut res = HashMap::new(); + for file in fs::read_dir(dir)? 
{ + let file = file?; + let path = file.path(); + if path.extension().unwrap_or_default() != "rs" { + continue; + } + let name = { + let file_name = path.file_name().unwrap().to_str().unwrap(); + file_name[5..file_name.len() - 3].to_string() + }; + let text = fs::read_to_string(&path)?; + let test = Test { name: name.clone(), text, ok }; + if let Some(old) = res.insert(name, (path, test)) { + println!("Duplicate test: {:?}", old); + } + } + Ok(res) +} diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs new file mode 100644 index 0000000000..6a81c0e4df --- /dev/null +++ b/xtask/src/codegen/gen_syntax.rs @@ -0,0 +1,354 @@ +//! This module generates the AST datatypes used by rust-analyzer. +//! +//! Specifically, it generates the `SyntaxKind` enum and a number of newtype +//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. + +use std::{ + collections::BTreeMap, + fs, + io::Write, + process::{Command, Stdio}, +}; + +use proc_macro2::{Punct, Spacing}; +use quote::{format_ident, quote}; +use ron; +use serde::Deserialize; + +use crate::{ + codegen::{self, update, Mode}, + project_root, Result, +}; + +pub fn generate_syntax(mode: Mode) -> Result<()> { + let grammar = project_root().join(codegen::GRAMMAR); + let grammar: Grammar = { + let text = fs::read_to_string(grammar)?; + ron::de::from_str(&text)?
+ }; + + let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); + let syntax_kinds = generate_syntax_kinds(&grammar)?; + update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; + + let ast_file = project_root().join(codegen::AST); + let ast = generate_ast(&grammar)?; + update(ast_file.as_path(), &ast, mode)?; + + Ok(()) +} + +fn generate_ast(grammar: &Grammar) -> Result { + let nodes = grammar.ast.iter().map(|(name, ast_node)| { + let variants = + ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::>(); + let name = format_ident!("{}", name); + + let adt = if variants.is_empty() { + let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); + quote! { + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct #name { + pub(crate) syntax: SyntaxNode, + } + + impl AstNode for #name { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + #kind => true, + _ => false, + } + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } + } + fn syntax(&self) -> &SyntaxNode { &self.syntax } + } + } + } else { + let kinds = variants + .iter() + .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string()))) + .collect::>(); + + quote! 
{ + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub enum #name { + #(#variants(#variants),)* + } + + #( + impl From<#variants> for #name { + fn from(node: #variants) -> #name { + #name::#variants(node) + } + } + )* + + impl AstNode for #name { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + #(#kinds)|* => true, + _ => false, + } + } + fn cast(syntax: SyntaxNode) -> Option { + let res = match syntax.kind() { + #( + #kinds => #name::#variants(#variants { syntax }), + )* + _ => return None, + }; + Some(res) + } + fn syntax(&self) -> &SyntaxNode { + match self { + #( + #name::#variants(it) => &it.syntax, + )* + } + } + } + } + }; + + let traits = ast_node.traits.iter().map(|trait_name| { + let trait_name = format_ident!("{}", trait_name); + quote!(impl ast::#trait_name for #name {}) + }); + + let collections = ast_node.collections.iter().map(|(name, kind)| { + let method_name = format_ident!("{}", name); + let kind = format_ident!("{}", kind); + quote! { + pub fn #method_name(&self) -> AstChildren<#kind> { + AstChildren::new(&self.syntax) + } + } + }); + + let options = ast_node.options.iter().map(|attr| { + let method_name = match attr { + Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)), + Attr::NameType(n, _) => format_ident!("{}", n), + }; + let ty = match attr { + Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t), + }; + quote! { + pub fn #method_name(&self) -> Option<#ty> { + AstChildren::new(&self.syntax).next() + } + } + }); + + quote! { + #adt + + #(#traits)* + + impl #name { + #(#collections)* + #(#options)* + } + } + }); + + let ast = quote! 
{ + use crate::{ + SyntaxNode, SyntaxKind::{self, *}, + ast::{self, AstNode, AstChildren}, + }; + + #(#nodes)* + }; + + let pretty = reformat(ast)?; + Ok(pretty) +} + +fn generate_syntax_kinds(grammar: &Grammar) -> Result { + let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar + .punct + .iter() + .filter(|(token, _name)| token.len() == 1) + .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name))) + .unzip(); + + let punctuation_values = grammar.punct.iter().map(|(token, _name)| { + if "{}[]()".contains(token) { + let c = token.chars().next().unwrap(); + quote! { #c } + } else { + let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint)); + quote! { #(#cs)* } + } + }); + let punctuation = + grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::>(); + + let full_keywords_values = &grammar.keywords; + let full_keywords = + full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw))); + + let all_keywords_values = + grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::>(); + let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw)); + let all_keywords = all_keywords_values + .iter() + .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name))) + .collect::>(); + + let literals = + grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::>(); + + let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::>(); + + let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::>(); + + let ast = quote! { + #![allow(bad_style, missing_docs, unreachable_pub)] + /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 
+ #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] + #[repr(u16)] + pub enum SyntaxKind { + // Technical SyntaxKinds: they appear temporally during parsing, + // but never end up in the final tree + #[doc(hidden)] + TOMBSTONE, + #[doc(hidden)] + EOF, + #(#punctuation,)* + #(#all_keywords,)* + #(#literals,)* + #(#tokens,)* + #(#nodes,)* + + // Technical kind so that we can cast from u16 safely + #[doc(hidden)] + __LAST, + } + use self::SyntaxKind::*; + + impl SyntaxKind { + pub fn is_keyword(self) -> bool { + match self { + #(#all_keywords)|* => true, + _ => false, + } + } + + pub fn is_punct(self) -> bool { + match self { + #(#punctuation)|* => true, + _ => false, + } + } + + pub fn is_literal(self) -> bool { + match self { + #(#literals)|* => true, + _ => false, + } + } + + pub fn from_keyword(ident: &str) -> Option { + let kw = match ident { + #(#full_keywords_values => #full_keywords,)* + _ => return None, + }; + Some(kw) + } + + pub fn from_char(c: char) -> Option { + let tok = match c { + #(#single_byte_tokens_values => #single_byte_tokens,)* + _ => return None, + }; + Some(tok) + } + } + + #[macro_export] + macro_rules! T { + #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)* + #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)* + } + }; + + reformat(ast) +} + +fn reformat(text: impl std::fmt::Display) -> Result { + let mut rustfmt = Command::new("rustfmt") + .arg("--config-path") + .arg(project_root().join("rustfmt.toml")) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + write!(rustfmt.stdin.take().unwrap(), "{}", text)?; + let output = rustfmt.wait_with_output()?; + let stdout = String::from_utf8(output.stdout)?; + let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`"; + Ok(format!("//! 
{}\n\n{}", preamble, stdout)) +} + +#[derive(Deserialize, Debug)] +struct Grammar { + punct: Vec<(String, String)>, + keywords: Vec, + contextual_keywords: Vec, + literals: Vec, + tokens: Vec, + nodes: Vec, + ast: BTreeMap, +} + +#[derive(Deserialize, Debug)] +struct AstNode { + #[serde(default)] + #[serde(rename = "enum")] + variants: Vec, + + #[serde(default)] + traits: Vec, + #[serde(default)] + collections: Vec<(String, String)>, + #[serde(default)] + options: Vec, +} + +#[derive(Deserialize, Debug)] +#[serde(untagged)] +enum Attr { + Type(String), + NameType(String, String), +} + +fn to_upper_snake_case(s: &str) -> String { + let mut buf = String::with_capacity(s.len()); + let mut prev_is_upper = None; + for c in s.chars() { + if c.is_ascii_uppercase() && prev_is_upper == Some(false) { + buf.push('_') + } + prev_is_upper = Some(c.is_ascii_uppercase()); + + buf.push(c.to_ascii_uppercase()); + } + buf +} + +fn to_lower_snake_case(s: &str) -> String { + let mut buf = String::with_capacity(s.len()); + let mut prev_is_upper = None; + for c in s.chars() { + if c.is_ascii_uppercase() && prev_is_upper == Some(false) { + buf.push('_') + } + prev_is_upper = Some(c.is_ascii_uppercase()); + + buf.push(c.to_ascii_lowercase()); + } + buf +} diff --git a/xtask/src/lib.rs b/xtask/src/lib.rs index a8685f5677..cc69463a98 100644 --- a/xtask/src/lib.rs +++ b/xtask/src/lib.rs @@ -1,9 +1,8 @@ //! 
FIXME: write short doc here -mod boilerplate_gen; +pub mod codegen; use std::{ - collections::HashMap, error::Error, fs, io::{Error as IoError, ErrorKind}, @@ -11,72 +10,12 @@ use std::{ process::{Command, Output, Stdio}, }; -use itertools::Itertools; - -pub use self::boilerplate_gen::generate_boilerplate; +use crate::codegen::Mode; pub type Result = std::result::Result>; -pub const GRAMMAR: &str = "crates/ra_syntax/src/grammar.ron"; -const GRAMMAR_DIR: &str = "crates/ra_parser/src/grammar"; -const OK_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/ok"; -const ERR_INLINE_TESTS_DIR: &str = "crates/ra_syntax/test_data/parser/inline/err"; - -pub const SYNTAX_KINDS: &str = "crates/ra_parser/src/syntax_kind/generated.rs"; -pub const AST: &str = "crates/ra_syntax/src/ast/generated.rs"; const TOOLCHAIN: &str = "stable"; -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum Mode { - Overwrite, - Verify, -} -pub use Mode::*; - -#[derive(Debug)] -pub struct Test { - pub name: String, - pub text: String, - pub ok: bool, -} - -pub fn collect_tests(s: &str) -> Vec<(usize, Test)> { - let mut res = vec![]; - let prefix = "// "; - let comment_blocks = s - .lines() - .map(str::trim_start) - .enumerate() - .group_by(|(_idx, line)| line.starts_with(prefix)); - - 'outer: for (is_comment, block) in comment_blocks.into_iter() { - if !is_comment { - continue; - } - let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..])); - - let mut ok = true; - let (start_line, name) = loop { - match block.next() { - Some((idx, line)) if line.starts_with("test ") => { - break (idx, line["test ".len()..].to_string()); - } - Some((idx, line)) if line.starts_with("test_err ") => { - ok = false; - break (idx, line["test_err ".len()..].to_string()); - } - Some(_) => (), - None => continue 'outer, - } - }; - let text: String = - itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n"); - assert!(!text.trim().is_empty() && text.ends_with('\n')); - 
res.push((start_line, Test { name, text, ok })) - } - res -} - pub fn project_root() -> PathBuf { Path::new(&env!("CARGO_MANIFEST_DIR")).ancestors().nth(1).unwrap().to_path_buf() } @@ -126,7 +65,7 @@ pub fn run_rustfmt(mode: Mode) -> Result<()> { _ => install_rustfmt()?, }; - if mode == Verify { + if mode == Mode::Verify { run(&format!("rustup run {} -- cargo fmt -- --check", TOOLCHAIN), ".")?; } else { run(&format!("rustup run {} -- cargo fmt", TOOLCHAIN), ".")?; @@ -206,37 +145,6 @@ pub fn run_fuzzer() -> Result<()> { run("rustup run nightly -- cargo fuzz run parser", "./crates/ra_syntax") } -pub fn gen_tests(mode: Mode) -> Result<()> { - let tests = tests_from_dir(&project_root().join(Path::new(GRAMMAR_DIR)))?; - fn install_tests(tests: &HashMap, into: &str, mode: Mode) -> Result<()> { - let tests_dir = project_root().join(into); - if !tests_dir.is_dir() { - fs::create_dir_all(&tests_dir)?; - } - // ok is never actually read, but it needs to be specified to create a Test in existing_tests - let existing = existing_tests(&tests_dir, true)?; - for t in existing.keys().filter(|&t| !tests.contains_key(t)) { - panic!("Test is deleted: {}", t); - } - - let mut new_idx = existing.len() + 1; - for (name, test) in tests { - let path = match existing.get(name) { - Some((path, _test)) => path.clone(), - None => { - let file_name = format!("{:04}_{}.rs", new_idx, name); - new_idx += 1; - tests_dir.join(file_name) - } - }; - update(&path, &test.text, mode)?; - } - Ok(()) - } - install_tests(&tests.ok, OK_INLINE_TESTS_DIR, mode)?; - install_tests(&tests.err, ERR_INLINE_TESTS_DIR, mode) -} - fn do_run(cmdline: &str, dir: &str, mut f: F) -> Result where F: FnMut(&mut Command), @@ -253,80 +161,3 @@ where } Ok(output) } - -#[derive(Default, Debug)] -struct Tests { - pub ok: HashMap, - pub err: HashMap, -} - -fn tests_from_dir(dir: &Path) -> Result { - let mut res = Tests::default(); - for entry in ::walkdir::WalkDir::new(dir) { - let entry = entry.unwrap(); - if 
!entry.file_type().is_file() { - continue; - } - if entry.path().extension().unwrap_or_default() != "rs" { - continue; - } - process_file(&mut res, entry.path())?; - } - let grammar_rs = dir.parent().unwrap().join("grammar.rs"); - process_file(&mut res, &grammar_rs)?; - return Ok(res); - fn process_file(res: &mut Tests, path: &Path) -> Result<()> { - let text = fs::read_to_string(path)?; - - for (_, test) in collect_tests(&text) { - if test.ok { - if let Some(old_test) = res.ok.insert(test.name.clone(), test) { - Err(format!("Duplicate test: {}", old_test.name))? - } - } else { - if let Some(old_test) = res.err.insert(test.name.clone(), test) { - Err(format!("Duplicate test: {}", old_test.name))? - } - } - } - Ok(()) - } -} - -fn existing_tests(dir: &Path, ok: bool) -> Result> { - let mut res = HashMap::new(); - for file in fs::read_dir(dir)? { - let file = file?; - let path = file.path(); - if path.extension().unwrap_or_default() != "rs" { - continue; - } - let name = { - let file_name = path.file_name().unwrap().to_str().unwrap(); - file_name[5..file_name.len() - 3].to_string() - }; - let text = fs::read_to_string(&path)?; - let test = Test { name: name.clone(), text, ok }; - if let Some(old) = res.insert(name, (path, test)) { - println!("Duplicate test: {:?}", old); - } - } - Ok(res) -} - -/// A helper to update file on disk if it has changed. 
-/// With verify = false, -pub fn update(path: &Path, contents: &str, mode: Mode) -> Result<()> { - match fs::read_to_string(path) { - Ok(ref old_contents) if old_contents == contents => { - return Ok(()); - } - _ => (), - } - if mode == Verify { - Err(format!("`{}` is not up-to-date", path.display()))?; - } - eprintln!("updating {}", path.display()); - fs::write(path, contents)?; - Ok(()) -} diff --git a/xtask/src/main.rs b/xtask/src/main.rs index c08915aac0..0b19c34f43 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -7,8 +7,8 @@ use core::str; use pico_args::Arguments; use std::{env, path::PathBuf}; use xtask::{ - gen_tests, generate_boilerplate, install_format_hook, run, run_clippy, run_fuzzer, run_rustfmt, - run_with_output, Cmd, Overwrite, Result, + codegen::{self, Mode}, + install_format_hook, run, run_clippy, run_fuzzer, run_rustfmt, run_with_output, Cmd, Result, }; // Latest stable, feel free to send a PR if this lags behind. @@ -62,21 +62,21 @@ fn main() -> Result<()> { help::print_no_param_subcommand_help(&subcommand); return Ok(()); } - gen_tests(Overwrite)? + codegen::generate_parser_tests(Mode::Overwrite)? } "codegen" => { if matches.contains(["-h", "--help"]) { help::print_no_param_subcommand_help(&subcommand); return Ok(()); } - generate_boilerplate(Overwrite)? + codegen::generate_syntax(Mode::Overwrite)? } "format" => { if matches.contains(["-h", "--help"]) { help::print_no_param_subcommand_help(&subcommand); return Ok(()); } - run_rustfmt(Overwrite)? + run_rustfmt(Mode::Overwrite)? 
} "format-hook" => { if matches.contains(["-h", "--help"]) { diff --git a/xtask/tests/tidy-tests/cli.rs b/xtask/tests/tidy-tests/cli.rs index 5d8ddea836..304d77d891 100644 --- a/xtask/tests/tidy-tests/cli.rs +++ b/xtask/tests/tidy-tests/cli.rs @@ -1,23 +1,26 @@ use walkdir::WalkDir; -use xtask::{gen_tests, generate_boilerplate, project_root, run_rustfmt, Verify}; +use xtask::{ + codegen::{self, Mode}, + project_root, run_rustfmt, +}; #[test] fn generated_grammar_is_fresh() { - if let Err(error) = generate_boilerplate(Verify) { + if let Err(error) = codegen::generate_syntax(Mode::Verify) { panic!("{}. Please update it by running `cargo xtask codegen`", error); } } #[test] fn generated_tests_are_fresh() { - if let Err(error) = gen_tests(Verify) { + if let Err(error) = codegen::generate_parser_tests(Mode::Verify) { panic!("{}. Please update tests by running `cargo xtask gen-tests`", error); } } #[test] fn check_code_formatting() { - if let Err(error) = run_rustfmt(Verify) { + if let Err(error) = run_rustfmt(Mode::Verify) { panic!("{}. Please format the code by running `cargo format`", error); } }