Cleanup tools

This commit is contained in:
Aleksey Kladov 2018-07-30 14:06:22 +03:00
parent 9a4957d143
commit 6983091d6d
9 changed files with 290 additions and 300 deletions

View file

@@ -1,4 +1,4 @@
[alias]
parse = "run --package tools --bin parse"
gen = "run --package tools --bin gen"
collect-tests = "run --package tools --bin collect-tests --"
gen-kinds = "run --package tools -- gen-kinds"
gen-tests = "run --package tools -- gen-tests"

View file

@@ -8,6 +8,8 @@ matrix:
script:
- cargo fmt --all -- --write-mode=diff
- cargo test
- cargo gen-kinds --verify
- cargo gen-tests --verify
- rust: nightly
before_script:
- rustup component add clippy-preview

View file

@@ -2,8 +2,9 @@ extern crate libsyntax2;
use std::io::Read;
use libsyntax2::{parse};
use libsyntax2::utils::dump_tree_green;
use libsyntax2::{
parse, utils::dump_tree_green
};
fn main() {
let text = read_input();

View file

@@ -1,6 +1,5 @@
#![allow(bad_style, missing_docs, unreachable_pub)]
#![cfg_attr(rustfmt, rustfmt_skip)]
//! Generated from grammar.ron
use super::SyntaxInfo;
/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
@@ -138,7 +137,6 @@ pub enum SyntaxKind {
VALUE_PARAMETER,
BLOCK,
LET_STMT,
// Technical SyntaxKinds: they appear temporally during parsing,
// but never end up in the final tree
#[doc(hidden)]
@@ -146,7 +144,7 @@ pub enum SyntaxKind {
#[doc(hidden)]
EOF,
}
pub(crate) use self::SyntaxKind::*;
use self::SyntaxKind::*;
impl SyntaxKind {
pub(crate) fn info(self) -> &'static SyntaxInfo {
@@ -289,38 +287,39 @@ impl SyntaxKind {
}
}
pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {
match ident {
"use" => Some(USE_KW),
"fn" => Some(FN_KW),
"struct" => Some(STRUCT_KW),
"enum" => Some(ENUM_KW),
"trait" => Some(TRAIT_KW),
"impl" => Some(IMPL_KW),
"true" => Some(TRUE_KW),
"false" => Some(FALSE_KW),
"as" => Some(AS_KW),
"extern" => Some(EXTERN_KW),
"crate" => Some(CRATE_KW),
"mod" => Some(MOD_KW),
"pub" => Some(PUB_KW),
"self" => Some(SELF_KW),
"super" => Some(SUPER_KW),
"in" => Some(IN_KW),
"where" => Some(WHERE_KW),
"for" => Some(FOR_KW),
"loop" => Some(LOOP_KW),
"while" => Some(WHILE_KW),
"if" => Some(IF_KW),
"match" => Some(MATCH_KW),
"const" => Some(CONST_KW),
"static" => Some(STATIC_KW),
"mut" => Some(MUT_KW),
"unsafe" => Some(UNSAFE_KW),
"type" => Some(TYPE_KW),
"ref" => Some(REF_KW),
"let" => Some(LET_KW),
_ => None,
}
let kw = match ident {
"use" => USE_KW,
"fn" => FN_KW,
"struct" => STRUCT_KW,
"enum" => ENUM_KW,
"trait" => TRAIT_KW,
"impl" => IMPL_KW,
"true" => TRUE_KW,
"false" => FALSE_KW,
"as" => AS_KW,
"extern" => EXTERN_KW,
"crate" => CRATE_KW,
"mod" => MOD_KW,
"pub" => PUB_KW,
"self" => SELF_KW,
"super" => SUPER_KW,
"in" => IN_KW,
"where" => WHERE_KW,
"for" => FOR_KW,
"loop" => LOOP_KW,
"while" => WHILE_KW,
"if" => IF_KW,
"match" => MATCH_KW,
"const" => CONST_KW,
"static" => STATIC_KW,
"mut" => MUT_KW,
"unsafe" => UNSAFE_KW,
"type" => TYPE_KW,
"ref" => REF_KW,
"let" => LET_KW,
_ => return None,
};
Some(kw)
}
}

View file

@@ -0,0 +1,59 @@
#![allow(bad_style, missing_docs, unreachable_pub)]
#![cfg_attr(rustfmt, rustfmt_skip)]
use super::SyntaxInfo;
/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SyntaxKind {
{%- for t in tokens %}
{{t}},
{%- endfor -%}
{% for kw in keywords %}
{{kw | upper}}_KW,
{%- endfor -%}
{% for kw in contextual_keywords %}
{{kw | upper}}_KW,
{%- endfor -%}
{% for node in nodes %}
{{node}},
{%- endfor %}
// Technical SyntaxKinds: they appear temporally during parsing,
// but never end up in the final tree
#[doc(hidden)]
TOMBSTONE,
#[doc(hidden)]
EOF,
}
use self::SyntaxKind::*;
impl SyntaxKind {
pub(crate) fn info(self) -> &'static SyntaxInfo {
match self {
{%- for t in tokens %}
{{t}} => &SyntaxInfo { name: "{{t}}" },
{%- endfor -%}
{% for kw in keywords %}
{{kw | upper}}_KW => &SyntaxInfo { name: "{{kw | upper}}_KW" },
{%- endfor -%}
{% for kw in contextual_keywords %}
{{kw | upper}}_KW => &SyntaxInfo { name: "{{kw | upper}}_KW" },
{%- endfor -%}
{% for node in nodes %}
{{node}} => &SyntaxInfo { name: "{{node}}" },
{%- endfor %}
TOMBSTONE => &SyntaxInfo { name: "TOMBSTONE" },
EOF => &SyntaxInfo { name: "EOF" },
}
}
pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {
let kw = match ident {
{%- for kw in keywords %}
"{{kw}}" => {{kw | upper}}_KW,
{%- endfor %}
_ => return None,
};
Some(kw)
}
}

View file

@@ -5,10 +5,9 @@ authors = ["Aleksey Kladov <aleksey.kladov@gmail.com>"]
publish = false
[dependencies]
serde = "1.0.26"
serde_derive = "1.0.26"
file = "1.1.1"
ron = "0.1.5"
walkdir = "2"
itertools = "0.7"
libsyntax2 = { path = "../" }
ron = "0.1.7"
walkdir = "2.1.3"
itertools = "0.7.8"
tera = "0.11"
clap = "2.32.0"
failure = "0.1.1"

View file

@@ -1,133 +0,0 @@
extern crate file;
extern crate itertools;
extern crate walkdir;
use walkdir::WalkDir;
use itertools::Itertools;
use std::path::{Path, PathBuf};
use std::collections::HashSet;
use std::fs;
// Entry point: syncs inline parser tests from the grammar sources to disk.
//
// With `--verify`, a test that is not yet recorded is a hard failure;
// otherwise new tests are written out as sequentially numbered `.rs` files.
fn main() {
let verify = ::std::env::args().any(|arg| arg == "--verify");
let d = grammar_dir();
let tests = tests_from_dir(&d);
let existing = existing_tests();
// A recorded test that no longer exists in the grammar is always fatal.
for t in existing.difference(&tests) {
panic!("Test is deleted: {}\n{}", t.name, t.text);
}
let new_tests = tests.difference(&existing);
for (i, t) in new_tests.enumerate() {
if verify {
panic!("Inline test is not recorded: {}", t.name);
}
// New tests are numbered after the ones already on disk, zero-padded to 4 digits.
let name = format!("{:04}_{}.rs", existing.len() + i + 1, t.name);
println!("Creating {}", name);
let path = inline_tests_dir().join(name);
file::put_text(&path, &t.text).unwrap();
}
}
// A single inline parser test: its name and its source text.
// NOTE(review): identity is the `name` alone — the manual `PartialEq`/`Hash`
// impls below ignore `text`, so two tests with the same name collide in a set.
#[derive(Debug, Eq)]
struct Test {
name: String,
text: String,
}
/// Tests are identified by name alone; the body text is not compared.
impl PartialEq for Test {
    fn eq(&self, other: &Test) -> bool {
        self.name == other.name
    }
}
/// Hashes only the name, consistent with the `PartialEq` impl.
impl ::std::hash::Hash for Test {
    fn hash<H: ::std::hash::Hasher>(&self, state: &mut H) {
        ::std::hash::Hash::hash(&self.name, state)
    }
}
// Recursively collects inline tests from every `.rs` file under `dir`.
// Panics on I/O errors and on duplicate test names.
fn tests_from_dir(dir: &Path) -> HashSet<Test> {
let mut res = HashSet::new();
for entry in WalkDir::new(dir) {
let entry = entry.unwrap();
if !entry.file_type().is_file() {
continue;
}
// Only Rust sources can contain inline tests.
if entry.path().extension().unwrap_or_default() != "rs" {
continue;
}
let text = file::get_text(entry.path()).unwrap();
for test in collect_tests(&text) {
// `HashSet::replace` returns the previously stored equal element,
// which here means two tests share a name.
if let Some(old_test) = res.replace(test) {
panic!("Duplicate test: {}", old_test.name)
}
}
}
res
}
// Extracts inline tests from Rust source text.
//
// A test is a `// `-comment block containing a line `// test <name>`; the
// comment lines after the marker (with the prefix stripped) form the test body.
fn collect_tests(s: &str) -> Vec<Test> {
let mut res = vec![];
let prefix = "// ";
// Group consecutive lines by whether they start with `// `; only the
// comment runs are inspected below.
let comment_blocks = s.lines()
.map(str::trim_left)
.group_by(|line| line.starts_with(prefix));
'outer: for (is_comment, block) in comment_blocks.into_iter() {
if !is_comment {
continue;
}
// Strip the `// ` prefix from every line of the block.
let mut block = block.map(|line| &line[prefix.len()..]);
// Scan forward for the `test <name>` marker; a block without one is
// not a test and is skipped entirely via the labelled continue.
let name = loop {
match block.next() {
Some(line) if line.starts_with("test ") => break line["test ".len()..].to_string(),
Some(_) => (),
None => continue 'outer,
}
};
// The remaining lines are the test body; the trailing `once("")` makes
// the joined text end with a newline.
let text: String = itertools::join(block.chain(::std::iter::once("")), "\n");
assert!(!text.trim().is_empty() && text.ends_with("\n"));
res.push(Test { name, text })
}
res
}
// Reads the already-recorded tests from the inline-tests directory.
// File names are expected to look like `NNNN_<name>.rs`: the 4-digit prefix
// plus underscore and the `.rs` suffix are stripped to recover the name.
// NOTE(review): the slicing panics on file names shorter than 8 bytes —
// presumably only generated files live in this directory; confirm.
fn existing_tests() -> HashSet<Test> {
let mut res = HashSet::new();
for file in fs::read_dir(&inline_tests_dir()).unwrap() {
let file = file.unwrap();
let path = file.path();
if path.extension().unwrap_or_default() != "rs" {
continue;
}
let name = path.file_name().unwrap().to_str().unwrap();
let name = name["0000_".len()..name.len() - 3].to_string();
let text = file::get_text(&path).unwrap();
res.insert(Test { name, text });
}
res
}
/// Returns the directory holding recorded inline tests, creating it on demand.
fn inline_tests_dir() -> PathBuf {
    let dir = base_dir().join("tests/data/parser/inline");
    if !dir.is_dir() {
        fs::create_dir_all(&dir).unwrap();
    }
    dir
}
/// Returns the directory with the parser grammar sources that embed inline tests.
fn grammar_dir() -> PathBuf {
    let base = base_dir();
    base.join("src/parser/grammar")
}
/// Returns the parent of this crate's manifest directory (the libsyntax2 root).
fn base_dir() -> PathBuf {
    let manifest: PathBuf = env!("CARGO_MANIFEST_DIR").into();
    manifest.parent().unwrap().to_owned()
}

View file

@@ -1,121 +0,0 @@
extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate file;
extern crate ron;
use std::path::PathBuf;
use std::fmt::Write;
/// Regenerates `src/syntax_kinds/generated.rs` from `grammar.ron`,
/// rewriting the target only when its contents actually changed.
fn main() {
    let grammar = Grammar::read();
    let text = grammar.to_syntax_kinds();
    let target = generated_file();
    let current = file::get_text(&target).unwrap_or_default();
    if current != text {
        file::put_text(&target, &text).unwrap();
    }
}
// In-memory form of `grammar.ron`: the token, keyword, and node lists that
// drive code generation for `SyntaxKind`.
#[derive(Deserialize)]
struct Grammar {
keywords: Vec<String>,
contextual_keywords: Vec<String>,
tokens: Vec<String>,
nodes: Vec<String>,
}
impl Grammar {
// Loads and parses `grammar.ron`; panics on I/O or parse errors.
fn read() -> Grammar {
let text = file::get_text(&grammar_file()).unwrap();
ron::de::from_str(&text).unwrap()
}
// Renders the complete `syntax_kinds/generated.rs` source as one string.
// The emitted text is exact generated output — every literal below,
// including its whitespace, ends up in the target file verbatim.
fn to_syntax_kinds(&self) -> String {
let mut acc = String::new();
acc.push_str("#![allow(bad_style, missing_docs, unreachable_pub)]\n");
acc.push_str("#![cfg_attr(rustfmt, rustfmt_skip)]\n");
acc.push_str("//! Generated from grammar.ron\n");
acc.push_str("use super::SyntaxInfo;\n");
acc.push_str("\n");
// All kinds in declaration order: tokens, keywords (as `*_KW`),
// contextual keywords, then composite nodes.
let syntax_kinds: Vec<String> = self.tokens
.iter()
.cloned()
.chain(self.keywords.iter().map(|kw| kw_token(kw)))
.chain(self.contextual_keywords.iter().map(|kw| kw_token(kw)))
.chain(self.nodes.iter().cloned())
.collect();
// enum SyntaxKind
acc.push_str("/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.\n");
acc.push_str("#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]\n");
acc.push_str("pub enum SyntaxKind {\n");
for kind in syntax_kinds.iter() {
write!(acc, " {},\n", scream(kind)).unwrap();
}
acc.push_str("\n");
acc.push_str(" // Technical SyntaxKinds: they appear temporally during parsing,\n");
acc.push_str(" // but never end up in the final tree\n");
acc.push_str(" #[doc(hidden)]\n");
acc.push_str(" TOMBSTONE,\n");
acc.push_str(" #[doc(hidden)]\n");
acc.push_str(" EOF,\n");
acc.push_str("}\n");
acc.push_str("pub(crate) use self::SyntaxKind::*;\n");
acc.push_str("\n");
// fn info
acc.push_str("impl SyntaxKind {\n");
acc.push_str(" pub(crate) fn info(self) -> &'static SyntaxInfo {\n");
acc.push_str(" match self {\n");
for kind in syntax_kinds.iter() {
let sname = scream(kind);
write!(
acc,
" {sname} => &SyntaxInfo {{ name: \"{sname}\" }},\n",
sname = sname
).unwrap();
}
acc.push_str("\n");
acc.push_str(" TOMBSTONE => &SyntaxInfo { name: \"TOMBSTONE\" },\n");
acc.push_str(" EOF => &SyntaxInfo { name: \"EOF\" },\n");
acc.push_str(" }\n");
acc.push_str(" }\n");
// fn from_keyword
acc.push_str(" pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {\n");
acc.push_str(" match ident {\n");
// NB: no contextual_keywords here!
for kw in self.keywords.iter() {
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
}
acc.push_str(" _ => None,\n");
acc.push_str(" }\n");
acc.push_str(" }\n");
acc.push_str("}\n");
acc.push_str("\n");
acc
}
}
/// Location of the grammar description consumed by the generator.
fn grammar_file() -> PathBuf {
    let base = base_dir();
    base.join("src/grammar.ron")
}
/// Location of the generated syntax-kinds source file.
fn generated_file() -> PathBuf {
    let base = base_dir();
    base.join("src/syntax_kinds/generated.rs")
}
/// Upper-cases every ASCII character, e.g. `"struct"` -> `"STRUCT"`.
/// Non-ASCII characters are passed through unchanged.
fn scream(word: &str) -> String {
    let mut out = String::with_capacity(word.len());
    for ch in word.chars() {
        out.push(ch.to_ascii_uppercase());
    }
    out
}
/// Maps a keyword to its SyntaxKind identifier, e.g. `"use"` -> `"USE_KW"`.
fn kw_token(keyword: &str) -> String {
    let screamed = scream(keyword);
    format!("{}_KW", screamed)
}
/// Returns the parent of this crate's manifest directory, i.e. the workspace root.
fn base_dir() -> PathBuf {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    manifest_dir.parent().unwrap().to_owned()
}

184
tools/src/bin/main.rs Normal file
View file

@@ -0,0 +1,184 @@
extern crate clap;
#[macro_use]
extern crate failure;
extern crate tera;
extern crate ron;
extern crate walkdir;
extern crate itertools;
use std::{
fs,
path::{Path},
collections::HashSet,
};
use clap::{App, Arg, SubCommand};
use itertools::Itertools;
/// Shorthand for results carrying a `failure::Error`.
type Result<T> = ::std::result::Result<T, failure::Error>;
// NOTE(review): paths appear to be relative to the tools crate root —
// confirm the working directory used by the cargo aliases that invoke this binary.
const GRAMMAR_DIR: &str = "./src/parser/grammar";
const INLINE_TESTS_DIR: &str = "tests/data/parser/inline";
const GRAMMAR: &str = "./src/grammar.ron";
const SYNTAX_KINDS: &str = "./src/syntax_kinds/generated.rs";
const SYNTAX_KINDS_TEMPLATE: &str = "./src/syntax_kinds/generated.rs.tera";
/// Entry point: parses the CLI and dispatches to the requested generator.
///
/// Supported subcommands are `gen-kinds` and `gen-tests`; the global
/// `--verify` flag makes either fail instead of rewriting files on disk.
fn main() -> Result<()> {
    let app = App::new("tasks")
        .setting(clap::AppSettings::SubcommandRequiredElseHelp)
        .arg(
            Arg::with_name("verify")
                .long("--verify")
                .help("Verify that generated code is up-to-date")
                .global(true),
        )
        .subcommand(SubCommand::with_name("gen-kinds"))
        .subcommand(SubCommand::with_name("gen-tests"));
    let matches = app.get_matches();
    // SubcommandRequiredElseHelp guarantees a subcommand is present here.
    if let (name, Some(sub)) = matches.subcommand() {
        run_gen_command(name, sub.is_present("verify"))?;
    } else {
        unreachable!()
    }
    Ok(())
}
/// Dispatches a generator subcommand by name.
fn run_gen_command(name: &str, verify: bool) -> Result<()> {
    if name == "gen-kinds" {
        update(Path::new(SYNTAX_KINDS), &get_kinds()?, verify)
    } else if name == "gen-tests" {
        gen_tests(verify)
    } else {
        // Subcommand names are fixed by `main`, so anything else is a bug.
        unreachable!()
    }
}
/// Writes `contents` to `path`, unless the file already matches.
///
/// In `verify` mode an out-of-date file is reported as an error instead of
/// being rewritten, so CI can check that generated code is fresh.
fn update(path: &Path, contents: &str, verify: bool) -> Result<()> {
    // An unreadable/missing file simply counts as out-of-date.
    let up_to_date = match fs::read_to_string(path) {
        Ok(old) => old == contents,
        Err(_) => false,
    };
    if up_to_date {
        return Ok(());
    }
    if verify {
        bail!("`{}` is not up-to-date", path.display());
    }
    fs::write(path, contents)?;
    Ok(())
}
fn get_kinds() -> Result<String> {
let grammar = grammar()?;
let template = fs::read_to_string(SYNTAX_KINDS_TEMPLATE)?;
let ret = tera::Tera::one_off(&template, &grammar, false).map_err(|e| {
format_err!("template error: {}", e)
})?;
Ok(ret)
}
/// Parses `src/grammar.ron` into a dynamically-typed RON value for templating.
fn grammar() -> Result<ron::value::Value> {
    let text = fs::read_to_string(GRAMMAR)?;
    Ok(ron::de::from_str(&text)?)
}
fn gen_tests(verify: bool) -> Result<()> {
let tests = tests_from_dir(Path::new(GRAMMAR_DIR))?;
let inline_tests_dir = Path::new(INLINE_TESTS_DIR);
if !inline_tests_dir.is_dir() {
fs::create_dir_all(inline_tests_dir)?;
}
let existing = existing_tests(inline_tests_dir)?;
for t in existing.difference(&tests) {
panic!("Test is deleted: {}\n{}", t.name, t.text);
}
let new_tests = tests.difference(&existing);
for (i, t) in new_tests.enumerate() {
let name = format!("{:04}_{}.rs", existing.len() + i + 1, t.name);
let path = inline_tests_dir.join(name);
update(&path, &t.text, verify)?;
}
Ok(())
}
// A single inline parser test extracted from a `// test <name>` comment.
// NOTE(review): identity is the `name` alone — the manual `PartialEq`/`Hash`
// impls below ignore `text`, so two tests with the same name but different
// bodies collide in a `HashSet`.
#[derive(Debug, Eq)]
struct Test {
name: String,
text: String,
}
/// Tests compare equal when their names match; the body text is ignored.
impl PartialEq for Test {
    fn eq(&self, other: &Test) -> bool {
        self.name == other.name
    }
}
/// Hash must agree with `PartialEq`, so only the name is hashed.
impl ::std::hash::Hash for Test {
    fn hash<H: ::std::hash::Hasher>(&self, state: &mut H) {
        ::std::hash::Hash::hash(&self.name, state)
    }
}
/// Walks `dir` recursively and collects every inline test found in `.rs`
/// files.
///
/// # Errors
/// Fails on a directory-walk or read error, or when two tests anywhere
/// under `dir` share a name.
fn tests_from_dir(dir: &Path) -> Result<HashSet<Test>> {
    let mut res = HashSet::new();
    for entry in ::walkdir::WalkDir::new(dir) {
        // Propagate walk errors instead of panicking: this function already
        // returns Result, so an unreadable entry should not abort the process.
        let entry = entry?;
        if !entry.file_type().is_file() {
            continue;
        }
        // Only Rust sources can contain inline tests.
        if entry.path().extension().unwrap_or_default() != "rs" {
            continue;
        }
        let text = fs::read_to_string(entry.path())?;
        for test in collect_tests(&text) {
            // `HashSet::replace` returns the previously stored equal element,
            // which here means two tests share a name.
            if let Some(old_test) = res.replace(test) {
                bail!("Duplicate test: {}", old_test.name)
            }
        }
    }
    Ok(res)
}
// Extracts inline tests from Rust source text.
//
// A test is a `// `-comment block containing a line `// test <name>`; the
// comment lines after the marker (with the prefix stripped) form the test body.
fn collect_tests(s: &str) -> Vec<Test> {
let mut res = vec![];
let prefix = "// ";
// Group consecutive lines by whether they start with `// `; only the
// comment runs are inspected below.
let comment_blocks = s.lines()
.map(str::trim_left)
.group_by(|line| line.starts_with(prefix));
'outer: for (is_comment, block) in comment_blocks.into_iter() {
if !is_comment {
continue;
}
// Strip the `// ` prefix from every line of the block.
let mut block = block.map(|line| &line[prefix.len()..]);
// Scan forward for the `test <name>` marker; a block without one is
// not a test and is skipped entirely via the labelled continue.
let name = loop {
match block.next() {
Some(line) if line.starts_with("test ") => break line["test ".len()..].to_string(),
Some(_) => (),
None => continue 'outer,
}
};
// The remaining lines are the test body; the trailing `once("")` makes
// the joined text end with a newline.
let text: String = itertools::join(block.chain(::std::iter::once("")), "\n");
assert!(!text.trim().is_empty() && text.ends_with("\n"));
res.push(Test { name, text })
}
res
}
fn existing_tests(dir: &Path) -> Result<HashSet<Test>> {
let mut res = HashSet::new();
for file in fs::read_dir(dir)? {
let file = file?;
let path = file.path();
if path.extension().unwrap_or_default() != "rs" {
continue;
}
let name = path.file_name().unwrap().to_str().unwrap();
let name = name["0000_".len()..name.len() - 3].to_string();
let text = fs::read_to_string(&path)?;
res.insert(Test { name, text });
}
Ok(res)
}