5572: Switch to ungrammar from ast_src r=matklad a=matklad

The primary advantage of ungrammar is that it (eventually) allows one
to describe concrete syntax tree structure -- with alternatives and
specific sequence of tokens & nodes.

That should be re-usable for:

* generate `make` calls
* Rust reference
* Hypothetical parser's evented API

We lose doc comments for the time being, unfortunately. I don't think
we should add support for doc comments to ungrammar -- they'd make the
grammar file hard to read. We might supply docs as out-of-band info,
or maybe just via a reference, but we'll think about that once things
are no longer in flux.



bors r+
🤖

Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
This commit is contained in:
bors[bot] 2020-07-29 17:18:53 +00:00 committed by GitHub
commit 2dfda0b984
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 2393 additions and 4994 deletions

7
Cargo.lock generated
View file

@ -1749,6 +1749,12 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "ungrammar"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee12e4891ab3acc2d95d5023022ace22020247bb8a8d1ece875a443f7dab37d"
[[package]]
name = "unicode-bidi"
version = "0.3.4"
@ -1893,5 +1899,6 @@ dependencies = [
"pico-args",
"proc-macro2",
"quote",
"ungrammar",
"walkdir",
]

View file

@ -1,7 +1,7 @@
//! HIR for references to types. Paths in these are not yet resolved. They can
//! be directly created from an ast::TypeRef, without further queries.
use ra_syntax::ast::{self, TypeAscriptionOwner, TypeBoundsOwner};
use ra_syntax::ast::{self, TypeAscriptionOwner};
use crate::{body::LowerCtx, path::Path};

View file

@ -7,6 +7,8 @@ use crate::{
SyntaxToken, T,
};
// Empty impl: `ast::Expr` relies entirely on the default methods of `AttrsOwner`.
impl ast::AttrsOwner for ast::Expr {}
impl ast::Expr {
pub fn is_block_like(&self) -> bool {
match self {

File diff suppressed because it is too large Load diff

View file

@ -472,3 +472,19 @@ impl ast::TokenTree {
.filter(|it| matches!(it.kind(), T!['}'] | T![')'] | T![']']))
}
}
// Hand-written `DocCommentsOwner` implementations. Every impl body is empty, so
// each node type uses only the trait's default methods.
// NOTE(review): presumably these are spelled out by hand because the
// grammar-driven codegen does not infer this trait -- confirm.
impl ast::DocCommentsOwner for ast::SourceFile {}
impl ast::DocCommentsOwner for ast::FnDef {}
impl ast::DocCommentsOwner for ast::StructDef {}
impl ast::DocCommentsOwner for ast::UnionDef {}
impl ast::DocCommentsOwner for ast::RecordFieldDef {}
impl ast::DocCommentsOwner for ast::TupleFieldDef {}
impl ast::DocCommentsOwner for ast::EnumDef {}
impl ast::DocCommentsOwner for ast::EnumVariant {}
impl ast::DocCommentsOwner for ast::TraitDef {}
impl ast::DocCommentsOwner for ast::Module {}
impl ast::DocCommentsOwner for ast::StaticDef {}
impl ast::DocCommentsOwner for ast::ConstDef {}
impl ast::DocCommentsOwner for ast::TypeAliasDef {}
impl ast::DocCommentsOwner for ast::ImplDef {}
impl ast::DocCommentsOwner for ast::MacroCall {}

View file

@ -10,9 +10,10 @@ license = "MIT OR Apache-2.0"
doctest = false
[dependencies]
walkdir = "2.3.1"
pico-args = "0.3.1"
quote = "1.0.2"
proc-macro2 = "1.0.8"
anyhow = "1.0.26"
flate2 = "1.0"
pico-args = "0.3.1"
proc-macro2 = "1.0.8"
quote = "1.0.2"
ungrammar = "0.1.0"
walkdir = "2.3.1"

File diff suppressed because it is too large Load diff

View file

@ -3,19 +3,27 @@
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
use std::{collections::HashSet, fmt::Write};
use std::{
collections::{BTreeSet, HashSet},
fmt::Write,
};
use proc_macro2::{Punct, Spacing};
use quote::{format_ident, quote};
use ungrammar::{Grammar, Rule};
use crate::{
ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC},
ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC},
codegen::{self, update, Mode},
project_root, Result,
};
pub fn generate_syntax(mode: Mode) -> Result<()> {
let ast = rust_ast();
let grammar = include_str!("rust.ungram")
.parse::<Grammar>()
.unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err));
let ast = lower(&grammar);
let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
@ -215,7 +223,9 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
.map(|kind| to_pascal_case(kind))
.filter(|name| !defined_nodes.iter().any(|&it| it == name))
{
eprintln!("Warning: node {} not defined in ast source", node);
drop(node)
// TODO: restore this
// eprintln!("Warning: node {} not defined in ast source", node);
}
let ast = quote! {
@ -414,6 +424,10 @@ fn to_pascal_case(s: &str) -> String {
buf
}
/// Returns `s` with a trailing `s` appended (naive English pluralization).
fn pluralize(s: &str) -> String {
    let mut plural = String::with_capacity(s.len() + 1);
    plural.push_str(s);
    plural.push('s');
    plural
}
impl Field {
fn is_many(&self) -> bool {
matches!(self, Field::Node { src: FieldSrc::Many(_), .. })
@ -449,6 +463,7 @@ impl Field {
"." => "dot",
".." => "dotdot",
"..." => "dotdotdot",
"..=" => "dotdoteq",
"=>" => "fat_arrow",
"@" => "at",
":" => "colon",
@ -475,3 +490,204 @@ impl Field {
}
}
}
/// Lowers a parsed ungrammar `Grammar` into the `AstSrc` model used by codegen.
///
/// Nodes whose rule is just a reference to themselves are skipped. A node whose
/// rule is a pure alternation of other nodes becomes an enum; every other node
/// becomes a struct-like node whose fields are collected by `lower_rule`.
/// Afterwards the post-processing passes run in a fixed order: field
/// deduplication, enum extraction, then struct and enum trait extraction.
fn lower(grammar: &Grammar) -> AstSrc {
    let mut ast = AstSrc::default();
    ast.tokens =
        ["Whitespace", "Comment", "String", "RawString"].iter().map(|&tok| tok.into()).collect();

    let lowered_nodes = grammar
        .iter()
        .filter(|&node| !matches!(grammar[node].rule, Rule::Node(it) if it == node));

    for node in lowered_nodes {
        let data = &grammar[node];
        let name = data.name.clone();
        if let Some(variants) = lower_enum(grammar, &data.rule) {
            ast.enums.push(AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants });
        } else {
            let mut fields = Vec::new();
            lower_rule(&mut fields, grammar, &data.rule);
            ast.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
        }
    }

    deduplicate_fields(&mut ast);
    extract_enums(&mut ast);
    extract_struct_traits(&mut ast);
    extract_enum_traits(&mut ast);
    ast
}
/// Tries to interpret `rule` as an enum definition.
///
/// Returns the variant names when `rule` is an alternation in which every
/// alternative is a bare node reference; returns `None` for any other shape.
fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
    let alternatives = match rule {
        Rule::Alt(alternatives) => alternatives,
        _ => return None,
    };
    // Collecting into `Option<Vec<_>>` stops at the first non-node alternative.
    alternatives
        .iter()
        .map(|alternative| match alternative {
            Rule::Node(node) => Some(grammar[*node].name.clone()),
            _ => None,
        })
        .collect()
}
/// Flattens `rule` into a list of fields, appending them to `acc`.
///
/// * A node reference becomes a shorthand node field.
/// * A token becomes a token field, except `int_number` and `string`, which are
///   dropped; bracket tokens are wrapped in single quotes.
/// * A repetition of a node becomes a "many" field named after the pluralized
///   snake-case node name.
/// * A labeled rule becomes a field named by the label: "many" when the labeled
///   rule is a repetition, "optional" otherwise.
/// * Sequences, alternations and optionals are flattened recursively.
///
/// Panics via `todo!` on shapes that are not supported yet (a repetition of a
/// non-node, or a labeled rule that does not bottom out in a node).
fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, rule: &Rule) {
    match rule {
        Rule::Node(node) => {
            acc.push(Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand });
        }
        Rule::Token(token) => {
            let raw = grammar[*token].name.clone();
            if raw == "int_number" || raw == "string" {
                return;
            }
            let name = if "[]{}()".contains(&raw) { format!("'{}'", raw) } else { raw };
            acc.push(Field::Token(name));
        }
        Rule::Rep(inner) => match &**inner {
            Rule::Node(node) => {
                let ty = grammar[*node].name.clone();
                let name = pluralize(&to_lower_snake_case(&ty));
                acc.push(Field::Node { name, src: FieldSrc::Many(ty) });
            }
            _ => todo!("{:?}", rule),
        },
        Rule::Labeled { label, rule: labeled } => {
            // Find the node the label refers to and whether it is repeated.
            let (node, is_many) = match &**labeled {
                Rule::Rep(inner) => match &**inner {
                    Rule::Node(node) => (node, true),
                    _ => todo!("{:?}", labeled),
                },
                Rule::Opt(inner) => match &**inner {
                    Rule::Node(node) => (node, false),
                    _ => todo!("{:?}", labeled),
                },
                Rule::Node(node) => (node, false),
                _ => todo!("{:?}", labeled),
            };
            let ty = grammar[*node].name.clone();
            let src = if is_many { FieldSrc::Many(ty) } else { FieldSrc::Optional(ty) };
            acc.push(Field::Node { name: label.clone(), src });
        }
        Rule::Seq(children) | Rule::Alt(children) => {
            children.iter().for_each(|child| lower_rule(acc, grammar, child));
        }
        Rule::Opt(inner) => lower_rule(acc, grammar, inner),
    }
}
/// Removes duplicate fields from every node, keeping the first occurrence of
/// each field and preserving the original order.
///
/// Flattening sequences and alternations in `lower_rule` can yield the same
/// field more than once (e.g. when it appears in several alternatives).
fn deduplicate_fields(ast: &mut AstSrc) {
    for node in &mut ast.nodes {
        // Rebuild the list in one pass instead of repeatedly calling
        // `Vec::remove`, which shifts the tail on every duplicate.
        let mut unique = Vec::with_capacity(node.fields.len());
        for field in std::mem::take(&mut node.fields) {
            if !unique.contains(&field) {
                unique.push(field);
            }
        }
        node.fields = unique;
    }
}
/// Collapses groups of fields into a single enum-typed field.
///
/// For each node and each enum: if the number of the node's fields whose type
/// is one of the enum's variants equals the number of variants, those fields
/// are removed and one shorthand field named after the enum is appended.
fn extract_enums(ast: &mut AstSrc) {
    for node in &mut ast.nodes {
        for enm in &ast.enums {
            let variant_fields: Vec<usize> = node
                .fields
                .iter()
                .enumerate()
                .filter(|(_, field)| {
                    let ty = field.ty().to_string();
                    enm.variants.contains(&ty)
                })
                .map(|(idx, _)| idx)
                .collect();

            if variant_fields.len() != enm.variants.len() {
                continue;
            }
            node.remove_field(variant_fields);
            node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand });
        }
    }
}
/// Replaces well-known groups of accessor methods on each node with the
/// corresponding `*Owner` trait, using the table below.
///
/// Each table entry pairs a trait name with the method names that the trait
/// stands for; the actual matching is done by `extract_struct_trait`.
fn extract_struct_traits(ast: &mut AstSrc) {
    const TRAITS: &[(&str, &[&str])] = &[
        ("AttrsOwner", &["attrs"]),
        ("NameOwner", &["name"]),
        ("VisibilityOwner", &["visibility"]),
        ("TypeParamsOwner", &["type_param_list", "where_clause"]),
        ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
        ("ModuleItemOwner", &["items"]),
        ("TypeAscriptionOwner", &["ascribed_type"]),
        ("LoopBodyOwner", &["label", "loop_body"]),
        ("ArgListOwner", &["arg_list"]),
    ];

    for node in ast.nodes.iter_mut() {
        TRAITS
            .iter()
            .for_each(|(trait_name, methods)| extract_struct_trait(node, trait_name, methods));
    }
}
/// Moves one trait's worth of methods from `node`'s fields into its trait list.
///
/// Counts the fields whose method name is listed in `methods`. When that count
/// equals `methods.len()`, `trait_name` is added to the node's traits and the
/// matching fields are removed; otherwise the node is left untouched.
fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
    let covered: Vec<usize> = node
        .fields
        .iter()
        .enumerate()
        .filter(|(_, field)| {
            let method_name = field.method_name().to_string();
            methods.iter().any(|&method| method == method_name.as_str())
        })
        .map(|(idx, _)| idx)
        .collect();

    if covered.len() != methods.len() {
        return;
    }
    node.traits.push(trait_name.to_string());
    node.remove_field(covered);
}
/// Gives every enum the traits that all of its variants share.
///
/// Each variant is looked up among `ast.nodes` by name (panicking if it is not
/// found there), and the enum's traits become the intersection of the
/// variants' trait sets, in sorted order. Enums without variants are skipped.
fn extract_enum_traits(ast: &mut AstSrc) {
    for enm in &mut ast.enums {
        let nodes = &ast.nodes;
        let mut traits_per_variant = enm.variants.iter().map(|variant| {
            let node = nodes.iter().find(|node| &node.name == variant).unwrap();
            node.traits.iter().cloned().collect::<BTreeSet<_>>()
        });

        let first = match traits_per_variant.next() {
            Some(traits) => traits,
            None => continue,
        };
        let shared = traits_per_variant
            .fold(first, |common, traits| common.intersection(&traits).cloned().collect());

        enm.traits = shared.into_iter().collect();
    }
}
impl AstNodeSrc {
    /// Removes the fields at the given indices.
    ///
    /// Indices are processed from last to first so that, for an ascending list,
    /// removing one field does not shift the positions still to be removed.
    fn remove_field(&mut self, to_remove: Vec<usize>) {
        for idx in to_remove.into_iter().rev() {
            self.fields.remove(idx);
        }
    }
}

View file

@ -0,0 +1,529 @@
SourceFile =
Attr*
items:ModuleItem*
FnDef =
Attr* Visibility? Abi? 'const' 'default' 'async' 'unsafe' 'fn' Name TypeParamList?
ParamList RetType?
WhereClause?
(body:BlockExpr | ';')
RetType =
'->' TypeRef
StructDef =
Attr* Visibility? 'struct' Name TypeParamList? (
WhereClause? (RecordFieldDefList | ';')
| TupleFieldDefList WhereClause? ';'
)
UnionDef =
Attr* Visibility? 'union' Name TypeParamList? WhereClause?
RecordFieldDefList
RecordFieldDefList =
'{' fields:RecordFieldDef* '}'
RecordFieldDef =
Attr* Visibility? Name ':' ascribed_type:TypeRef
TupleFieldDefList =
'(' fields:TupleFieldDef* ')'
TupleFieldDef =
Attr* Visibility? Name TypeRef
FieldDefList =
RecordFieldDefList
| TupleFieldDefList
EnumDef =
Attr* Visibility? 'enum' Name TypeParamList? WhereClause?
variant_list:EnumVariantList
EnumVariantList =
'{' variants:EnumVariant* '}'
EnumVariant =
Attr* Visibility? Name FieldDefList ('=' Expr)?
TraitDef =
Attr* Visibility? 'unsafe'? 'auto'? 'trait' Name TypeParamList
(':' TypeBoundList?)? WhereClause
ItemList
Module =
Attr* Visibility? 'mod' Name
(ItemList | ';')
ItemList =
'{'
AssocItem*
items:ModuleItem*
'}'
ConstDef =
Attr* Visibility? 'default'? 'const' Name ':' ascribed_type:TypeRef
'=' body:Expr ';'
StaticDef =
Attr* Visibility? 'static'? 'mut'? 'static' Name ':' ascribed_type:TypeRef
'=' body:Expr ';'
TypeAliasDef =
Attr* Visibility? 'default'? 'type' Name TypeParamList? WhereClause? (':' TypeBoundList?)?
'=' TypeRef ';'
ImplDef =
Attr* Visibility? 'const'? 'default'? 'unsafe'? 'impl' TypeParamList? '!'? 'for'
WhereClause?
ItemList
ParenType =
'(' TypeRef ')'
TupleType =
'(' fields:TypeRef* ')'
NeverType =
'!'
PathType =
Path
PointerType =
'*' ('const' | 'mut') TypeRef
ArrayType =
'[' TypeRef ';' Expr ']'
SliceType =
'[' TypeRef ']'
ReferenceType =
'&' 'lifetime'? 'mut'? TypeRef
PlaceholderType =
'_'
FnPointerType =
Abi 'unsafe'? 'fn' ParamList RetType?
ForType =
'for' TypeParamList TypeRef
ImplTraitType =
'impl' TypeBoundList
DynTraitType =
'dyn' TypeBoundList
TupleExpr =
Attr* '(' Expr* ')'
ArrayExpr =
Attr* '[' (Expr* | Expr ';' Expr) ']'
ParenExpr =
Attr* '(' Expr ')'
PathExpr =
Path
LambdaExpr =
Attr* 'static'? 'async'? 'move'? ParamList RetType?
body:Expr
IfExpr =
Attr* 'if' Condition
Condition =
'let' Pat '=' Expr
| Expr
EffectExpr =
Attr* Label? ('try' | 'unsafe' | 'async') BlockExpr
LoopExpr =
Attr* Label? 'loop'
loop_body:BlockExpr?
ForExpr =
Attr* Label? 'for' Pat 'in' iterable:Expr
loop_body:BlockExpr?
WhileExpr =
Attr* Label? 'while' Condition
loop_body:BlockExpr?
ContinueExpr =
Attr* 'continue' 'lifetime'?
BreakExpr =
Attr* 'break' 'lifetime'? Expr?
Label =
'lifetime'
BlockExpr =
Attr* Label
'{'
items:ModuleItem*
statements:Stmt*
Expr?
'}'
ReturnExpr =
Attr* 'return' Expr
CallExpr =
Attr* Expr ArgList
MethodCallExpr =
Attr* Expr '.' NameRef TypeArgList? ArgList
ArgList =
'(' args:Expr* ')'
FieldExpr =
Attr* Expr '.' NameRef
IndexExpr =
Attr* '[' ']'
AwaitExpr =
Attr* Expr '.' 'await'
TryExpr =
Attr* Expr '?'
CastExpr =
Attr* Expr 'as' TypeRef
RefExpr =
Attr* '&' ('raw' | 'mut' | 'const') Expr
PrefixExpr =
Attr* Expr
BoxExpr =
Attr* 'box' Expr
RangeExpr =
Attr*
BinExpr =
Attr*
Literal =
'int_number'
MatchExpr =
Attr* 'match' Expr MatchArmList
MatchArmList =
'{' arms:MatchArm* '}'
MatchArm =
Attr* Pat guard:MatchGuard? '=>' Expr
MatchGuard =
'if' Expr
RecordLit =
Path RecordFieldList
RecordFieldList =
'{'
fields:RecordField*
('..' spread:Expr)?
'}'
RecordField =
Attr* NameRef (':' Expr)?
OrPat =
Pat*
ParenPat =
'(' Pat ')'
RefPat =
'&' 'mut'? Pat
BoxPat =
'box' Path
BindPat =
Attr* 'ref'? 'mut'? Name ('@' Pat)?
PlaceholderPat =
'_'
DotDotPat =
'..'
PathPat =
Path
SlicePat =
'[' args:Pat* ']'
RangePat =
'..' | '..='
LiteralPat =
Literal
MacroPat =
MacroCall
RecordPat =
Path RecordFieldPatList
RecordFieldPatList =
'{'
record_field_pats:RecordFieldPat*
BindPat*
'..'?
'}'
RecordFieldPat =
Attr* NameRef ':' Pat
TupleStructPat =
Path '(' args:Pat* ')'
TuplePat =
'(' args:Pat* ')'
Visibility =
'pub' ('(' 'super' | 'self' | 'crate' | 'in' Path ')')?
Name =
'ident'
NameRef =
'ident' | 'int_number'
MacroCall =
Attr* Path '!' Name? TokenTree ';'?
MacroDef =
Name TokenTree
TokenTree =
'(' ')' | '{' '}' | '[' ']'
MacroItems =
items:ModuleItem*
MacroStmts =
statements:Stmt*
Expr?
Attr =
'#' '!'? '[' Path ('=' input:AttrInput)? ']'
TypeParamList =
'<'
TypeParam*
LifetimeParam*
ConstParam*
'>'
TypeParam =
Attr* Name (':' TypeBoundList?)?
('=' default_type:TypeRef)?
ConstParam =
Attr* 'const' Name ':' ascribed_type:TypeRef
('=' default_val:Expr)?
LifetimeParam =
Attr* 'lifetime'
TypeBound =
'lifetime' | 'const'? TypeRef
TypeBoundList =
bounds:TypeBound*
WherePred =
('for' TypeParamList)? ('lifetime' | TypeRef) ':' TypeBoundList
WhereClause =
'where' predicates:WherePred*
Abi =
'string'
ExprStmt =
Attr* Expr ';'
LetStmt =
Attr* 'let' Pat (':' ascribed_type:TypeRef)
'=' initializer:Expr ';'
ParamList =
'(' SelfParam Param* ')'
SelfParam =
Attr* ('&' 'lifetime'?)? 'mut'? 'self' (':' ascribed_type:TypeRef)
Param =
Attr* Pat (':' ascribed_type:TypeRef)
| '...'
UseItem =
Attr* Visibility? 'use' UseTree ';'
UseTree =
Path ('::' ('*' | UseTreeList)) Alias?
UseTreeList =
'{' UseTree* '}'
Alias =
'as' Name
ExternCrateItem =
Attr* Visibility? 'extern' 'crate' (NameRef | 'self') Alias? ';'
Path =
(qualifier:Path '::')? segment:PathSegment
PathSegment =
'::' | 'crate' | 'self' | 'super'
| '<' NameRef TypeArgList ParamList RetType PathType '>'
TypeArgList =
'::'? '<'
TypeArg*
LifetimeArg*
AssocTypeArg*
ConstArg*
'>'
TypeArg =
TypeRef
AssocTypeArg =
NameRef (':' TypeBoundList | '=' TypeRef)
LifetimeArg =
'lifetime'
ConstArg =
Literal | BlockExpr BlockExpr
ExternBlock =
Attr* Abi ExternItemList
ExternItemList =
'{' extern_items:ExternItem* '}'
MetaItem =
Path '=' AttrInput nested_meta_items:MetaItem*
NominalDef =
StructDef
| EnumDef
| UnionDef
TypeRef =
ParenType
| TupleType
| NeverType
| PathType
| PointerType
| ArrayType
| SliceType
| ReferenceType
| PlaceholderType
| FnPointerType
| ForType
| ImplTraitType
| DynTraitType
AssocItem =
FnDef
| TypeAliasDef
| ConstDef
ExternItem =
FnDef | StaticDef
ModuleItem =
StructDef
| UnionDef
| EnumDef
| FnDef
| TraitDef
| TypeAliasDef
| ImplDef
| UseItem
| ExternCrateItem
| ConstDef
| StaticDef
| Module
| MacroCall
| ExternBlock
AttrInput =
Literal
| TokenTree
Stmt =
LetStmt
| ExprStmt
Pat =
OrPat
| ParenPat
| RefPat
| BoxPat
| BindPat
| PlaceholderPat
| DotDotPat
| PathPat
| RecordPat
| TupleStructPat
| TuplePat
| SlicePat
| RangePat
| LiteralPat
| MacroPat
Expr =
TupleExpr
| ArrayExpr
| ParenExpr
| PathExpr
| LambdaExpr
| IfExpr
| LoopExpr
| ForExpr
| WhileExpr
| ContinueExpr
| BreakExpr
| Label
| BlockExpr
| ReturnExpr
| MatchExpr
| RecordLit
| CallExpr
| IndexExpr
| MethodCallExpr
| FieldExpr
| AwaitExpr
| TryExpr
| EffectExpr
| CastExpr
| RefExpr
| PrefixExpr
| RangeExpr
| BinExpr
| Literal
| MacroCall
| BoxExpr