Derive kinds information from ungrammar file

This commit is contained in:
Lukas Wirth 2024-07-17 10:04:45 +02:00
parent 8f044d9681
commit 983c9c122e
12 changed files with 448 additions and 718 deletions

View file

@ -165,42 +165,6 @@ pub(crate) mod entry {
}
m.complete(p, ERROR);
}
/// Parses the input of an eager macro: a list of comma-separated expressions
/// wrapped in a matching pair of `{}`, `()` or `[]`.
///
/// When the closing delimiter is followed directly by `EOF`, the whole input
/// is completed as a `MACRO_EAGER_INPUT` node. If the input does not start
/// with an opening delimiter, or if tokens remain after the closing delimiter,
/// everything up to `EOF` is consumed and the node is completed as `ERROR`.
pub(crate) fn eager_macro_input(p: &mut Parser<'_>) {
let m = p.start();
// Choose the closing delimiter that pairs with the current (opening) token.
let closing_paren_kind = match p.current() {
T!['{'] => T!['}'],
T!['('] => T![')'],
T!['['] => T![']'],
_ => {
// No opening delimiter: report it, swallow the remaining input and bail out.
p.error("expected `{`, `[`, `(`");
while !p.at(EOF) {
p.bump_any();
}
m.complete(p, ERROR);
return;
}
};
// Consume the opening delimiter.
p.bump_any();
while !p.at(EOF) && !p.at(closing_paren_kind) {
// Stop as soon as no expression can be parsed at the current position.
if expressions::expr(p).is_none() {
break;
}
// A comma is required after an expression unless the list ends right here.
if !p.at(EOF) && !p.at(closing_paren_kind) {
p.expect(T![,]);
}
}
p.expect(closing_paren_kind);
// Nothing may follow the closing delimiter.
if p.at(EOF) {
m.complete(p, MACRO_EAGER_INPUT);
return;
}
// Trailing tokens: consume them all and mark the whole input as an error.
while !p.at(EOF) {
p.bump_any();
}
m.complete(p, ERROR);
}
}
}

View file

@ -173,13 +173,6 @@ pub(super) fn opt_item(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> {
}
}
// test existential_type
// existential type Foo: Fn() -> usize;
if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
p.bump_remap(T![existential]);
has_mods = true;
}
// items
match p.current() {
T![fn] => fn_(p, m),
@ -201,7 +194,7 @@ pub(super) fn opt_item(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> {
_ if has_visibility || has_mods => {
if has_mods {
p.error("expected existential, fn, trait or impl");
p.error("expected fn, trait or impl");
} else {
p.error("expected an item");
}

View file

@ -82,8 +82,6 @@ pub enum TopEntryPoint {
/// Edge case -- macros generally don't expand to attributes, with the
/// exception of `cfg_attr` which does!
MetaItem,
/// Edge case 2 -- eager macros expand their input to a delimited list of comma separated expressions
MacroEagerInput,
}
impl TopEntryPoint {
@ -97,7 +95,6 @@ impl TopEntryPoint {
TopEntryPoint::Type => grammar::entry::top::type_,
TopEntryPoint::Expr => grammar::entry::top::expr,
TopEntryPoint::MetaItem => grammar::entry::top::meta_item,
TopEntryPoint::MacroEagerInput => grammar::entry::top::eager_macro_input,
};
let mut p = parser::Parser::new(input, edition);
entry_point(&mut p);

File diff suppressed because one or more lines are too long

View file

@ -1,31 +0,0 @@
SOURCE_FILE
TYPE_ALIAS
EXISTENTIAL_KW "existential"
WHITESPACE " "
TYPE_KW "type"
WHITESPACE " "
NAME
IDENT "Foo"
COLON ":"
WHITESPACE " "
TYPE_BOUND_LIST
TYPE_BOUND
PATH_TYPE
PATH
PATH_SEGMENT
NAME_REF
IDENT "Fn"
PARAM_LIST
L_PAREN "("
R_PAREN ")"
WHITESPACE " "
RET_TYPE
THIN_ARROW "->"
WHITESPACE " "
PATH_TYPE
PATH
PATH_SEGMENT
NAME_REF
IDENT "usize"
SEMICOLON ";"
WHITESPACE "\n"

View file

@ -1 +0,0 @@
existential type Foo: Fn() -> usize;

View file

@ -8,7 +8,10 @@
//
// // -- comment
// Name = -- non-terminal definition
// 'ident' -- token (terminal)
// 'ident' -- keyword or punct token (terminal)
// '?ident' -- contextual keyword (terminal)
// '#ident' -- generic token (terminal)
// '@ident' -- literal token (terminal)
// A B -- sequence
// A | B -- alternation
// A* -- zero or more repetition
@ -17,17 +20,17 @@
// label:A -- suggested name for field of AST node
//*************************//
// Names, Paths and Macros //
// Paths //
//*************************//
Name =
'ident' | 'self'
'#ident' | 'self'
NameRef =
'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self'
'#ident' | '@int_number' | 'self' | 'super' | 'crate' | 'Self'
Lifetime =
'lifetime_ident'
'#lifetime_ident'
Path =
(qualifier:Path '::')? segment:PathSegment
@ -38,6 +41,11 @@ PathSegment =
| NameRef ParamList RetType?
| '<' Type ('as' PathType)? '>'
//*************************//
// Generics //
//*************************//
GenericArgList =
'::'? '<' (GenericArg (',' GenericArg)* ','?)? '>'
@ -61,6 +69,36 @@ LifetimeArg =
ConstArg =
Expr
GenericParamList =
'<' (GenericParam (',' GenericParam)* ','?)? '>'
GenericParam =
ConstParam
| LifetimeParam
| TypeParam
TypeParam =
Attr* Name (':' TypeBoundList?)?
('=' default_type:Type)?
ConstParam =
Attr* 'const' Name ':' Type
('=' default_val:ConstArg)?
LifetimeParam =
Attr* Lifetime (':' TypeBoundList?)?
WhereClause =
'where' predicates:(WherePred (',' WherePred)* ','?)
WherePred =
('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
//*************************//
// Macro //
//*************************//
MacroCall =
Attr* Path '!' TokenTree ';'?
@ -72,22 +110,23 @@ TokenTree =
MacroItems =
Item*
MacroEagerInput =
'(' (Expr (',' Expr)* ','?)? ')'
| '{' (Expr (',' Expr)* ','?)? '}'
| '[' (Expr (',' Expr)* ','?)? ']'
MacroStmts =
statements:Stmt*
Expr?
Attr =
'#' '!'? '[' Meta ']'
Meta =
'unsafe' '(' Path ('=' Expr | TokenTree)? ')'
| Path ('=' Expr | TokenTree)?
//*************************//
// Items //
//*************************//
SourceFile =
'shebang'?
'#shebang'?
Attr*
Item*
@ -112,7 +151,7 @@ Item =
MacroRules =
Attr* Visibility?
'macro_rules' '!' Name
'?macro_rules' '!' Name
TokenTree
MacroDef =
@ -148,7 +187,7 @@ UseTreeList =
Fn =
Attr* Visibility?
'default'? 'const'? 'async'? 'unsafe'? Abi?
'?default'? 'const'? 'async'? 'unsafe'? Abi?
'fn' Name GenericParamList? ParamList RetType? WhereClause?
(body:BlockExpr | ';')
@ -180,7 +219,7 @@ RetType =
TypeAlias =
Attr* Visibility?
'default'?
'?default'?
'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
('=' Type)? ';'
@ -223,7 +262,7 @@ Variant =
Union =
Attr* Visibility?
'union' Name GenericParamList? WhereClause?
'?union' Name GenericParamList? WhereClause?
RecordFieldList
// A Data Type.
@ -236,7 +275,7 @@ Adt =
Const =
Attr* Visibility?
'default'?
'?default'?
'const' (Name | '_') ':' Type
('=' body:Expr)? ';'
@ -247,7 +286,7 @@ Static =
Trait =
Attr* Visibility?
'unsafe'? 'auto'?
'unsafe'? '?auto'?
'trait' Name GenericParamList?
(':' TypeBoundList?)? WhereClause? AssocItemList
@ -266,7 +305,7 @@ AssocItem =
Impl =
Attr* Visibility?
'default'? 'unsafe'?
'?default'? 'unsafe'?
'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause?
AssocItemList
@ -282,41 +321,9 @@ ExternItem =
| Static
| TypeAlias
GenericParamList =
'<' (GenericParam (',' GenericParam)* ','?)? '>'
GenericParam =
ConstParam
| LifetimeParam
| TypeParam
TypeParam =
Attr* Name (':' TypeBoundList?)?
('=' default_type:Type)?
ConstParam =
Attr* 'const' Name ':' Type
('=' default_val:ConstArg)?
LifetimeParam =
Attr* Lifetime (':' TypeBoundList?)?
WhereClause =
'where' predicates:(WherePred (',' WherePred)* ','?)
WherePred =
('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
Visibility =
'pub' ('(' 'in'? Path ')')?
Attr =
'#' '!'? '[' Meta ']'
Meta =
'unsafe' '(' Path ('=' Expr | TokenTree)? ')'
| Path ('=' Expr | TokenTree)?
//****************************//
// Statements and Expressions //
@ -379,13 +386,13 @@ Expr =
| UnderscoreExpr
OffsetOfExpr =
Attr* 'builtin' '#' 'offset_of' '(' Type ',' fields:(NameRef ('.' NameRef)* ) ')'
Attr* '?builtin' '#' '?offset_of' '(' Type ',' fields:(NameRef ('.' NameRef)* ) ')'
AsmExpr =
Attr* 'builtin' '#' 'asm' '(' Expr ')'
Attr* '?builtin' '#' '?asm' '(' Expr ')'
FormatArgsExpr =
Attr* 'builtin' '#' 'format_args' '('
Attr* '?builtin' '#' '?format_args' '('
template:Expr
(',' args:(FormatArgsArg (',' FormatArgsArg)* ','?)? )?
')'
@ -398,11 +405,12 @@ MacroExpr =
Literal =
Attr* value:(
'int_number' | 'float_number'
| 'string' | 'raw_string'
| 'byte_string' | 'raw_byte_string'
'@int_number' | '@float_number'
| '@string' | '@raw_string'
| '@byte_string' | '@raw_byte_string'
| '@c_string' | '@raw_c_string'
| '@char' | '@byte'
| 'true' | 'false'
| 'char' | 'byte'
)
PathExpr =
@ -416,7 +424,7 @@ StmtList =
'}'
RefExpr =
Attr* '&' (('raw' 'const'?)| ('raw'? 'mut') ) Expr
Attr* '&' (('?raw' 'const'?)| ('?raw'? 'mut') ) Expr
TryExpr =
Attr* Expr '?'
@ -538,7 +546,7 @@ YieldExpr =
Attr* 'yield' Expr?
YeetExpr =
Attr* 'do' 'yeet' Expr?
Attr* 'do' '?yeet' Expr?
LetExpr =
Attr* 'let' Pat '=' Expr

View file

@ -14,6 +14,8 @@ pub struct Abi {
impl Abi {
/// Looks up the `extern` keyword token of this node via `support::token`.
#[inline]
pub fn extern_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![extern]) }
/// Looks up the token of kind `T![string]` of this node via `support::token`.
#[inline]
pub fn string_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![string]) }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -833,27 +835,6 @@ impl MacroDef {
pub fn macro_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![macro]) }
}
/// Typed AST wrapper around a syntax node of kind `MACRO_EAGER_INPUT`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MacroEagerInput {
pub(crate) syntax: SyntaxNode,
}
// Accessors for the expressions and for each of the three possible delimiter
// pairs. Every token accessor delegates to `support::token` with the matching kind.
impl MacroEagerInput {
/// The `Expr` children of this node, obtained via `support::children`.
#[inline]
pub fn exprs(&self) -> AstChildren<Expr> { support::children(&self.syntax) }
/// The `(` token, if any.
#[inline]
pub fn l_paren_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T!['(']) }
/// The `)` token, if any.
#[inline]
pub fn r_paren_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![')']) }
/// The `[` token, if any.
#[inline]
pub fn l_brack_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T!['[']) }
/// The `]` token, if any.
#[inline]
pub fn r_brack_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![']']) }
/// The `{` token, if any.
#[inline]
pub fn l_curly_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T!['{']) }
/// The `}` token, if any.
#[inline]
pub fn r_curly_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T!['}']) }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MacroExpr {
pub(crate) syntax: SyntaxNode,
@ -1050,6 +1031,10 @@ impl NameRef {
#[inline]
pub fn ident_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![ident]) }
#[inline]
pub fn int_number_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![int_number])
}
#[inline]
pub fn self_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![self]) }
#[inline]
pub fn super_token(&self) -> Option<SyntaxToken> { support::token(&self.syntax, T![super]) }
@ -3021,20 +3006,6 @@ impl AstNode for MacroDef {
#[inline]
fn syntax(&self) -> &SyntaxNode { &self.syntax }
}
impl AstNode for MacroEagerInput {
    /// Only nodes of kind `MACRO_EAGER_INPUT` can be viewed as a `MacroEagerInput`.
    #[inline]
    fn can_cast(kind: SyntaxKind) -> bool {
        kind == MACRO_EAGER_INPUT
    }

    /// Wraps `syntax` when its kind is castable; yields `None` otherwise.
    #[inline]
    fn cast(syntax: SyntaxNode) -> Option<Self> {
        Self::can_cast(syntax.kind()).then_some(Self { syntax })
    }

    /// Borrows the underlying syntax node.
    #[inline]
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}
impl AstNode for MacroExpr {
#[inline]
fn can_cast(kind: SyntaxKind) -> bool { kind == MACRO_EXPR }
@ -5741,11 +5712,6 @@ impl std::fmt::Display for MacroDef {
std::fmt::Display::fmt(self.syntax(), f)
}
}
// Displaying the AST wrapper is the same as displaying its underlying syntax node.
impl std::fmt::Display for MacroEagerInput {
/// Forwards to the `Display` implementation of the wrapped syntax node.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
}
impl std::fmt::Display for MacroExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)

View file

@ -211,115 +211,6 @@ impl SourceFile {
}
}
impl ast::TokenTree {
/// Re-parses the tokens of this token tree with the `MacroEagerInput` parser
/// entry point and builds a fresh syntax tree from the result.
///
/// The work happens in two passes over the same token sequence:
/// 1. every non-trivia token is fed into a `parser::Input`;
/// 2. the parser output steps are replayed into a `SyntaxTreeBuilder`, taking
///    token text (and the skipped trivia) from the original tokens again.
///
/// Parse errors reported by the parser are attached at the text offset
/// tracked in `pos`.
pub fn reparse_as_comma_separated_expr(
self,
edition: parser::Edition,
) -> Parse<ast::MacroEagerInput> {
let tokens = self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token);
// Pass 1: build the parser input.
let mut parser_input = parser::Input::default();
// Whether the previously pushed token may be joint with the next one.
let mut was_joint = false;
for t in tokens {
let kind = t.kind();
if kind.is_trivia() {
// Trivia is not fed to the parser; it only breaks jointness.
was_joint = false
} else if kind == SyntaxKind::IDENT {
// Identifiers are pushed together with the contextual keyword they spell, if any.
// NOTE(review): `was_joint` is left untouched by this branch -- confirm that is intended.
let token_text = t.text();
let contextual_kw =
SyntaxKind::from_contextual_keyword(token_text).unwrap_or(SyntaxKind::IDENT);
parser_input.push_ident(contextual_kw);
} else {
if was_joint {
parser_input.was_joint();
}
parser_input.push(kind);
// Tag the token as joint if it is float with a fractional part
// we use this jointness to inform the parser about what token split
// event to emit when we encounter a float literal in a field access
if kind == SyntaxKind::FLOAT_NUMBER {
if !t.text().ends_with('.') {
parser_input.was_joint();
} else {
was_joint = false;
}
} else {
was_joint = true;
}
}
}
let parser_output = parser::TopEntryPoint::MacroEagerInput.parse(&parser_input, edition);
// Pass 2: walk the original tokens again while replaying the parser steps.
let mut tokens =
self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token);
// Scratch buffer for the text of the token currently being emitted.
let mut text = String::new();
// Running text offset, used as the position of reported errors.
let mut pos = TextSize::from(0);
let mut builder = SyntaxTreeBuilder::default();
for event in parser_output.iter() {
match event {
parser::Step::Token { kind, n_input_tokens } => {
let mut token = tokens.next().unwrap();
// Emit the trivia that pass 1 skipped before the token the parser saw.
while token.kind().is_trivia() {
let text = token.text();
pos += TextSize::from(text.len() as u32);
builder.token(token.kind(), text);
token = tokens.next().unwrap();
}
text.push_str(token.text());
// One parser token may span several input tokens; glue their text together.
for _ in 1..n_input_tokens {
let token = tokens.next().unwrap();
text.push_str(token.text());
}
pos += TextSize::from(text.len() as u32);
builder.token(kind, &text);
text.clear();
}
parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
// A float literal is split at its `.` into `INT_NUMBER` name refs around a `DOT`.
let token = tokens.next().unwrap();
let text = token.text();
match text.split_once('.') {
Some((left, right)) => {
assert!(!left.is_empty());
builder.start_node(SyntaxKind::NAME_REF);
builder.token(SyntaxKind::INT_NUMBER, left);
builder.finish_node();
// here we move the exit up, the original exit has been deleted in process
builder.finish_node();
builder.token(SyntaxKind::DOT, ".");
if has_pseudo_dot {
assert!(right.is_empty(), "{left}.{right}");
} else {
assert!(!right.is_empty(), "{left}.{right}");
builder.start_node(SyntaxKind::NAME_REF);
builder.token(SyntaxKind::INT_NUMBER, right);
builder.finish_node();
// the parser creates an unbalanced start node, we are required to close it here
builder.finish_node();
}
}
// A float split is only handled for text that contains a `.`.
None => unreachable!(),
}
pos += TextSize::from(text.len() as u32);
}
parser::Step::Enter { kind } => builder.start_node(kind),
parser::Step::Exit => builder.finish_node(),
parser::Step::Error { msg } => builder.error(msg.to_owned(), pos),
}
}
let (green, errors) = builder.finish_raw();
Parse::new(green, errors)
}
}
/// Matches a `SyntaxNode` against an `ast` type.
///
/// # Example:

View file

@ -163,8 +163,9 @@ fn add_preamble(cg: CodegenType, mut text: String) -> String {
/// case, updates the file and then fails the test.
#[allow(clippy::print_stderr)]
fn ensure_file_contents(cg: CodegenType, file: &Path, contents: &str, check: bool) {
let contents = normalize_newlines(contents);
if let Ok(old_contents) = fs::read_to_string(file) {
if normalize_newlines(&old_contents) == normalize_newlines(contents) {
if normalize_newlines(&old_contents) == contents {
// File is already up to date.
return;
}

View file

@ -17,15 +17,22 @@ use quote::{format_ident, quote};
use ungrammar::{Grammar, Rule};
use crate::{
codegen::{add_preamble, ensure_file_contents, reformat},
codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat},
project_root,
};
mod ast_src;
use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC};
use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
pub(crate) fn generate(check: bool) {
let syntax_kinds = generate_syntax_kinds(KINDS_SRC);
let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
.unwrap()
.parse()
.unwrap();
let ast = lower(&grammar);
let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar);
let syntax_kinds = generate_syntax_kinds(kinds_src);
let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs");
ensure_file_contents(
crate::flags::CodegenType::Grammar,
@ -34,12 +41,6 @@ pub(crate) fn generate(check: bool) {
check,
);
let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
.unwrap()
.parse()
.unwrap();
let ast = lower(&grammar);
let ast_tokens = generate_tokens(&ast);
let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs");
ensure_file_contents(
@ -49,7 +50,7 @@ pub(crate) fn generate(check: bool) {
check,
);
let ast_nodes = generate_nodes(KINDS_SRC, &ast);
let ast_nodes = generate_nodes(kinds_src, &ast);
let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs");
ensure_file_contents(
crate::flags::CodegenType::Grammar,
@ -96,7 +97,7 @@ fn generate_tokens(grammar: &AstSrc) -> String {
.replace("#[derive", "\n#[derive")
}
fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String {
let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
.nodes
.iter()
@ -117,7 +118,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
});
let methods = node.fields.iter().map(|field| {
let method_name = field.method_name();
let method_name = format_ident!("{}", field.method_name());
let ty = field.ty();
if field.is_many() {
@ -366,7 +367,7 @@ fn write_doc_comment(contents: &[String], dest: &mut String) {
}
}
fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String {
fn generate_syntax_kinds(grammar: KindsSrc) -> String {
let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
.punct
.iter()
@ -481,6 +482,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String {
#([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
#([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
[lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
[int_number] => { $crate::SyntaxKind::INT_NUMBER };
[ident] => { $crate::SyntaxKind::IDENT };
[shebang] => { $crate::SyntaxKind::SHEBANG };
}
@ -550,7 +552,7 @@ impl Field {
_ => None,
}
}
fn method_name(&self) -> proc_macro2::Ident {
fn method_name(&self) -> String {
match self {
Field::Token(name) => {
let name = match name.as_str() {
@ -585,13 +587,13 @@ impl Field {
"~" => "tilde",
_ => name,
};
format_ident!("{}_token", name)
format!("{name}_token",)
}
Field::Node { name, .. } => {
if name == "type" {
format_ident!("ty")
String::from("ty")
} else {
format_ident!("{}", name)
name.to_owned()
}
}
}
@ -604,6 +606,15 @@ impl Field {
}
}
/// Strips the leading token-class markers (`@`, `#`, `?`) from a grammar token name.
///
/// A name that consists solely of marker characters (such as the punctuation tokens
/// `#`, `@` and `?` themselves) is returned unchanged, so punctuation is never turned
/// into an empty name.
fn clean_token_name(name: &str) -> String {
    let is_marker = |c: char| matches!(c, '@' | '#' | '?');
    match name.trim_start_matches(is_marker) {
        "" => name.to_owned(),
        stripped => stripped.to_owned(),
    }
}
fn lower(grammar: &Grammar) -> AstSrc {
let mut res = AstSrc {
tokens:
@ -683,14 +694,12 @@ fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, r
}
Rule::Token(token) => {
assert!(label.is_none());
let mut name = grammar[*token].name.clone();
if name != "int_number" && name != "string" {
if "[]{}()".contains(&name) {
name = format!("'{name}'");
}
let field = Field::Token(name);
acc.push(field);
let mut name = clean_token_name(&grammar[*token].name);
if "[]{}()".contains(&name) {
name = format!("'{name}'");
}
let field = Field::Token(name);
acc.push(field);
}
Rule::Rep(inner) => {
if let Rule::Node(node) = &**inner {
@ -863,7 +872,7 @@ fn extract_struct_traits(ast: &mut AstSrc) {
fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
let mut to_remove = Vec::new();
for (i, field) in node.fields.iter().enumerate() {
let method_name = field.method_name().to_string();
let method_name = field.method_name();
if methods.iter().any(|&it| it == method_name) {
to_remove.push(i);
}

View file

@ -1,241 +1,152 @@
//! Defines input for code generation process.
pub(crate) struct KindsSrc<'a> {
pub(crate) punct: &'a [(&'a str, &'a str)],
pub(crate) keywords: &'a [&'a str],
pub(crate) contextual_keywords: &'a [&'a str],
pub(crate) literals: &'a [&'a str],
pub(crate) tokens: &'a [&'a str],
pub(crate) nodes: &'a [&'a str],
use crate::codegen::grammar::to_upper_snake_case;
/// The sets of syntax kinds that drive code generation.
///
/// All slices are `'static`; `generate_kind_src` obtains them by leaking its buffers.
#[derive(Copy, Clone, Debug)]
pub(crate) struct KindsSrc {
/// Punctuation as `(source text, kind name)` pairs, e.g. `(";", "SEMICOLON")`.
pub(crate) punct: &'static [(&'static str, &'static str)],
/// Keywords, spelled as they appear in the grammar.
pub(crate) keywords: &'static [&'static str],
/// Contextual keywords, i.e. `?`-prefixed grammar tokens with the prefix removed.
pub(crate) contextual_keywords: &'static [&'static str],
/// Literal kinds, i.e. `@`-prefixed grammar tokens in upper snake case.
pub(crate) literals: &'static [&'static str],
/// Remaining token kinds: the fixed `TOKENS` plus `#`-prefixed grammar tokens in upper snake case.
pub(crate) tokens: &'static [&'static str],
/// Node kinds in upper snake case.
pub(crate) nodes: &'static [&'static str],
}
/// Static table of every syntax kind used as input for code generation:
/// punctuation as `(source text, kind name)` pairs, keywords, contextual
/// keywords, literal kinds, other token kinds and node kinds.
pub(crate) const KINDS_SRC: KindsSrc<'_> = KindsSrc {
punct: &[
(";", "SEMICOLON"),
(",", "COMMA"),
("(", "L_PAREN"),
(")", "R_PAREN"),
("{", "L_CURLY"),
("}", "R_CURLY"),
("[", "L_BRACK"),
("]", "R_BRACK"),
("<", "L_ANGLE"),
(">", "R_ANGLE"),
("@", "AT"),
("#", "POUND"),
("~", "TILDE"),
("?", "QUESTION"),
("$", "DOLLAR"),
("&", "AMP"),
("|", "PIPE"),
("+", "PLUS"),
("*", "STAR"),
("/", "SLASH"),
("^", "CARET"),
("%", "PERCENT"),
("_", "UNDERSCORE"),
(".", "DOT"),
("..", "DOT2"),
("...", "DOT3"),
("..=", "DOT2EQ"),
(":", "COLON"),
("::", "COLON2"),
("=", "EQ"),
("==", "EQ2"),
("=>", "FAT_ARROW"),
("!", "BANG"),
("!=", "NEQ"),
("-", "MINUS"),
("->", "THIN_ARROW"),
("<=", "LTEQ"),
(">=", "GTEQ"),
("+=", "PLUSEQ"),
("-=", "MINUSEQ"),
("|=", "PIPEEQ"),
("&=", "AMPEQ"),
("^=", "CARETEQ"),
("/=", "SLASHEQ"),
("*=", "STAREQ"),
("%=", "PERCENTEQ"),
("&&", "AMP2"),
("||", "PIPE2"),
("<<", "SHL"),
(">>", "SHR"),
("<<=", "SHLEQ"),
(">>=", "SHREQ"),
],
keywords: &[
"abstract", "as", "async", "await", "become", "box", "break", "const", "continue", "crate",
"do", "dyn", "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl", "in",
"let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub", "ref",
"return", "self", "Self", "static", "struct", "super", "trait", "true", "try", "type",
"typeof", "unsafe", "unsized", "use", "virtual", "where", "while", "yield",
],
contextual_keywords: &[
"auto",
"builtin",
"default",
"existential",
"union",
"raw",
"macro_rules",
"yeet",
"offset_of",
"asm",
"format_args",
],
literals: &["INT_NUMBER", "FLOAT_NUMBER", "CHAR", "BYTE", "STRING", "BYTE_STRING", "C_STRING"],
tokens: &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],
nodes: &[
"SOURCE_FILE",
"STRUCT",
"UNION",
"ENUM",
"FN",
"RET_TYPE",
"EXTERN_CRATE",
"MODULE",
"USE",
"STATIC",
"CONST",
"TRAIT",
"TRAIT_ALIAS",
"IMPL",
"TYPE_ALIAS",
"MACRO_CALL",
"MACRO_RULES",
"MACRO_ARM",
"TOKEN_TREE",
"MACRO_DEF",
"PAREN_TYPE",
"TUPLE_TYPE",
"MACRO_TYPE",
"NEVER_TYPE",
"PATH_TYPE",
"PTR_TYPE",
"ARRAY_TYPE",
"SLICE_TYPE",
"REF_TYPE",
"INFER_TYPE",
"FN_PTR_TYPE",
"FOR_TYPE",
"IMPL_TRAIT_TYPE",
"DYN_TRAIT_TYPE",
"OR_PAT",
"PAREN_PAT",
"REF_PAT",
"BOX_PAT",
"IDENT_PAT",
"WILDCARD_PAT",
"REST_PAT",
"PATH_PAT",
"RECORD_PAT",
"RECORD_PAT_FIELD_LIST",
"RECORD_PAT_FIELD",
"TUPLE_STRUCT_PAT",
"TUPLE_PAT",
"SLICE_PAT",
"RANGE_PAT",
"LITERAL_PAT",
"MACRO_PAT",
"CONST_BLOCK_PAT",
// atoms
"TUPLE_EXPR",
"ARRAY_EXPR",
"PAREN_EXPR",
"PATH_EXPR",
"CLOSURE_EXPR",
"IF_EXPR",
"WHILE_EXPR",
"LOOP_EXPR",
"FOR_EXPR",
"CONTINUE_EXPR",
"BREAK_EXPR",
"LABEL",
"BLOCK_EXPR",
"STMT_LIST",
"RETURN_EXPR",
"BECOME_EXPR",
"YIELD_EXPR",
"YEET_EXPR",
"LET_EXPR",
"UNDERSCORE_EXPR",
"MACRO_EXPR",
"MATCH_EXPR",
"MATCH_ARM_LIST",
"MATCH_ARM",
"MATCH_GUARD",
"RECORD_EXPR",
"RECORD_EXPR_FIELD_LIST",
"RECORD_EXPR_FIELD",
"OFFSET_OF_EXPR",
"ASM_EXPR",
"FORMAT_ARGS_EXPR",
"FORMAT_ARGS_ARG",
// postfix
"CALL_EXPR",
"INDEX_EXPR",
"METHOD_CALL_EXPR",
"FIELD_EXPR",
"AWAIT_EXPR",
"TRY_EXPR",
"CAST_EXPR",
// unary
"REF_EXPR",
"PREFIX_EXPR",
"RANGE_EXPR", // just weird
"BIN_EXPR",
"EXTERN_BLOCK",
"EXTERN_ITEM_LIST",
"VARIANT",
"RECORD_FIELD_LIST",
"RECORD_FIELD",
"TUPLE_FIELD_LIST",
"TUPLE_FIELD",
"VARIANT_LIST",
"ITEM_LIST",
"ASSOC_ITEM_LIST",
"ATTR",
"META",
"USE_TREE",
"USE_TREE_LIST",
"PATH",
"PATH_SEGMENT",
"LITERAL",
"RENAME",
"VISIBILITY",
"WHERE_CLAUSE",
"WHERE_PRED",
"ABI",
"NAME",
"NAME_REF",
"LET_STMT",
"LET_ELSE",
"EXPR_STMT",
"GENERIC_PARAM_LIST",
"GENERIC_PARAM",
"LIFETIME_PARAM",
"TYPE_PARAM",
"RETURN_TYPE_ARG",
"CONST_PARAM",
"GENERIC_ARG_LIST",
"LIFETIME",
"LIFETIME_ARG",
"TYPE_ARG",
"ASSOC_TYPE_ARG",
"CONST_ARG",
"PARAM_LIST",
"PARAM",
"SELF_PARAM",
"ARG_LIST",
"TYPE_BOUND",
"TYPE_BOUND_LIST",
// macro related
"MACRO_ITEMS",
"MACRO_STMTS",
"MACRO_EAGER_INPUT",
],
};
/// The punctuations of the language, as `(source text, kind name)` pairs.
const PUNCT: &[(&str, &str)] = &[
// Keep `$` as the first entry: `generate_kind_src` unconditionally marks index 0 as
// used before it checks that every punctuation is referenced by the grammar.
("$", "DOLLAR"),
(";", "SEMICOLON"),
(",", "COMMA"),
("(", "L_PAREN"),
(")", "R_PAREN"),
("{", "L_CURLY"),
("}", "R_CURLY"),
("[", "L_BRACK"),
("]", "R_BRACK"),
("<", "L_ANGLE"),
(">", "R_ANGLE"),
("@", "AT"),
("#", "POUND"),
("~", "TILDE"),
("?", "QUESTION"),
("&", "AMP"),
("|", "PIPE"),
("+", "PLUS"),
("*", "STAR"),
("/", "SLASH"),
("^", "CARET"),
("%", "PERCENT"),
("_", "UNDERSCORE"),
(".", "DOT"),
("..", "DOT2"),
("...", "DOT3"),
("..=", "DOT2EQ"),
(":", "COLON"),
("::", "COLON2"),
("=", "EQ"),
("==", "EQ2"),
("=>", "FAT_ARROW"),
("!", "BANG"),
("!=", "NEQ"),
("-", "MINUS"),
("->", "THIN_ARROW"),
("<=", "LTEQ"),
(">=", "GTEQ"),
("+=", "PLUSEQ"),
("-=", "MINUSEQ"),
("|=", "PIPEEQ"),
("&=", "AMPEQ"),
("^=", "CARETEQ"),
("/=", "SLASHEQ"),
("*=", "STAREQ"),
("%=", "PERCENTEQ"),
("&&", "AMP2"),
("||", "PIPE2"),
("<<", "SHL"),
(">>", "SHR"),
("<<=", "SHLEQ"),
(">>=", "SHREQ"),
];
/// Token kinds that are always present: `generate_kind_src` seeds its token list with
/// these and then appends the `#`-prefixed tokens found in the grammar.
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// For reference, the previous hand-written list was
// `["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"]`.
/// Name of the grammar token that `generate_kind_src` skips entirely.
const EOF: &str = "EOF";
/// Keywords that `generate_kind_src` always adds to the keyword list, in addition to
/// the ones referenced by the grammar.
const RESERVED: &[&str] = &[
"abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
"virtual", "yield", "try",
];
/// Derives the [`KindsSrc`] tables from the lowered AST description and the grammar.
///
/// Every grammar token except `EOF` is classified by its name:
/// - `@name` becomes a literal kind (upper snake case),
/// - `#name` becomes a token kind (upper snake case),
/// - `?name` becomes a contextual keyword,
/// - a purely alphabetic name becomes a keyword,
/// - anything else must be one of the entries of `PUNCT`.
///
/// `RESERVED` is merged into the keywords, and the node kinds are the upper snake case
/// names of all AST nodes and enums. Every resulting list is sorted.
///
/// # Panics
///
/// Panics if the grammar references punctuation that is missing from `PUNCT`, or if an
/// entry of `PUNCT` (other than the pre-marked first one) is not referenced by the grammar.
pub(crate) fn generate_kind_src(
    nodes: &[AstNodeSrc],
    enums: &[AstEnumSrc],
    grammar: &ungrammar::Grammar,
) -> KindsSrc {
    // Everything collected here is leaked to obtain `'static` data. Code generation is a
    // one-shot job, so never freeing it is fine and spares us from threading lifetimes.
    let mut keywords: Vec<&'static str> = Vec::new();
    let mut contextual_keywords: Vec<&'static str> = Vec::new();
    let mut literals: Vec<&'static str> = Vec::new();
    let mut tokens: Vec<&'static str> = TOKENS.to_vec();

    // One flag per `PUNCT` entry; `$` (index 0) is marked as used up front.
    let mut punct_seen = vec![false; PUNCT.len()];
    punct_seen[0] = true;

    for token in grammar.tokens() {
        let name = &*grammar[token].name;
        if name == EOF {
            continue;
        }
        // The first character may be a marker selecting the token class; a marker on its
        // own (empty remainder) is ordinary punctuation.
        let (marker, rest) = name.split_at(1);
        let has_rest = !rest.is_empty();
        if marker == "@" && has_rest {
            literals.push(String::leak(to_upper_snake_case(rest)));
        } else if marker == "#" && has_rest {
            tokens.push(String::leak(to_upper_snake_case(rest)));
        } else if marker == "?" && has_rest {
            contextual_keywords.push(String::leak(rest.to_owned()));
        } else if name.chars().all(char::is_alphabetic) {
            keywords.push(String::leak(name.to_owned()));
        } else {
            match PUNCT.iter().position(|(punct, _)| *punct == name) {
                Some(idx) => punct_seen[idx] = true,
                None => panic!("Grammar references unknown punctuation {name:?}"),
            }
        }
    }

    // Every known punctuation has to be referenced by the grammar.
    for ((punct, _), seen) in PUNCT.iter().zip(punct_seen) {
        assert!(seen, "Punctuation {punct:?} is not used in grammar");
    }

    keywords.extend(RESERVED.iter().copied());
    keywords.sort();
    keywords.dedup();
    contextual_keywords.sort();
    literals.sort();
    tokens.sort();

    let mut nodes: Vec<&'static str> = nodes
        .iter()
        .map(|node| &node.name)
        .chain(enums.iter().map(|en| &en.name))
        .map(|name| {
            let leaked: &'static str = String::leak(to_upper_snake_case(name));
            leaked
        })
        .collect();
    nodes.sort();

    KindsSrc {
        punct: PUNCT,
        nodes: Vec::leak(nodes),
        keywords: Vec::leak(keywords),
        contextual_keywords: Vec::leak(contextual_keywords),
        literals: Vec::leak(literals),
        tokens: Vec::leak(tokens),
    }
}
#[derive(Default, Debug)]
pub(crate) struct AstSrc {