From 983c9c122ed080d919a1e8ab8b2078b00a527508 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Wed, 17 Jul 2024 10:04:45 +0200 Subject: [PATCH] Derive kinds information from ungrammar file --- crates/parser/src/grammar.rs | 36 -- crates/parser/src/grammar/items.rs | 9 +- crates/parser/src/lib.rs | 3 - crates/parser/src/syntax_kind/generated.rs | 362 +++++++++-------- .../inline/ok/0131_existential_type.rast | 31 -- .../parser/inline/ok/0131_existential_type.rs | 1 - crates/syntax/rust.ungram | 128 +++--- crates/syntax/src/ast/generated/nodes.rs | 46 +-- crates/syntax/src/lib.rs | 109 ----- xtask/src/codegen.rs | 3 +- xtask/src/codegen/grammar.rs | 59 +-- xtask/src/codegen/grammar/ast_src.rs | 379 +++++++----------- 12 files changed, 448 insertions(+), 718 deletions(-) delete mode 100644 crates/parser/test_data/parser/inline/ok/0131_existential_type.rast delete mode 100644 crates/parser/test_data/parser/inline/ok/0131_existential_type.rs diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index 2930190cb3..7ae1e5f82e 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -165,42 +165,6 @@ pub(crate) mod entry { } m.complete(p, ERROR); } - - pub(crate) fn eager_macro_input(p: &mut Parser<'_>) { - let m = p.start(); - - let closing_paren_kind = match p.current() { - T!['{'] => T!['}'], - T!['('] => T![')'], - T!['['] => T![']'], - _ => { - p.error("expected `{`, `[`, `(`"); - while !p.at(EOF) { - p.bump_any(); - } - m.complete(p, ERROR); - return; - } - }; - p.bump_any(); - while !p.at(EOF) && !p.at(closing_paren_kind) { - if expressions::expr(p).is_none() { - break; - } - if !p.at(EOF) && !p.at(closing_paren_kind) { - p.expect(T![,]); - } - } - p.expect(closing_paren_kind); - if p.at(EOF) { - m.complete(p, MACRO_EAGER_INPUT); - return; - } - while !p.at(EOF) { - p.bump_any(); - } - m.complete(p, ERROR); - } } } diff --git a/crates/parser/src/grammar/items.rs b/crates/parser/src/grammar/items.rs index 99bbf47654..262255b356 100644 --- a/crates/parser/src/grammar/items.rs +++ b/crates/parser/src/grammar/items.rs @@ -173,13 +173,6 @@ pub(super) fn opt_item(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> { } } - // test existential_type - // existential type Foo: Fn() -> usize; - if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] { - p.bump_remap(T![existential]); - has_mods = true; - } - // items match p.current() { T![fn] => fn_(p, m), @@ -201,7 +194,7 @@ pub(super) fn opt_item(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> { _ if has_visibility || has_mods => { if has_mods { - p.error("expected existential, fn, trait or impl"); + p.error("expected fn, trait or impl"); } else { p.error("expected an item"); } diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 738ed239a7..679492066a 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -82,8 +82,6 @@ pub enum TopEntryPoint { /// Edge case -- macros generally don't expand to attributes, with the /// exception of `cfg_attr` which does! MetaItem, - /// Edge case 2 -- eager macros expand their input to a delimited list of comma separated expressions - MacroEagerInput, } impl TopEntryPoint { @@ -97,7 +95,6 @@ impl TopEntryPoint { TopEntryPoint::Type => grammar::entry::top::type_, TopEntryPoint::Expr => grammar::entry::top::expr, TopEntryPoint::MetaItem => grammar::entry::top::meta_item, - TopEntryPoint::MacroEagerInput => grammar::entry::top::eager_macro_input, }; let mut p = parser::Parser::new(input, edition); entry_point(&mut p); diff --git a/crates/parser/src/syntax_kind/generated.rs b/crates/parser/src/syntax_kind/generated.rs index ad3398453b..f2c24540c0 100644 --- a/crates/parser/src/syntax_kind/generated.rs +++ b/crates/parser/src/syntax_kind/generated.rs @@ -9,6 +9,7 @@ pub enum SyntaxKind { TOMBSTONE, #[doc(hidden)] EOF, + DOLLAR, SEMICOLON, COMMA, L_PAREN, @@ -23,7 +24,6 @@ pub enum SyntaxKind { POUND, TILDE, QUESTION, - DOLLAR, AMP, PIPE, PLUS, @@ -61,6 +61,7 @@ pub enum SyntaxKind { SHR, SHLEQ, SHREQ, + SELF_TYPE_KW, ABSTRACT_KW, AS_KW, ASYNC_KW, @@ -96,8 +97,8 @@ pub enum SyntaxKind { REF_KW, RETURN_KW, SELF_KW, - SELF_TYPE_KW, STATIC_KW, + STRING_KW, STRUCT_KW, SUPER_KW, TRAIT_KW, @@ -112,173 +113,183 @@ pub enum SyntaxKind { WHERE_KW, WHILE_KW, YIELD_KW, + ASM_KW, AUTO_KW, BUILTIN_KW, DEFAULT_KW, - EXISTENTIAL_KW, - UNION_KW, - RAW_KW, - MACRO_RULES_KW, - YEET_KW, - OFFSET_OF_KW, - ASM_KW, FORMAT_ARGS_KW, - INT_NUMBER, - FLOAT_NUMBER, - CHAR, + MACRO_RULES_KW, + OFFSET_OF_KW, + RAW_KW, + UNION_KW, + YEET_KW, BYTE, - STRING, BYTE_STRING, + CHAR, C_STRING, + FLOAT_NUMBER, + INT_NUMBER, + RAW_BYTE_STRING, + RAW_C_STRING, + RAW_STRING, + STRING, + COMMENT, ERROR, IDENT, - WHITESPACE, LIFETIME_IDENT, - COMMENT, + NEWLINE, SHEBANG, - SOURCE_FILE, - STRUCT, - UNION, - ENUM, - FN, - RET_TYPE, - EXTERN_CRATE, - MODULE, - USE, - STATIC, + WHITESPACE, + ABI, + ADT, + ARG_LIST, + ARRAY_EXPR, + ARRAY_TYPE, + ASM_EXPR, + ASSOC_ITEM, + ASSOC_ITEM_LIST, + ASSOC_TYPE_ARG, + ATTR, + AWAIT_EXPR, + BECOME_EXPR, + BIN_EXPR, + BLOCK_EXPR, + BOX_PAT, + BREAK_EXPR, + CALL_EXPR, + CAST_EXPR, + CLOSURE_EXPR, CONST, + CONST_ARG, + CONST_BLOCK_PAT, + CONST_PARAM, + CONTINUE_EXPR, + DYN_TRAIT_TYPE, + ENUM, + EXPR, + EXPR_STMT, + EXTERN_BLOCK, + EXTERN_CRATE, + EXTERN_ITEM, + EXTERN_ITEM_LIST, + FIELD_EXPR, + FIELD_LIST, + FN, + FN_PTR_TYPE, + FORMAT_ARGS_ARG, + FORMAT_ARGS_EXPR, + FOR_EXPR, + FOR_TYPE, + GENERIC_ARG, + GENERIC_ARG_LIST, + GENERIC_PARAM, + GENERIC_PARAM_LIST, + IDENT_PAT, + IF_EXPR, + IMPL, + IMPL_TRAIT_TYPE, + INDEX_EXPR, + INFER_TYPE, + ITEM, + ITEM_LIST, + LABEL, + LET_ELSE, + LET_EXPR, + LET_STMT, + LIFETIME, + LIFETIME_ARG, + LIFETIME_PARAM, + LITERAL, + LITERAL_PAT, + LOOP_EXPR, + MACRO_CALL, + MACRO_DEF, + MACRO_EXPR, + MACRO_ITEMS, + MACRO_PAT, + MACRO_RULES, + MACRO_STMTS, + MACRO_TYPE, + MATCH_ARM, + MATCH_ARM_LIST, + MATCH_EXPR, + MATCH_GUARD, + META, + METHOD_CALL_EXPR, + MODULE, + NAME, + NAME_REF, + NEVER_TYPE, + OFFSET_OF_EXPR, + OR_PAT, + PARAM, + PARAM_LIST, + PAREN_EXPR, + PAREN_PAT, + PAREN_TYPE, + PAT, + PATH, + PATH_EXPR, + PATH_PAT, + PATH_SEGMENT, + PATH_TYPE, + PREFIX_EXPR, + PTR_TYPE, + RANGE_EXPR, + RANGE_PAT, + RECORD_EXPR, + RECORD_EXPR_FIELD, + RECORD_EXPR_FIELD_LIST, + RECORD_FIELD, + RECORD_FIELD_LIST, + RECORD_PAT, + RECORD_PAT_FIELD, + RECORD_PAT_FIELD_LIST, + REF_EXPR, + REF_PAT, + REF_TYPE, + RENAME, + REST_PAT, + RETURN_EXPR, + RET_TYPE, + SELF_PARAM, + SLICE_PAT, + SLICE_TYPE, + SOURCE_FILE, + STATIC, + STMT, + STMT_LIST, + STRUCT, + TOKEN_TREE, TRAIT, TRAIT_ALIAS, - IMPL, - TYPE_ALIAS, - MACRO_CALL, - MACRO_RULES, - MACRO_ARM, - TOKEN_TREE, - MACRO_DEF, - PAREN_TYPE, - TUPLE_TYPE, - MACRO_TYPE, - NEVER_TYPE, - PATH_TYPE, - PTR_TYPE, - ARRAY_TYPE, - SLICE_TYPE, - REF_TYPE, - INFER_TYPE, - FN_PTR_TYPE, - FOR_TYPE, - IMPL_TRAIT_TYPE, - DYN_TRAIT_TYPE, - OR_PAT, - PAREN_PAT, - REF_PAT, - BOX_PAT, - IDENT_PAT, - WILDCARD_PAT, - REST_PAT, - PATH_PAT, - RECORD_PAT, - RECORD_PAT_FIELD_LIST, - RECORD_PAT_FIELD, - TUPLE_STRUCT_PAT, - TUPLE_PAT, - SLICE_PAT, - RANGE_PAT, - LITERAL_PAT, - MACRO_PAT, - CONST_BLOCK_PAT, - TUPLE_EXPR, - ARRAY_EXPR, - PAREN_EXPR, - PATH_EXPR, - CLOSURE_EXPR, - IF_EXPR, - WHILE_EXPR, - LOOP_EXPR, - FOR_EXPR, - CONTINUE_EXPR, - BREAK_EXPR, - LABEL, - BLOCK_EXPR, - STMT_LIST, - RETURN_EXPR, - BECOME_EXPR, - YIELD_EXPR, - YEET_EXPR, - LET_EXPR, - UNDERSCORE_EXPR, - MACRO_EXPR, - MATCH_EXPR, - MATCH_ARM_LIST, - MATCH_ARM, - MATCH_GUARD, - RECORD_EXPR, - RECORD_EXPR_FIELD_LIST, - RECORD_EXPR_FIELD, - OFFSET_OF_EXPR, - ASM_EXPR, - FORMAT_ARGS_EXPR, - FORMAT_ARGS_ARG, - CALL_EXPR, - INDEX_EXPR, - METHOD_CALL_EXPR, - FIELD_EXPR, - AWAIT_EXPR, TRY_EXPR, - CAST_EXPR, - REF_EXPR, - PREFIX_EXPR, - RANGE_EXPR, - BIN_EXPR, - EXTERN_BLOCK, - EXTERN_ITEM_LIST, - VARIANT, - RECORD_FIELD_LIST, - RECORD_FIELD, - TUPLE_FIELD_LIST, + TUPLE_EXPR, TUPLE_FIELD, - VARIANT_LIST, - ITEM_LIST, - ASSOC_ITEM_LIST, - ATTR, - META, + TUPLE_FIELD_LIST, + TUPLE_PAT, + TUPLE_STRUCT_PAT, + TUPLE_TYPE, + TYPE, + TYPE_ALIAS, + TYPE_ARG, + TYPE_BOUND, + TYPE_BOUND_LIST, + TYPE_PARAM, + UNDERSCORE_EXPR, + UNION, + USE, USE_TREE, USE_TREE_LIST, - PATH, - PATH_SEGMENT, - LITERAL, - RENAME, + VARIANT, + VARIANT_LIST, VISIBILITY, WHERE_CLAUSE, WHERE_PRED, - ABI, - NAME, - NAME_REF, - LET_STMT, - LET_ELSE, - EXPR_STMT, - GENERIC_PARAM_LIST, - GENERIC_PARAM, - LIFETIME_PARAM, - TYPE_PARAM, - RETURN_TYPE_ARG, - CONST_PARAM, - GENERIC_ARG_LIST, - LIFETIME, - LIFETIME_ARG, - TYPE_ARG, - ASSOC_TYPE_ARG, - CONST_ARG, - PARAM_LIST, - PARAM, - SELF_PARAM, - ARG_LIST, - TYPE_BOUND, - TYPE_BOUND_LIST, - MACRO_ITEMS, - MACRO_STMTS, - MACRO_EAGER_INPUT, + WHILE_EXPR, + WILDCARD_PAT, + YEET_EXPR, + YIELD_EXPR, #[doc(hidden)] __LAST, } @@ -287,7 +298,8 @@ impl SyntaxKind { pub fn is_keyword(self) -> bool { matches!( self, - ABSTRACT_KW + SELF_TYPE_KW + | ABSTRACT_KW | AS_KW | ASYNC_KW | AWAIT_KW @@ -322,8 +334,8 @@ impl SyntaxKind { | REF_KW | RETURN_KW | SELF_KW - | SELF_TYPE_KW | STATIC_KW + | STRING_KW | STRUCT_KW | SUPER_KW | TRAIT_KW @@ -338,23 +350,23 @@ impl SyntaxKind { | WHERE_KW | WHILE_KW | YIELD_KW + | ASM_KW | AUTO_KW | BUILTIN_KW | DEFAULT_KW - | EXISTENTIAL_KW - | UNION_KW - | RAW_KW - | MACRO_RULES_KW - | YEET_KW - | OFFSET_OF_KW - | ASM_KW | FORMAT_ARGS_KW + | MACRO_RULES_KW + | OFFSET_OF_KW + | RAW_KW + | UNION_KW + | YEET_KW ) } pub fn is_punct(self) -> bool { matches!( self, - SEMICOLON + DOLLAR + | SEMICOLON | COMMA | L_PAREN | R_PAREN @@ -368,7 +380,6 @@ impl SyntaxKind { | POUND | TILDE | QUESTION - | DOLLAR | AMP | PIPE | PLUS @@ -409,10 +420,22 @@ impl SyntaxKind { ) } pub fn is_literal(self) -> bool { - matches!(self, INT_NUMBER | FLOAT_NUMBER | CHAR | BYTE | STRING | BYTE_STRING | C_STRING) + matches!( + self, + BYTE | BYTE_STRING + | CHAR + | C_STRING + | FLOAT_NUMBER + | INT_NUMBER + | RAW_BYTE_STRING + | RAW_C_STRING + | RAW_STRING + | STRING + ) } pub fn from_keyword(ident: &str) -> Option { let kw = match ident { + "Self" => SELF_TYPE_KW, "abstract" => ABSTRACT_KW, "as" => AS_KW, "async" => ASYNC_KW, @@ -448,8 +471,8 @@ impl SyntaxKind { "ref" => REF_KW, "return" => RETURN_KW, "self" => SELF_KW, - "Self" => SELF_TYPE_KW, "static" => STATIC_KW, + "string" => STRING_KW, "struct" => STRUCT_KW, "super" => SUPER_KW, "trait" => TRAIT_KW, @@ -470,23 +493,23 @@ impl SyntaxKind { } pub fn from_contextual_keyword(ident: &str) -> Option { let kw = match ident { + "asm" => ASM_KW, "auto" => AUTO_KW, "builtin" => BUILTIN_KW, "default" => DEFAULT_KW, - "existential" => EXISTENTIAL_KW, - "union" => UNION_KW, - "raw" => RAW_KW, - "macro_rules" => MACRO_RULES_KW, - "yeet" => YEET_KW, - "offset_of" => OFFSET_OF_KW, - "asm" => ASM_KW, "format_args" => FORMAT_ARGS_KW, + "macro_rules" => MACRO_RULES_KW, + "offset_of" => OFFSET_OF_KW, + "raw" => RAW_KW, + "union" => UNION_KW, + "yeet" => YEET_KW, _ => return None, }; Some(kw) } pub fn from_char(c: char) -> Option { let tok = match c { + '$' => DOLLAR, ';' => SEMICOLON, ',' => COMMA, '(' => L_PAREN, @@ -501,7 +524,6 @@ impl SyntaxKind { '#' => POUND, '~' => TILDE, '?' => QUESTION, - '$' => DOLLAR, '&' => AMP, '|' => PIPE, '+' => PLUS, @@ -521,4 +543,4 @@ impl SyntaxKind { } } #[macro_export] -macro_rules ! T { [;] => { $ crate :: SyntaxKind :: SEMICOLON } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_CURLY } ; ['}'] => { $ crate :: SyntaxKind :: R_CURLY } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; [<] => { $ crate :: SyntaxKind :: L_ANGLE } ; [>] => { $ crate :: SyntaxKind :: R_ANGLE } ; [@] => { $ crate :: SyntaxKind :: AT } ; [#] => { $ crate :: SyntaxKind :: POUND } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [?] => { $ crate :: SyntaxKind :: QUESTION } ; [$] => { $ crate :: SyntaxKind :: DOLLAR } ; [&] => { $ crate :: SyntaxKind :: AMP } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [%] => { $ crate :: SyntaxKind :: PERCENT } ; [_] => { $ crate :: SyntaxKind :: UNDERSCORE } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [..] => { $ crate :: SyntaxKind :: DOT2 } ; [...] => { $ crate :: SyntaxKind :: DOT3 } ; [..=] => { $ crate :: SyntaxKind :: DOT2EQ } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [::] => { $ crate :: SyntaxKind :: COLON2 } ; [=] => { $ crate :: SyntaxKind :: EQ } ; [==] => { $ crate :: SyntaxKind :: EQ2 } ; [=>] => { $ crate :: SyntaxKind :: FAT_ARROW } ; [!] => { $ crate :: SyntaxKind :: BANG } ; [!=] => { $ crate :: SyntaxKind :: NEQ } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [->] => { $ crate :: SyntaxKind :: THIN_ARROW } ; [<=] => { $ crate :: SyntaxKind :: LTEQ } ; [>=] => { $ crate :: SyntaxKind :: GTEQ } ; [+=] => { $ crate :: SyntaxKind :: PLUSEQ } ; [-=] => { $ crate :: SyntaxKind :: MINUSEQ } ; [|=] => { $ crate :: SyntaxKind :: PIPEEQ } ; [&=] => { $ crate :: SyntaxKind :: AMPEQ } ; [^=] => { $ crate :: SyntaxKind :: CARETEQ } ; [/=] => { $ crate :: SyntaxKind :: SLASHEQ } ; [*=] => { $ crate :: SyntaxKind :: STAREQ } ; [%=] => { $ crate :: SyntaxKind :: PERCENTEQ } ; [&&] => { $ crate :: SyntaxKind :: AMP2 } ; [||] => { $ crate :: SyntaxKind :: PIPE2 } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [<<=] => { $ crate :: SyntaxKind :: SHLEQ } ; [>>=] => { $ crate :: SyntaxKind :: SHREQ } ; [abstract] => { $ crate :: SyntaxKind :: ABSTRACT_KW } ; [as] => { $ crate :: SyntaxKind :: AS_KW } ; [async] => { $ crate :: SyntaxKind :: ASYNC_KW } ; [await] => { $ crate :: SyntaxKind :: AWAIT_KW } ; [become] => { $ crate :: SyntaxKind :: BECOME_KW } ; [box] => { $ crate :: SyntaxKind :: BOX_KW } ; [break] => { $ crate :: SyntaxKind :: BREAK_KW } ; [const] => { $ crate :: SyntaxKind :: CONST_KW } ; [continue] => { $ crate :: SyntaxKind :: CONTINUE_KW } ; [crate] => { $ crate :: SyntaxKind :: CRATE_KW } ; [do] => { $ crate :: SyntaxKind :: DO_KW } ; [dyn] => { $ crate :: SyntaxKind :: DYN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [enum] => { $ crate :: SyntaxKind :: ENUM_KW } ; [extern] => { $ crate :: SyntaxKind :: EXTERN_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [final] => { $ crate :: SyntaxKind :: FINAL_KW } ; [fn] => { $ crate :: SyntaxKind :: FN_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [impl] => { $ crate :: SyntaxKind :: IMPL_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [let] => { $ crate :: SyntaxKind :: LET_KW } ; [loop] => { $ crate :: SyntaxKind :: LOOP_KW } ; [macro] => { $ crate :: SyntaxKind :: MACRO_KW } ; [match] => { $ crate :: SyntaxKind :: MATCH_KW } ; [mod] => { $ crate :: SyntaxKind :: MOD_KW } ; [move] => { $ crate :: SyntaxKind :: MOVE_KW } ; [mut] => { $ crate :: SyntaxKind :: MUT_KW } ; [override] => { $ crate :: SyntaxKind :: OVERRIDE_KW } ; [priv] => { $ crate :: SyntaxKind :: PRIV_KW } ; [pub] => { $ crate :: SyntaxKind :: PUB_KW } ; [ref] => { $ crate :: SyntaxKind :: REF_KW } ; [return] => { $ crate :: SyntaxKind :: RETURN_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [Self] => { $ crate :: SyntaxKind :: SELF_TYPE_KW } ; [static] => { $ crate :: SyntaxKind :: STATIC_KW } ; [struct] => { $ crate :: SyntaxKind :: STRUCT_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [trait] => { $ crate :: SyntaxKind :: TRAIT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [try] => { $ crate :: SyntaxKind :: TRY_KW } ; [type] => { $ crate :: SyntaxKind :: TYPE_KW } ; [typeof] => { $ crate :: SyntaxKind :: TYPEOF_KW } ; [unsafe] => { $ crate :: SyntaxKind :: UNSAFE_KW } ; [unsized] => { $ crate :: SyntaxKind :: UNSIZED_KW } ; [use] => { $ crate :: SyntaxKind :: USE_KW } ; [virtual] => { $ crate :: SyntaxKind :: VIRTUAL_KW } ; [where] => { $ crate :: SyntaxKind :: WHERE_KW } ; [while] => { $ crate :: SyntaxKind :: WHILE_KW } ; [yield] => { $ crate :: SyntaxKind :: YIELD_KW } ; [auto] => { $ crate :: SyntaxKind :: AUTO_KW } ; [builtin] => { $ crate :: SyntaxKind :: BUILTIN_KW } ; [default] => { $ crate :: SyntaxKind :: DEFAULT_KW } ; [existential] => { $ crate :: SyntaxKind :: EXISTENTIAL_KW } ; [union] => { $ crate :: SyntaxKind :: UNION_KW } ; [raw] => { $ crate :: SyntaxKind :: RAW_KW } ; [macro_rules] => { $ crate :: SyntaxKind :: MACRO_RULES_KW } ; [yeet] => { $ crate :: SyntaxKind :: YEET_KW } ; [offset_of] => { $ crate :: SyntaxKind :: OFFSET_OF_KW } ; [asm] => { $ crate :: SyntaxKind :: ASM_KW } ; [format_args] => { $ crate :: SyntaxKind :: FORMAT_ARGS_KW } ; [lifetime_ident] => { $ crate :: SyntaxKind :: LIFETIME_IDENT } ; [ident] => { $ crate :: SyntaxKind :: IDENT } ; [shebang] => { $ crate :: SyntaxKind :: SHEBANG } ; } +macro_rules ! T { [$] => { $ crate :: SyntaxKind :: DOLLAR } ; [;] => { $ crate :: SyntaxKind :: SEMICOLON } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_CURLY } ; ['}'] => { $ crate :: SyntaxKind :: R_CURLY } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; [<] => { $ crate :: SyntaxKind :: L_ANGLE } ; [>] => { $ crate :: SyntaxKind :: R_ANGLE } ; [@] => { $ crate :: SyntaxKind :: AT } ; [#] => { $ crate :: SyntaxKind :: POUND } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [?] => { $ crate :: SyntaxKind :: QUESTION } ; [&] => { $ crate :: SyntaxKind :: AMP } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [%] => { $ crate :: SyntaxKind :: PERCENT } ; [_] => { $ crate :: SyntaxKind :: UNDERSCORE } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [..] => { $ crate :: SyntaxKind :: DOT2 } ; [...] => { $ crate :: SyntaxKind :: DOT3 } ; [..=] => { $ crate :: SyntaxKind :: DOT2EQ } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [::] => { $ crate :: SyntaxKind :: COLON2 } ; [=] => { $ crate :: SyntaxKind :: EQ } ; [==] => { $ crate :: SyntaxKind :: EQ2 } ; [=>] => { $ crate :: SyntaxKind :: FAT_ARROW } ; [!] => { $ crate :: SyntaxKind :: BANG } ; [!=] => { $ crate :: SyntaxKind :: NEQ } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [->] => { $ crate :: SyntaxKind :: THIN_ARROW } ; [<=] => { $ crate :: SyntaxKind :: LTEQ } ; [>=] => { $ crate :: SyntaxKind :: GTEQ } ; [+=] => { $ crate :: SyntaxKind :: PLUSEQ } ; [-=] => { $ crate :: SyntaxKind :: MINUSEQ } ; [|=] => { $ crate :: SyntaxKind :: PIPEEQ } ; [&=] => { $ crate :: SyntaxKind :: AMPEQ } ; [^=] => { $ crate :: SyntaxKind :: CARETEQ } ; [/=] => { $ crate :: SyntaxKind :: SLASHEQ } ; [*=] => { $ crate :: SyntaxKind :: STAREQ } ; [%=] => { $ crate :: SyntaxKind :: PERCENTEQ } ; [&&] => { $ crate :: SyntaxKind :: AMP2 } ; [||] => { $ crate :: SyntaxKind :: PIPE2 } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [<<=] => { $ crate :: SyntaxKind :: SHLEQ } ; [>>=] => { $ crate :: SyntaxKind :: SHREQ } ; [Self] => { $ crate :: SyntaxKind :: SELF_TYPE_KW } ; [abstract] => { $ crate :: SyntaxKind :: ABSTRACT_KW } ; [as] => { $ crate :: SyntaxKind :: AS_KW } ; [async] => { $ crate :: SyntaxKind :: ASYNC_KW } ; [await] => { $ crate :: SyntaxKind :: AWAIT_KW } ; [become] => { $ crate :: SyntaxKind :: BECOME_KW } ; [box] => { $ crate :: SyntaxKind :: BOX_KW } ; [break] => { $ crate :: SyntaxKind :: BREAK_KW } ; [const] => { $ crate :: SyntaxKind :: CONST_KW } ; [continue] => { $ crate :: SyntaxKind :: CONTINUE_KW } ; [crate] => { $ crate :: SyntaxKind :: CRATE_KW } ; [do] => { $ crate :: SyntaxKind :: DO_KW } ; [dyn] => { $ crate :: SyntaxKind :: DYN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [enum] => { $ crate :: SyntaxKind :: ENUM_KW } ; [extern] => { $ crate :: SyntaxKind :: EXTERN_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [final] => { $ crate :: SyntaxKind :: FINAL_KW } ; [fn] => { $ crate :: SyntaxKind :: FN_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [impl] => { $ crate :: SyntaxKind :: IMPL_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [let] => { $ crate :: SyntaxKind :: LET_KW } ; [loop] => { $ crate :: SyntaxKind :: LOOP_KW } ; [macro] => { $ crate :: SyntaxKind :: MACRO_KW } ; [match] => { $ crate :: SyntaxKind :: MATCH_KW } ; [mod] => { $ crate :: SyntaxKind :: MOD_KW } ; [move] => { $ crate :: SyntaxKind :: MOVE_KW } ; [mut] => { $ crate :: SyntaxKind :: MUT_KW } ; [override] => { $ crate :: SyntaxKind :: OVERRIDE_KW } ; [priv] => { $ crate :: SyntaxKind :: PRIV_KW } ; [pub] => { $ crate :: SyntaxKind :: PUB_KW } ; [ref] => { $ crate :: SyntaxKind :: REF_KW } ; [return] => { $ crate :: SyntaxKind :: RETURN_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [static] => { $ crate :: SyntaxKind :: STATIC_KW } ; [string] => { $ crate :: SyntaxKind :: STRING_KW } ; [struct] => { $ crate :: SyntaxKind :: STRUCT_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [trait] => { $ crate :: SyntaxKind :: TRAIT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [try] => { $ crate :: SyntaxKind :: TRY_KW } ; [type] => { $ crate :: SyntaxKind :: TYPE_KW } ; [typeof] => { $ crate :: SyntaxKind :: TYPEOF_KW } ; [unsafe] => { $ crate :: SyntaxKind :: UNSAFE_KW } ; [unsized] => { $ crate :: SyntaxKind :: UNSIZED_KW } ; [use] => { $ crate :: SyntaxKind :: USE_KW } ; [virtual] => { $ crate :: SyntaxKind :: VIRTUAL_KW } ; [where] => { $ crate :: SyntaxKind :: WHERE_KW } ; [while] => { $ crate :: SyntaxKind :: WHILE_KW } ; [yield] => { $ crate :: SyntaxKind :: YIELD_KW } ; [asm] => { $ crate :: SyntaxKind :: ASM_KW } ; [auto] => { $ crate :: SyntaxKind :: AUTO_KW } ; [builtin] => { $ crate :: SyntaxKind :: BUILTIN_KW } ; [default] => { $ crate :: SyntaxKind :: DEFAULT_KW } ; [format_args] => { $ crate :: SyntaxKind :: FORMAT_ARGS_KW } ; [macro_rules] => { $ crate :: SyntaxKind :: MACRO_RULES_KW } ; [offset_of] => { $ crate :: SyntaxKind :: OFFSET_OF_KW } ; [raw] => { $ crate :: SyntaxKind :: RAW_KW } ; [union] => { $ crate :: SyntaxKind :: UNION_KW } ; [yeet] => { $ crate :: SyntaxKind :: YEET_KW } ; [lifetime_ident] => { $ crate :: SyntaxKind :: LIFETIME_IDENT } ; [int_number] => { $ crate :: SyntaxKind :: INT_NUMBER } ; [ident] => { $ crate :: SyntaxKind :: IDENT } ; [shebang] => { $ crate :: SyntaxKind :: SHEBANG } ; } diff --git a/crates/parser/test_data/parser/inline/ok/0131_existential_type.rast b/crates/parser/test_data/parser/inline/ok/0131_existential_type.rast deleted file mode 100644 index b73780261b..0000000000 --- a/crates/parser/test_data/parser/inline/ok/0131_existential_type.rast +++ /dev/null @@ -1,31 +0,0 @@ -SOURCE_FILE - TYPE_ALIAS - EXISTENTIAL_KW "existential" - WHITESPACE " " - TYPE_KW "type" - WHITESPACE " " - NAME - IDENT "Foo" - COLON ":" - WHITESPACE " " - TYPE_BOUND_LIST - TYPE_BOUND - PATH_TYPE - PATH - PATH_SEGMENT - NAME_REF - IDENT "Fn" - PARAM_LIST - L_PAREN "(" - R_PAREN ")" - WHITESPACE " " - RET_TYPE - THIN_ARROW "->" - WHITESPACE " " - PATH_TYPE - PATH - PATH_SEGMENT - NAME_REF - IDENT "usize" - SEMICOLON ";" - WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/0131_existential_type.rs b/crates/parser/test_data/parser/inline/ok/0131_existential_type.rs deleted file mode 100644 index 23baf7145c..0000000000 --- a/crates/parser/test_data/parser/inline/ok/0131_existential_type.rs +++ /dev/null @@ -1 +0,0 @@ -existential type Foo: Fn() -> usize; diff --git a/crates/syntax/rust.ungram b/crates/syntax/rust.ungram index 8c772b9c7a..c98adc3818 100644 --- a/crates/syntax/rust.ungram +++ b/crates/syntax/rust.ungram @@ -8,7 +8,10 @@ // // // -- comment // Name = -- non-terminal definition -// 'ident' -- token (terminal) +// 'ident' -- keyword or punct token (terminal) +// '?ident' -- contextual keyword (terminal) +// '#ident' -- generic token (terminal) +// '@ident' -- literal token (terminal) // A B -- sequence // A | B -- alternation // A* -- zero or more repetition @@ -17,17 +20,17 @@ // label:A -- suggested name for field of AST node //*************************// -// Names, Paths and Macros // +// Paths // //*************************// Name = - 'ident' | 'self' + '#ident' | 'self' NameRef = - 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' + '#ident' | '@int_number' | 'self' | 'super' | 'crate' | 'Self' Lifetime = - 'lifetime_ident' + '#lifetime_ident' Path = (qualifier:Path '::')? segment:PathSegment @@ -38,6 +41,11 @@ PathSegment = | NameRef ParamList RetType? | '<' Type ('as' PathType)? '>' + +//*************************// +// Generics // +//*************************// + GenericArgList = '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' @@ -61,6 +69,36 @@ LifetimeArg = ConstArg = Expr +GenericParamList = + '<' (GenericParam (',' GenericParam)* ','?)? '>' + +GenericParam = + ConstParam +| LifetimeParam +| TypeParam + +TypeParam = + Attr* Name (':' TypeBoundList?)? + ('=' default_type:Type)? + +ConstParam = + Attr* 'const' Name ':' Type + ('=' default_val:ConstArg)? + +LifetimeParam = + Attr* Lifetime (':' TypeBoundList?)? + +WhereClause = + 'where' predicates:(WherePred (',' WherePred)* ','?) + +WherePred = + ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? + + +//*************************// +// Macro // +//*************************// + MacroCall = Attr* Path '!' TokenTree ';'? @@ -72,22 +110,23 @@ TokenTree = MacroItems = Item* -MacroEagerInput = - '(' (Expr (',' Expr)* ','?)? ')' -| '{' (Expr (',' Expr)* ','?)? '}' -| '[' (Expr (',' Expr)* ','?)? ']' - - MacroStmts = statements:Stmt* Expr? +Attr = + '#' '!'? '[' Meta ']' + +Meta = + 'unsafe' '(' Path ('=' Expr | TokenTree)? ')' +| Path ('=' Expr | TokenTree)? + //*************************// // Items // //*************************// SourceFile = - 'shebang'? + '#shebang'? Attr* Item* @@ -112,7 +151,7 @@ Item = MacroRules = Attr* Visibility? - 'macro_rules' '!' Name + '?macro_rules' '!' Name TokenTree MacroDef = @@ -148,7 +187,7 @@ UseTreeList = Fn = Attr* Visibility? - 'default'? 'const'? 'async'? 'unsafe'? Abi? + '?default'? 'const'? 'async'? 'unsafe'? Abi? 'fn' Name GenericParamList? ParamList RetType? WhereClause? (body:BlockExpr | ';') @@ -180,7 +219,7 @@ RetType = TypeAlias = Attr* Visibility? - 'default'? + '?default'? 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? ('=' Type)? ';' @@ -223,7 +262,7 @@ Variant = Union = Attr* Visibility? - 'union' Name GenericParamList? WhereClause? + '?union' Name GenericParamList? WhereClause? RecordFieldList // A Data Type. @@ -236,7 +275,7 @@ Adt = Const = Attr* Visibility? - 'default'? + '?default'? 'const' (Name | '_') ':' Type ('=' body:Expr)? ';' @@ -247,7 +286,7 @@ Static = Trait = Attr* Visibility? - 'unsafe'? 'auto'? + 'unsafe'? '?auto'? 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? AssocItemList @@ -266,7 +305,7 @@ AssocItem = Impl = Attr* Visibility? - 'default'? 'unsafe'? + '?default'? 'unsafe'? 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? AssocItemList @@ -282,41 +321,9 @@ ExternItem = | Static | TypeAlias -GenericParamList = - '<' (GenericParam (',' GenericParam)* ','?)? '>' - -GenericParam = - ConstParam -| LifetimeParam -| TypeParam - -TypeParam = - Attr* Name (':' TypeBoundList?)? - ('=' default_type:Type)? - -ConstParam = - Attr* 'const' Name ':' Type - ('=' default_val:ConstArg)? - -LifetimeParam = - Attr* Lifetime (':' TypeBoundList?)? - -WhereClause = - 'where' predicates:(WherePred (',' WherePred)* ','?) - -WherePred = - ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? - Visibility = 'pub' ('(' 'in'? Path ')')? -Attr = - '#' '!'? '[' Meta ']' - -Meta = - 'unsafe' '(' Path ('=' Expr | TokenTree)? ')' -| Path ('=' Expr | TokenTree)? - //****************************// // Statements and Expressions // @@ -379,13 +386,13 @@ Expr = | UnderscoreExpr OffsetOfExpr = - Attr* 'builtin' '#' 'offset_of' '(' Type ',' fields:(NameRef ('.' NameRef)* ) ')' + Attr* '?builtin' '#' '?offset_of' '(' Type ',' fields:(NameRef ('.' NameRef)* ) ')' AsmExpr = - Attr* 'builtin' '#' 'asm' '(' Expr ')' + Attr* '?builtin' '#' '?asm' '(' Expr ')' FormatArgsExpr = - Attr* 'builtin' '#' 'format_args' '(' + Attr* '?builtin' '#' '?format_args' '(' template:Expr (',' args:(FormatArgsArg (',' FormatArgsArg)* ','?)? )? ')' @@ -398,11 +405,12 @@ MacroExpr = Literal = Attr* value:( - 'int_number' | 'float_number' - | 'string' | 'raw_string' - | 'byte_string' | 'raw_byte_string' + '@int_number' | '@float_number' + | '@string' | '@raw_string' + | '@byte_string' | '@raw_byte_string' + | '@c_string' | '@raw_c_string' + | '@char' | '@byte' | 'true' | 'false' - | 'char' | 'byte' ) PathExpr = @@ -416,7 +424,7 @@ StmtList = '}' RefExpr = - Attr* '&' (('raw' 'const'?)| ('raw'? 'mut') ) Expr + Attr* '&' (('?raw' 'const'?)| ('?raw'? 'mut') ) Expr TryExpr = Attr* Expr '?' @@ -538,7 +546,7 @@ YieldExpr = Attr* 'yield' Expr? YeetExpr = - Attr* 'do' 'yeet' Expr? + Attr* 'do' '?yeet' Expr? LetExpr = Attr* 'let' Pat '=' Expr diff --git a/crates/syntax/src/ast/generated/nodes.rs b/crates/syntax/src/ast/generated/nodes.rs index 0373e7c552..32a429ba32 100644 --- a/crates/syntax/src/ast/generated/nodes.rs +++ b/crates/syntax/src/ast/generated/nodes.rs @@ -14,6 +14,8 @@ pub struct Abi { impl Abi { #[inline] pub fn extern_token(&self) -> Option { support::token(&self.syntax, T![extern]) } + #[inline] + pub fn string_token(&self) -> Option { support::token(&self.syntax, T![string]) } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -833,27 +835,6 @@ impl MacroDef { pub fn macro_token(&self) -> Option { support::token(&self.syntax, T![macro]) } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct MacroEagerInput { - pub(crate) syntax: SyntaxNode, -} -impl MacroEagerInput { - #[inline] - pub fn exprs(&self) -> AstChildren { support::children(&self.syntax) } - #[inline] - pub fn l_paren_token(&self) -> Option { support::token(&self.syntax, T!['(']) } - #[inline] - pub fn r_paren_token(&self) -> Option { support::token(&self.syntax, T![')']) } - #[inline] - pub fn l_brack_token(&self) -> Option { support::token(&self.syntax, T!['[']) } - #[inline] - pub fn r_brack_token(&self) -> Option { support::token(&self.syntax, T![']']) } - #[inline] - pub fn l_curly_token(&self) -> Option { support::token(&self.syntax, T!['{']) } - #[inline] - pub fn r_curly_token(&self) -> Option { support::token(&self.syntax, T!['}']) } -} - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct MacroExpr { pub(crate) syntax: SyntaxNode, @@ -1050,6 +1031,10 @@ impl NameRef { #[inline] pub fn ident_token(&self) -> Option { support::token(&self.syntax, T![ident]) } #[inline] + pub fn int_number_token(&self) -> Option { + support::token(&self.syntax, T![int_number]) + } + #[inline] pub fn self_token(&self) -> Option { support::token(&self.syntax, T![self]) } #[inline] pub fn super_token(&self) -> Option { support::token(&self.syntax, T![super]) } @@ -3021,20 +3006,6 @@ impl AstNode for MacroDef { #[inline] fn syntax(&self) -> &SyntaxNode { &self.syntax } } -impl AstNode for MacroEagerInput { - #[inline] - fn can_cast(kind: SyntaxKind) -> bool { kind == MACRO_EAGER_INPUT } - #[inline] - fn cast(syntax: SyntaxNode) -> Option { - if Self::can_cast(syntax.kind()) { - Some(Self { syntax }) - } else { - None - } - } - #[inline] - fn syntax(&self) -> &SyntaxNode { &self.syntax } -} impl AstNode for MacroExpr { #[inline] fn can_cast(kind: SyntaxKind) -> bool { kind == MACRO_EXPR } @@ -5741,11 +5712,6 @@ impl std::fmt::Display for MacroDef { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MacroEagerInput { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(self.syntax(), f) - } -} impl std::fmt::Display for MacroExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index abff16b6f5..6d21ca1747 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -211,115 +211,6 @@ impl SourceFile { } } -impl ast::TokenTree { - pub fn reparse_as_comma_separated_expr( - self, - edition: parser::Edition, - ) -> Parse { - let tokens = self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token); - - let mut parser_input = parser::Input::default(); - let mut was_joint = false; - for t in tokens { - let kind = t.kind(); - if kind.is_trivia() { - was_joint = false - } else if kind == SyntaxKind::IDENT { - let token_text = t.text(); - let contextual_kw = - SyntaxKind::from_contextual_keyword(token_text).unwrap_or(SyntaxKind::IDENT); - parser_input.push_ident(contextual_kw); - } else { - if was_joint { - parser_input.was_joint(); - } - parser_input.push(kind); - // Tag the token as joint if it is float with a fractional part - // we use this jointness to inform the parser about what token split - // event to emit when we encounter a float literal in a field access - if kind == SyntaxKind::FLOAT_NUMBER { - if !t.text().ends_with('.') { - parser_input.was_joint(); - } else { - was_joint = false; - } - } else { - was_joint = true; - } - } - } - - let parser_output = parser::TopEntryPoint::MacroEagerInput.parse(&parser_input, edition); - - let mut tokens = - self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token); - let mut text = String::new(); - let mut pos = TextSize::from(0); - let mut builder = SyntaxTreeBuilder::default(); - for event in parser_output.iter() { - match event { - parser::Step::Token { kind, n_input_tokens } => { - let mut token = tokens.next().unwrap(); - while token.kind().is_trivia() { - let text = token.text(); - pos += TextSize::from(text.len() as u32); - builder.token(token.kind(), text); - - token = tokens.next().unwrap(); - } - text.push_str(token.text()); - for _ in 1..n_input_tokens { - let token = tokens.next().unwrap(); - text.push_str(token.text()); - } - - pos += TextSize::from(text.len() as u32); - builder.token(kind, &text); - text.clear(); - } - parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { - let token = tokens.next().unwrap(); - let text = token.text(); - - match text.split_once('.') { - Some((left, right)) => { - assert!(!left.is_empty()); - builder.start_node(SyntaxKind::NAME_REF); - builder.token(SyntaxKind::INT_NUMBER, left); - builder.finish_node(); - - // here we move the exit up, the original exit has been deleted in process - builder.finish_node(); - - builder.token(SyntaxKind::DOT, "."); - - if has_pseudo_dot { - assert!(right.is_empty(), "{left}.{right}"); - } else { - assert!(!right.is_empty(), "{left}.{right}"); - builder.start_node(SyntaxKind::NAME_REF); - builder.token(SyntaxKind::INT_NUMBER, right); - builder.finish_node(); - - // the parser creates an unbalanced start node, we are required to close it here - builder.finish_node(); - } - } - None => unreachable!(), - } - pos += TextSize::from(text.len() as u32); - } - parser::Step::Enter { kind } => builder.start_node(kind), - parser::Step::Exit => builder.finish_node(), - parser::Step::Error { msg } => builder.error(msg.to_owned(), pos), - } - } - - let (green, errors) = builder.finish_raw(); - Parse::new(green, errors) - } -} - /// Matches a `SyntaxNode` against an `ast` type. /// /// # Example: diff --git a/xtask/src/codegen.rs b/xtask/src/codegen.rs index acaa65129d..2491952f52 100644 --- a/xtask/src/codegen.rs +++ b/xtask/src/codegen.rs @@ -163,8 +163,9 @@ fn add_preamble(cg: CodegenType, mut text: String) -> String { /// case, updates the file and then fails the test. #[allow(clippy::print_stderr)] fn ensure_file_contents(cg: CodegenType, file: &Path, contents: &str, check: bool) { + let contents = normalize_newlines(contents); if let Ok(old_contents) = fs::read_to_string(file) { - if normalize_newlines(&old_contents) == normalize_newlines(contents) { + if normalize_newlines(&old_contents) == contents { // File is already up to date. return; } diff --git a/xtask/src/codegen/grammar.rs b/xtask/src/codegen/grammar.rs index 45fa2d37c8..e74a34759d 100644 --- a/xtask/src/codegen/grammar.rs +++ b/xtask/src/codegen/grammar.rs @@ -17,15 +17,22 @@ use quote::{format_ident, quote}; use ungrammar::{Grammar, Rule}; use crate::{ - codegen::{add_preamble, ensure_file_contents, reformat}, + codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat}, project_root, }; mod ast_src; -use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC}; +use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc}; pub(crate) fn generate(check: bool) { - let syntax_kinds = generate_syntax_kinds(KINDS_SRC); + let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram")) + .unwrap() + .parse() + .unwrap(); + let ast = lower(&grammar); + let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar); + + let syntax_kinds = generate_syntax_kinds(kinds_src); let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs"); ensure_file_contents( crate::flags::CodegenType::Grammar, @@ -34,12 +41,6 @@ pub(crate) fn generate(check: bool) { check, ); - let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram")) - .unwrap() - .parse() - .unwrap(); - let ast = lower(&grammar); - let ast_tokens = generate_tokens(&ast); let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs"); ensure_file_contents( @@ -49,7 +50,7 @@ pub(crate) fn generate(check: bool) { check, ); - let ast_nodes = generate_nodes(KINDS_SRC, &ast); + let ast_nodes = generate_nodes(kinds_src, &ast); let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs"); ensure_file_contents( crate::flags::CodegenType::Grammar, @@ -96,7 +97,7 @@ fn generate_tokens(grammar: &AstSrc) -> String { .replace("#[derive", "\n#[derive") } -fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String { +fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String { let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar .nodes .iter() @@ -117,7 +118,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String { }); let methods = node.fields.iter().map(|field| { - let method_name = field.method_name(); + let method_name = format_ident!("{}", field.method_name()); let ty = field.ty(); if field.is_many() { @@ -366,7 +367,7 @@ fn write_doc_comment(contents: &[String], dest: &mut String) { } } -fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String { +fn generate_syntax_kinds(grammar: KindsSrc) -> String { let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar .punct .iter() @@ -481,6 +482,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String { #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)* #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)* [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT }; + [int_number] => { $crate::SyntaxKind::INT_NUMBER }; [ident] => { $crate::SyntaxKind::IDENT }; [shebang] => { $crate::SyntaxKind::SHEBANG }; } @@ -550,7 +552,7 @@ impl Field { _ => None, } } - fn method_name(&self) -> proc_macro2::Ident { + fn method_name(&self) -> String { match self { Field::Token(name) => { let name = match name.as_str() { @@ -585,13 +587,13 @@ impl Field { "~" => "tilde", _ => name, }; - format_ident!("{}_token", name) + format!("{name}_token",) } Field::Node { name, .. } => { if name == "type" { - format_ident!("ty") + String::from("ty") } else { - format_ident!("{}", name) + name.to_owned() } } } @@ -604,6 +606,15 @@ impl Field { } } +fn clean_token_name(name: &str) -> String { + let cleaned = name.trim_start_matches(['@', '#', '?']); + if cleaned.is_empty() { + name.to_owned() + } else { + cleaned.to_owned() + } +} + fn lower(grammar: &Grammar) -> AstSrc { let mut res = AstSrc { tokens: @@ -683,14 +694,12 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, r } Rule::Token(token) => { assert!(label.is_none()); - let mut name = grammar[*token].name.clone(); - if name != "int_number" && name != "string" { - if "[]{}()".contains(&name) { - name = format!("'{name}'"); - } - let field = Field::Token(name); - acc.push(field); + let mut name = clean_token_name(&grammar[*token].name); + if "[]{}()".contains(&name) { + name = format!("'{name}'"); } + let field = Field::Token(name); + acc.push(field); } Rule::Rep(inner) => { if let Rule::Node(node) = &**inner { @@ -863,7 +872,7 @@ fn extract_struct_traits(ast: &mut AstSrc) { fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) { let mut to_remove = Vec::new(); for (i, field) in node.fields.iter().enumerate() { - let method_name = field.method_name().to_string(); + let method_name = field.method_name(); if methods.iter().any(|&it| it == method_name) { to_remove.push(i); } diff --git a/xtask/src/codegen/grammar/ast_src.rs b/xtask/src/codegen/grammar/ast_src.rs index c246ee9950..996544f955 100644 --- a/xtask/src/codegen/grammar/ast_src.rs +++ b/xtask/src/codegen/grammar/ast_src.rs @@ -1,241 +1,152 @@ //! Defines input for code generation process. -pub(crate) struct KindsSrc<'a> { - pub(crate) punct: &'a [(&'a str, &'a str)], - pub(crate) keywords: &'a [&'a str], - pub(crate) contextual_keywords: &'a [&'a str], - pub(crate) literals: &'a [&'a str], - pub(crate) tokens: &'a [&'a str], - pub(crate) nodes: &'a [&'a str], +use crate::codegen::grammar::to_upper_snake_case; + +#[derive(Copy, Clone, Debug)] +pub(crate) struct KindsSrc { + pub(crate) punct: &'static [(&'static str, &'static str)], + pub(crate) keywords: &'static [&'static str], + pub(crate) contextual_keywords: &'static [&'static str], + pub(crate) literals: &'static [&'static str], + pub(crate) tokens: &'static [&'static str], + pub(crate) nodes: &'static [&'static str], } -pub(crate) const KINDS_SRC: KindsSrc<'_> = KindsSrc { - punct: &[ - (";", "SEMICOLON"), - (",", "COMMA"), - ("(", "L_PAREN"), - (")", "R_PAREN"), - ("{", "L_CURLY"), - ("}", "R_CURLY"), - ("[", "L_BRACK"), - ("]", "R_BRACK"), - ("<", "L_ANGLE"), - (">", "R_ANGLE"), - ("@", "AT"), - ("#", "POUND"), - ("~", "TILDE"), - ("?", "QUESTION"), - ("$", "DOLLAR"), - ("&", "AMP"), - ("|", "PIPE"), - ("+", "PLUS"), - ("*", "STAR"), - ("/", "SLASH"), - ("^", "CARET"), - ("%", "PERCENT"), - ("_", "UNDERSCORE"), - (".", "DOT"), - ("..", "DOT2"), - ("...", "DOT3"), - ("..=", "DOT2EQ"), - (":", "COLON"), - ("::", "COLON2"), - ("=", "EQ"), - ("==", "EQ2"), - ("=>", "FAT_ARROW"), - ("!", "BANG"), - ("!=", "NEQ"), - ("-", "MINUS"), - ("->", "THIN_ARROW"), - ("<=", "LTEQ"), - (">=", "GTEQ"), - ("+=", "PLUSEQ"), - ("-=", "MINUSEQ"), - ("|=", "PIPEEQ"), - ("&=", "AMPEQ"), - ("^=", "CARETEQ"), - ("/=", "SLASHEQ"), - ("*=", "STAREQ"), - ("%=", "PERCENTEQ"), - ("&&", "AMP2"), - ("||", "PIPE2"), - ("<<", "SHL"), - (">>", "SHR"), - ("<<=", "SHLEQ"), - (">>=", "SHREQ"), - ], - keywords: &[ - "abstract", "as", "async", "await", "become", "box", "break", "const", "continue", "crate", - "do", "dyn", "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl", "in", - "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub", "ref", - "return", "self", "Self", "static", "struct", "super", "trait", "true", "try", "type", - "typeof", "unsafe", "unsized", "use", "virtual", "where", "while", "yield", - ], - contextual_keywords: &[ - "auto", - "builtin", - "default", - "existential", - "union", - "raw", - "macro_rules", - "yeet", - "offset_of", - "asm", - "format_args", - ], - literals: &["INT_NUMBER", "FLOAT_NUMBER", "CHAR", "BYTE", "STRING", "BYTE_STRING", "C_STRING"], - tokens: &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"], - nodes: &[ - "SOURCE_FILE", - "STRUCT", - "UNION", - "ENUM", - "FN", - "RET_TYPE", - "EXTERN_CRATE", - "MODULE", - "USE", - "STATIC", - "CONST", - "TRAIT", - "TRAIT_ALIAS", - "IMPL", - "TYPE_ALIAS", - "MACRO_CALL", - "MACRO_RULES", - "MACRO_ARM", - "TOKEN_TREE", - "MACRO_DEF", - "PAREN_TYPE", - "TUPLE_TYPE", - "MACRO_TYPE", - "NEVER_TYPE", - "PATH_TYPE", - "PTR_TYPE", - "ARRAY_TYPE", - "SLICE_TYPE", - "REF_TYPE", - "INFER_TYPE", - "FN_PTR_TYPE", - "FOR_TYPE", - "IMPL_TRAIT_TYPE", - "DYN_TRAIT_TYPE", - "OR_PAT", - "PAREN_PAT", - "REF_PAT", - "BOX_PAT", - "IDENT_PAT", - "WILDCARD_PAT", - "REST_PAT", - "PATH_PAT", - "RECORD_PAT", - "RECORD_PAT_FIELD_LIST", - "RECORD_PAT_FIELD", - "TUPLE_STRUCT_PAT", - "TUPLE_PAT", - "SLICE_PAT", - "RANGE_PAT", - "LITERAL_PAT", - "MACRO_PAT", - "CONST_BLOCK_PAT", - // atoms - "TUPLE_EXPR", - "ARRAY_EXPR", - "PAREN_EXPR", - "PATH_EXPR", - "CLOSURE_EXPR", - "IF_EXPR", - "WHILE_EXPR", - "LOOP_EXPR", - "FOR_EXPR", - "CONTINUE_EXPR", - "BREAK_EXPR", - "LABEL", - "BLOCK_EXPR", - "STMT_LIST", - "RETURN_EXPR", - "BECOME_EXPR", - "YIELD_EXPR", - "YEET_EXPR", - "LET_EXPR", - "UNDERSCORE_EXPR", - "MACRO_EXPR", - "MATCH_EXPR", - "MATCH_ARM_LIST", - "MATCH_ARM", - "MATCH_GUARD", - "RECORD_EXPR", - "RECORD_EXPR_FIELD_LIST", - "RECORD_EXPR_FIELD", - "OFFSET_OF_EXPR", - "ASM_EXPR", - "FORMAT_ARGS_EXPR", - "FORMAT_ARGS_ARG", - // postfix - "CALL_EXPR", - "INDEX_EXPR", - "METHOD_CALL_EXPR", - "FIELD_EXPR", - "AWAIT_EXPR", - "TRY_EXPR", - "CAST_EXPR", - // unary - "REF_EXPR", - "PREFIX_EXPR", - "RANGE_EXPR", // just weird - "BIN_EXPR", - "EXTERN_BLOCK", - "EXTERN_ITEM_LIST", - "VARIANT", - "RECORD_FIELD_LIST", - "RECORD_FIELD", - "TUPLE_FIELD_LIST", - "TUPLE_FIELD", - "VARIANT_LIST", - "ITEM_LIST", - "ASSOC_ITEM_LIST", - "ATTR", - "META", - "USE_TREE", - "USE_TREE_LIST", - "PATH", - "PATH_SEGMENT", - "LITERAL", - "RENAME", - "VISIBILITY", - "WHERE_CLAUSE", - "WHERE_PRED", - "ABI", - "NAME", - "NAME_REF", - "LET_STMT", - "LET_ELSE", - "EXPR_STMT", - "GENERIC_PARAM_LIST", - "GENERIC_PARAM", - "LIFETIME_PARAM", - "TYPE_PARAM", - "RETURN_TYPE_ARG", - "CONST_PARAM", - "GENERIC_ARG_LIST", - "LIFETIME", - "LIFETIME_ARG", - "TYPE_ARG", - "ASSOC_TYPE_ARG", - "CONST_ARG", - "PARAM_LIST", - "PARAM", - "SELF_PARAM", - "ARG_LIST", - "TYPE_BOUND", - "TYPE_BOUND_LIST", - // macro related - "MACRO_ITEMS", - "MACRO_STMTS", - "MACRO_EAGER_INPUT", - ], -}; +/// The punctuations of the language. +const PUNCT: &[(&str, &str)] = &[ + // KEEP THE DOLLAR AT THE TOP ITS SPECIAL + ("$", "DOLLAR"), + (";", "SEMICOLON"), + (",", "COMMA"), + ("(", "L_PAREN"), + (")", "R_PAREN"), + ("{", "L_CURLY"), + ("}", "R_CURLY"), + ("[", "L_BRACK"), + ("]", "R_BRACK"), + ("<", "L_ANGLE"), + (">", "R_ANGLE"), + ("@", "AT"), + ("#", "POUND"), + ("~", "TILDE"), + ("?", "QUESTION"), + ("&", "AMP"), + ("|", "PIPE"), + ("+", "PLUS"), + ("*", "STAR"), + ("/", "SLASH"), + ("^", "CARET"), + ("%", "PERCENT"), + ("_", "UNDERSCORE"), + (".", "DOT"), + ("..", "DOT2"), + ("...", "DOT3"), + ("..=", "DOT2EQ"), + (":", "COLON"), + ("::", "COLON2"), + ("=", "EQ"), + ("==", "EQ2"), + ("=>", "FAT_ARROW"), + ("!", "BANG"), + ("!=", "NEQ"), + ("-", "MINUS"), + ("->", "THIN_ARROW"), + ("<=", "LTEQ"), + (">=", "GTEQ"), + ("+=", "PLUSEQ"), + ("-=", "MINUSEQ"), + ("|=", "PIPEEQ"), + ("&=", "AMPEQ"), + ("^=", "CARETEQ"), + ("/=", "SLASHEQ"), + ("*=", "STAREQ"), + ("%=", "PERCENTEQ"), + ("&&", "AMP2"), + ("||", "PIPE2"), + ("<<", "SHL"), + (">>", "SHR"), + ("<<=", "SHLEQ"), + (">>=", "SHREQ"), +]; +const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"]; +// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],; + +const EOF: &str = "EOF"; + +const RESERVED: &[&str] = &[ + "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", + "virtual", "yield", "try", +]; + +pub(crate) fn generate_kind_src( + nodes: &[AstNodeSrc], + enums: &[AstEnumSrc], + grammar: &ungrammar::Grammar, +) -> KindsSrc { + let mut keywords: Vec<&_> = Vec::new(); + let mut contextual_keywords: Vec<&_> = Vec::new(); + let mut tokens: Vec<&_> = TOKENS.to_vec(); + let mut literals: Vec<&_> = Vec::new(); + let mut used_puncts = vec![false; PUNCT.len()]; + // Mark $ as used + used_puncts[0] = true; + grammar.tokens().for_each(|token| { + let name = &*grammar[token].name; + if name == EOF { + return; + } + match name.split_at(1) { + ("@", lit) if !lit.is_empty() => { + literals.push(String::leak(to_upper_snake_case(lit))); + } + ("#", token) if !token.is_empty() => { + tokens.push(String::leak(to_upper_snake_case(token))); + } + ("?", kw) if !kw.is_empty() => { + contextual_keywords.push(String::leak(kw.to_owned())); + } + _ if name.chars().all(char::is_alphabetic) => { + keywords.push(String::leak(name.to_owned())); + } + _ => { + let idx = PUNCT + .iter() + .position(|(punct, _)| punct == &name) + .unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}")); + used_puncts[idx] = true; + } + } + }); + PUNCT.iter().zip(used_puncts).filter(|(_, used)| !used).for_each(|((punct, _), _)| { + panic!("Punctuation {punct:?} is not used in grammar"); + }); + keywords.extend(RESERVED.iter().copied()); + keywords.sort(); + keywords.dedup(); + + // we leak things here for simplicity, that way we don't have to deal with lifetimes + // The execution is a one shot job so thats fine + let nodes = nodes + .iter() + .map(|it| &it.name) + .chain(enums.iter().map(|it| &it.name)) + .map(|it| to_upper_snake_case(it)) + .map(String::leak) + .map(|it| &*it) + .collect(); + let nodes = Vec::leak(nodes); + nodes.sort(); + let keywords = Vec::leak(keywords); + keywords.sort(); + let contextual_keywords = Vec::leak(contextual_keywords); + contextual_keywords.sort(); + let literals = Vec::leak(literals); + literals.sort(); + let tokens = Vec::leak(tokens); + tokens.sort(); + + KindsSrc { punct: PUNCT, nodes, keywords, contextual_keywords, literals, tokens } +} #[derive(Default, Debug)] pub(crate) struct AstSrc {