diff --git a/crates/syntax/Cargo.toml b/crates/syntax/Cargo.toml index 4a454cb8b7..bb4d10207d 100644 --- a/crates/syntax/Cargo.toml +++ b/crates/syntax/Cargo.toml @@ -30,7 +30,7 @@ rayon = "1.5.1" expect-test = "1.2.2" proc-macro2 = "1.0.36" quote = "1.0.16" -ungrammar = "=1.16.1" +ungrammar = "1.16.1" test_utils = { path = "../test_utils" } sourcegen = { path = "../sourcegen" } diff --git a/crates/syntax/rust.ungram b/crates/syntax/rust.ungram new file mode 100644 index 0000000000..cb58486eff --- /dev/null +++ b/crates/syntax/rust.ungram @@ -0,0 +1,664 @@ +// Rust Un-Grammar. +// +// This grammar specifies the structure of Rust's concrete syntax tree. +// It does not specify parsing rules (ambiguities, precedence, etc are out of scope). +// Tokens are processed -- contextual keywords are recognised, compound operators glued. +// +// Legend: +// +// // -- comment +// Name = -- non-terminal definition +// 'ident' -- token (terminal) +// A B -- sequence +// A | B -- alternation +// A* -- zero or more repetition +// A? -- zero or one repetition +// (A) -- same as A +// label:A -- suggested name for field of AST node + +//*************************// +// Names, Paths and Macros // +//*************************// + +Name = + 'ident' | 'self' + +NameRef = + 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' + +Lifetime = + 'lifetime_ident' + +Path = + (qualifier:Path '::')? segment:PathSegment + +PathSegment = + '::'? NameRef +| NameRef GenericArgList? +| NameRef ParamList RetType? +| '<' PathType ('as' PathType)? '>' + +GenericArgList = + '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' + +GenericArg = + TypeArg +| AssocTypeArg +| LifetimeArg +| ConstArg + +TypeArg = + Type + +AssocTypeArg = + NameRef GenericParamList? (':' TypeBoundList | '=' Type) + +LifetimeArg = + Lifetime + +ConstArg = + Expr + +MacroCall = + Attr* Path '!' TokenTree ';'? + +TokenTree = + '(' ')' +| '{' '}' +| '[' ']' + +MacroItems = + Item* + +MacroStmts = + statements:Stmt* + Expr? + +//*************************// +// Items // +//*************************// + +SourceFile = + 'shebang'? + Attr* + Item* + +Item = + Const +| Enum +| ExternBlock +| ExternCrate +| Fn +| Impl +| MacroCall +| MacroRules +| MacroDef +| Module +| Static +| Struct +| Trait +| TypeAlias +| Union +| Use + +MacroRules = + Attr* Visibility? + 'macro_rules' '!' Name + TokenTree + +MacroDef = + Attr* Visibility? + 'macro' Name args:TokenTree? + body:TokenTree + +Module = + Attr* Visibility? + 'mod' Name + (ItemList | ';') + +ItemList = + '{' Attr* Item* '}' + +ExternCrate = + Attr* Visibility? + 'extern' 'crate' NameRef Rename? ';' + +Rename = + 'as' (Name | '_') + +Use = + Attr* Visibility? + 'use' UseTree ';' + +UseTree = + (Path? '::')? ('*' | UseTreeList) +| Path Rename? + +UseTreeList = + '{' (UseTree (',' UseTree)* ','?)? '}' + +Fn = + Attr* Visibility? + 'default'? 'const'? 'async'? 'unsafe'? Abi? + 'fn' Name GenericParamList? ParamList RetType? WhereClause? + (body:BlockExpr | ';') + +Abi = + 'extern' 'string'? + +ParamList = + '('( + SelfParam + | (SelfParam ',')? (Param (',' Param)* ','?)? + )')' +| '|' (Param (',' Param)* ','?)? '|' + +SelfParam = + Attr* ( + ('&' Lifetime?)? 'mut'? Name + | 'mut'? Name ':' Type + ) + +Param = + Attr* ( + Pat (':' Type)? + | Type + | '...' + ) + +RetType = + '->' Type + +TypeAlias = + Attr* Visibility? + 'default'? + 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? + ('=' Type)? ';' + +Struct = + Attr* Visibility? + 'struct' Name GenericParamList? ( + WhereClause? (RecordFieldList | ';') + | TupleFieldList WhereClause? ';' + ) + +RecordFieldList = + '{' fields:(RecordField (',' RecordField)* ','?)? '}' + +RecordField = + Attr* Visibility? + Name ':' Type + +TupleFieldList = + '(' fields:(TupleField (',' TupleField)* ','?)? ')' + +TupleField = + Attr* Visibility? + Type + +FieldList = + RecordFieldList +| TupleFieldList + +Enum = + Attr* Visibility? + 'enum' Name GenericParamList? WhereClause? + VariantList + +VariantList = + '{' (Variant (',' Variant)* ','?)? '}' + +Variant = + Attr* Visibility? + Name FieldList? ('=' Expr)? + +Union = + Attr* Visibility? + 'union' Name GenericParamList? WhereClause? + RecordFieldList + +// A Data Type. +// +// Not used directly in the grammar, but handy to have anyway. +Adt = + Enum +| Struct +| Union + +Const = + Attr* Visibility? + 'default'? + 'const' (Name | '_') ':' Type + ('=' body:Expr)? ';' + +Static = + Attr* Visibility? + 'static' 'mut'? Name ':' Type + ('=' body:Expr)? ';' + +Trait = + Attr* Visibility? + 'unsafe'? 'auto'? + 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? + AssocItemList + +AssocItemList = + '{' Attr* AssocItem* '}' + +AssocItem = + Const +| Fn +| MacroCall +| TypeAlias + +Impl = + Attr* Visibility? + 'default'? 'unsafe'? + 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? + AssocItemList + +ExternBlock = + Attr* 'unsafe'? Abi ExternItemList + +ExternItemList = + '{' Attr* ExternItem* '}' + +ExternItem = + Fn +| MacroCall +| Static +| TypeAlias + +GenericParamList = + '<' (GenericParam (',' GenericParam)* ','?)? '>' + +GenericParam = + ConstParam +| LifetimeParam +| TypeParam + +TypeParam = + Attr* Name (':' TypeBoundList?)? + ('=' default_type:Type)? + +ConstParam = + Attr* 'const' Name ':' Type + ('=' default_val:Expr)? + +LifetimeParam = + Attr* Lifetime (':' TypeBoundList?)? + +WhereClause = + 'where' predicates:(WherePred (',' WherePred)* ','?) + +WherePred = + ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? + +Visibility = + 'pub' ('(' 'in'? Path ')')? + +Attr = + '#' '!'? '[' Meta ']' + +Meta = + Path ('=' Expr | TokenTree)? + +//****************************// +// Statements and Expressions // +//****************************// + +Stmt = + ';' +| ExprStmt +| Item +| LetStmt + +LetStmt = + Attr* 'let' Pat (':' Type)? + '=' initializer:Expr + LetElse? + ';' + +LetElse = + 'else' BlockExpr + +ExprStmt = + Expr ';'? + +Expr = + ArrayExpr +| AwaitExpr +| BinExpr +| BlockExpr +| BoxExpr +| BreakExpr +| CallExpr +| CastExpr +| ClosureExpr +| ContinueExpr +| FieldExpr +| ForExpr +| IfExpr +| IndexExpr +| Literal +| LoopExpr +| MacroCall +| MacroStmts +| MatchExpr +| MethodCallExpr +| ParenExpr +| PathExpr +| PrefixExpr +| RangeExpr +| RecordExpr +| RefExpr +| ReturnExpr +| TryExpr +| TupleExpr +| WhileExpr +| YieldExpr +| LetExpr +| UnderscoreExpr + +Literal = + Attr* value:( + 'int_number' | 'float_number' + | 'string' | 'raw_string' + | 'byte_string' | 'raw_byte_string' + | 'true' | 'false' + | 'char' | 'byte' + ) + +PathExpr = + Attr* Path + +StmtList = + '{' + Attr* + statements:Stmt* + tail_expr:Expr? + '}' + +RefExpr = + Attr* '&' ('raw' | 'mut' | 'const') Expr + +TryExpr = + Attr* Expr '?' + +BlockExpr = + Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList + +PrefixExpr = + Attr* op:('-' | '!' | '*') Expr + +BinExpr = + Attr* + lhs:Expr + op:( + '||' | '&&' + | '==' | '!=' | '<=' | '>=' | '<' | '>' + | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' + | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' + ) + rhs:Expr + +CastExpr = + Attr* Expr 'as' Type + +ParenExpr = + Attr* '(' Attr* Expr ')' + +ArrayExpr = + Attr* '[' Attr* ( + (Expr (',' Expr)* ','?)? + | Expr ';' Expr + ) ']' + +IndexExpr = + Attr* base:Expr '[' index:Expr ']' + +TupleExpr = + Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')' + +RecordExpr = + Path RecordExprFieldList + +RecordExprFieldList = + '{' + Attr* + fields:(RecordExprField (',' RecordExprField)* ','?)? + ('..' spread:Expr?)? + '}' + +RecordExprField = + Attr* (NameRef ':')? Expr + +CallExpr = + Attr* Expr ArgList + +ArgList = + '(' args:(Expr (',' Expr)* ','?)? ')' + +MethodCallExpr = + Attr* receiver:Expr '.' NameRef GenericArgList? ArgList + +FieldExpr = + Attr* Expr '.' NameRef + +ClosureExpr = + Attr* 'static'? 'async'? 'move'? ParamList RetType? + body:Expr + +IfExpr = + Attr* 'if' condition:Expr then_branch:BlockExpr + ('else' else_branch:(IfExpr | BlockExpr))? + +LoopExpr = + Attr* Label? 'loop' + loop_body:BlockExpr + +ForExpr = + Attr* Label? 'for' Pat 'in' iterable:Expr + loop_body:BlockExpr + +WhileExpr = + Attr* Label? 'while' condition:Expr + loop_body:BlockExpr + +Label = + Lifetime ':' + +BreakExpr = + Attr* 'break' Lifetime? Expr? + +ContinueExpr = + Attr* 'continue' Lifetime? + +RangeExpr = + Attr* start:Expr? op:('..' | '..=') end:Expr? + +MatchExpr = + Attr* 'match' Expr MatchArmList + +MatchArmList = + '{' + Attr* + arms:MatchArm* + '}' + +MatchArm = + Attr* Pat guard:MatchGuard? '=>' Expr ','? + +MatchGuard = + 'if' condition:Expr + +ReturnExpr = + Attr* 'return' Expr? + +YieldExpr = + Attr* 'yield' Expr? + +LetExpr = + Attr* 'let' Pat '=' Expr + +UnderscoreExpr = + Attr* '_' + +AwaitExpr = + Attr* Expr '.' 'await' + +BoxExpr = + Attr* 'box' Expr + +//*************************// +// Types // +//*************************// + +Type = + ArrayType +| DynTraitType +| FnPtrType +| ForType +| ImplTraitType +| InferType +| MacroType +| NeverType +| ParenType +| PathType +| PtrType +| RefType +| SliceType +| TupleType + +ParenType = + '(' Type ')' + +NeverType = + '!' + +MacroType = + MacroCall + +PathType = + Path + +TupleType = + '(' fields:(Type (',' Type)* ','?)? ')' + +PtrType = + '*' ('const' | 'mut') Type + +RefType = + '&' Lifetime? 'mut'? Type + +ArrayType = + '[' Type ';' Expr ']' + +SliceType = + '[' Type ']' + +InferType = + '_' + +FnPtrType = + 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? + +ForType = + 'for' GenericParamList Type + +ImplTraitType = + 'impl' TypeBoundList + +DynTraitType = + 'dyn' TypeBoundList + +TypeBoundList = + bounds:(TypeBound ('+' TypeBound)* '+'?) + +TypeBound = + Lifetime +| ('?' | '~' 'const')? Type + +//************************// +// Patterns // +//************************// + +Pat = + IdentPat +| BoxPat +| RestPat +| LiteralPat +| MacroPat +| OrPat +| ParenPat +| PathPat +| WildcardPat +| RangePat +| RecordPat +| RefPat +| SlicePat +| TuplePat +| TupleStructPat +| ConstBlockPat + +LiteralPat = + Literal + +IdentPat = + Attr* 'ref'? 'mut'? Name ('@' Pat)? + +WildcardPat = + '_' + +RangePat = + // 1.. + start:Pat op:('..' | '..=') + // 1..2 + | start:Pat op:('..' | '..=') end:Pat + // ..2 + | op:('..' | '..=') end:Pat + +RefPat = + '&' 'mut'? Pat + +RecordPat = + Path RecordPatFieldList + +RecordPatFieldList = + '{' + fields:(RecordPatField (',' RecordPatField)* ','?)? + RestPat? + '}' + +RecordPatField = + Attr* (NameRef ':')? Pat + +TupleStructPat = + Path '(' fields:(Pat (',' Pat)* ','?)? ')' + +TuplePat = + '(' fields:(Pat (',' Pat)* ','?)? ')' + +ParenPat = + '(' Pat ')' + +SlicePat = + '[' (Pat (',' Pat)* ','?)? ']' + +PathPat = + Path + +OrPat = + (Pat ('|' Pat)* '|'?) + +BoxPat = + 'box' Pat + +RestPat = + Attr* '..' + +MacroPat = + MacroCall + +ConstBlockPat = + 'const' BlockExpr diff --git a/crates/syntax/src/tests/sourcegen_ast.rs b/crates/syntax/src/tests/sourcegen_ast.rs index cb38abfe89..c2ed3eab0a 100644 --- a/crates/syntax/src/tests/sourcegen_ast.rs +++ b/crates/syntax/src/tests/sourcegen_ast.rs @@ -11,7 +11,7 @@ use std::{ use itertools::Itertools; use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; -use ungrammar::{rust_grammar, Grammar, Rule}; +use ungrammar::{Grammar, Rule}; use crate::tests::ast_src::{ AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC, @@ -24,7 +24,8 @@ fn sourcegen_ast() { sourcegen::project_root().join("crates/parser/src/syntax_kind/generated.rs"); sourcegen::ensure_file_contents(syntax_kinds_file.as_path(), &syntax_kinds); - let grammar = rust_grammar(); + let grammar = + include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/rust.ungram")).parse().unwrap(); let ast = lower(&grammar); let ast_tokens = generate_tokens(&ast);