nushell/src/parser/parser.lalrpop
Yehuda Katz 2b876da56f Some function stuff
It's not done, but honestly it's getting too tedious. I will need to
rethink how I'm doing it.
2019-06-10 13:11:55 -07:00

330 lines
No EOL
12 KiB
Text

#![allow(unused)]
use std::str::FromStr;
use crate::parser::ast::expression::*;
use crate::parser::ast::module::*;
use crate::parser::ast::parser_utils::*;
use crate::parser::ast::{ExpressionBuilder, ModuleBuilder};
use crate::prelude::*;
use crate::parser::lexer::{SpannedToken, Spanned, Span, Token};
use byte_unit::Byte;
// nu's grammar is a little bit different from a lot of other languages, to better match
// the idioms and constraints of a shell environment. A lot of the constraints are
// the same as PowerShell, but mostly derived from the same first principles.
//
// - Other than at the beginning of a command, bare words are virtually always parsed as
//   strings. This means that, in general, bare words cannot be used as keywords or
//   variables.
// - Variable names begin with `$`, and so do keywords
// - Functions are invoked without `()` and without comma separation
// - In general, because of the lack of comma-separation, expressions must be grouped:
//   - a single token
//   - a path ($variable followed by any number of `"." member`)
//   - parenthesized expression
// - This means that more elaborate expressions, like binary expressions, must usually
//   be parenthesized
// - There is a special case for a command that takes a single expression, which can
//   omit the parens
grammar<'input>;
// === MODULES === //
// A module is one `Item` followed by any number of further `Item`s, each preceded
// by one or more "newline" tokens. The span runs from the start of the first item
// to the end of the last.
pub Module: Module = {
    <start: @L> <head: Item> <tail: ( ("newline")+ <Item> )*> <end: @R> =>
        ModuleBuilder::spanned_items(concat(head, tail), start, end),
}
// A module item is either a pipeline (wrapped as `RawItem::Expression`) or a
// function definition (wrapped as `RawItem::Function`), tagged with its span.
Item: Item = {
    <start: @L> <expr: Pipeline> <end: @R> =>
        Spanned::from_item(RawItem::Expression(expr), Span::from((start, end))),
    <start: @L> <def: Function> <end: @R> =>
        Spanned::from_item(RawItem::Function(def), Span::from((start, end))),
}
// === FUNCTIONS === //
// A function definition:
//
//   "function" <command-name> "start-params" <parameters> "end-params" [ "->" <Type> ] <Block>
//
// The return type annotation is optional; the other parts are required.
Function: Function = {
    <start: @L> "function" <name: CommandName> "start-params"
        <formals: ParameterList>
    "end-params" <returns: ("->" <Type>)?> <body: Block> <end: @R> =>
        ModuleBuilder::spanned_function((name, formals, returns, body), start, end),
}
// The formal parameters of a function: a comma-separated list (see the `Comma`
// macro), which may be empty.
ParameterList: Vec<FormalParameter> = {
    <Comma<FormalParameter>>,
}
// A single formal parameter: a parameter name, a ":" and its type.
FormalParameter: FormalParameter = {
    <start: @L> <ident: ParameterName> ":" <annotation: Type> <end: @R> =>
        ModuleBuilder::spanned_formal_parameter((ident, annotation), start, end),
}
// The name part of a formal parameter. Three spellings are accepted:
//   - "-" followed by a bare word  => shorthand flag
//   - "--" followed by a bare word => long flag
//   - a `$variable`                => variable parameter
ParameterName: ParameterIdentifier = {
    <start: @L> "-" <word: SpannedBare> <end: @R> =>
        ParameterIdentifier::shorthand(word.map(|bare| bare.to_string()), (start, end)),
    <start: @L> "--" <word: SpannedBare> <end: @R> =>
        ParameterIdentifier::flag(word.map(|bare| bare.to_string()), (start, end)),
    <start: @L> <variable: ParameterVariable> <end: @R> =>
        ParameterIdentifier::var(variable, (start, end)),
}
// A `$variable` used as a parameter name, with the span covering the "$" and the
// variable token.
//
// The variable is built through the in-scope `Variable` type, the same one named
// in this rule's declared result type, rather than through an `ast::` path that
// none of the `use` lines at the top of this file bring into scope.
ParameterVariable: Spanned<Variable> = {
    <start: @L> "$" <name: "variable"> <end: @R> =>
        Spanned::from_item(Variable::from_string(name.as_slice()), (start, end)),
}
// === TYPES === //
// A type annotation together with its source span.
Type: Spanned<Type> = {
    <start: @L> <raw: RawType> <end: @R> => Spanned::from_item(raw, (start, end)),
}
// Maps each type keyword token to the corresponding `Type` variant.
RawType: Type = {
    // Scalar types
    "any"     => Type::Any,
    "int"     => Type::Int,
    "decimal" => Type::Decimal,
    "bytes"   => Type::Bytes,
    "text"    => Type::Text,
    "boolean" => Type::Boolean,
    "date"    => Type::Date,

    // Compound types
    // TODO: generics
    "object"  => Type::Object,
    "list"    => Type::List,
    "block"   => Type::Block,
}
// === EXPRESSIONS === //
// A single line entered at the REPL: one pipeline followed by any number of
// "newline" tokens. Only the pipeline is returned.
pub ReplLine: Pipeline = {
    <pipeline: Pipeline> ("newline")* => pipeline,
}
// One or more pipeline elements separated by "|".
Pipeline: Pipeline = {
    <start: @L> <head: PipelineElement> <tail: ( "|" <PipelineElement> )*> <end: @R> =>
        Pipeline::from_parts(head, tail, start, end),
}
// One stage of a pipeline. A lone bare word is treated as a call with no
// arguments; anything else must be a `SingleExpression`.
PipelineElement: Expression = {
    <start: @L> <command: BareExpression> <end: @R> =>
        ExpressionBuilder::spanned_call((command, vec![]), start, end),
    <SingleExpression>,
}
// A leaf expression is a single logical token that directly represents an
// expression: a string literal, an integer, a number with a unit, or a variable.
LeafExpression: Expression = {
    <String> => <>,
    <start: @L> <value: Int> <end: @R> => ExpressionBuilder::spanned_int(value, start, end),
    <UnitsNum> => <>,
    <Var> => <>,
}
// A call: a head followed by its arguments. The head is either a general
// `Expression` or a bare word.
//
// - With exactly one argument, that argument may be a `SingleCallArgument`, which
//   additionally admits an unparenthesized binary expression.
// - With two or more arguments, every argument must be a `CallArgument`.
//
// The multi-argument forms assemble `first` + `rest` with the shared `concat`
// helper, the same helper `Module` uses to join its first item with the rest.
pub Call: Expression = {
    <start: @L> <head: Expression> <only: SingleCallArgument> <end: @R> =>
        ExpressionBuilder::spanned_call((head, vec![only]), start, end),
    <start: @L> <head: Expression> <first: CallArgument> <rest: ( <CallArgument> )+> <end: @R> =>
        ExpressionBuilder::spanned_call((head, concat(first, rest)), start, end),
    <start: @L> <head: BareExpression> <only: SingleCallArgument> <end: @R> =>
        ExpressionBuilder::spanned_call((head, vec![only]), start, end),
    <start: @L> <head: BareExpression> <first: CallArgument> <rest: ( <CallArgument> )+> <end: @R> =>
        ExpressionBuilder::spanned_call((head, concat(first, rest)), start, end),
}
// A binary expression: two argument expressions joined by a comparison operator.
// Operands are `ArgumentExpression`s, so a bare word is allowed on either side.
Binary: Expression = {
    <start: @L> <lhs: ArgumentExpression> <operator: SpannedOperator> <rhs: ArgumentExpression> <end: @R> =>
        ExpressionBuilder::spanned_binary((lhs, operator, rhs), start, end),
}
// A brace-delimited block, produced as a raw `Spanned<Block>`.
//
// In a block, a single bare word is interpreted as a call:
//
// foreach { ls }
Block: Spanned<Block> = {
    <start: @L> "{" <body: SingleExpression> "}" <end: @R> =>
        ExpressionBuilder::spanned_raw_block(body, start, end),

    // The call spans only the bare word; the block spans the braces as well.
    // The span bounds are read before `bare` is moved into the call, so no clone
    // of the expression is needed.
    <start: @L> "{" <bare: BareExpression> "}" <end: @R> => {
        let (call_start, call_end) = (bare.span.start, bare.span.end);
        let call = ExpressionBuilder::spanned_call(bare, call_start, call_end);
        ExpressionBuilder::spanned_raw_block(call, start, end)
    }
}
// A brace-delimited block used in expression position. It accepts the same two
// forms as `Block` but builds the result with `spanned_block`, yielding an
// `Expression`.
//
// As in `Block`, a single bare word inside the braces is interpreted as a call:
//
// foreach { ls }
BlockExpression: Expression = {
    <start: @L> "{" <body: SingleExpression> "}" <end: @R> =>
        ExpressionBuilder::spanned_block(body, start, end),

    // The call spans only the bare word; the block spans the braces as well.
    // The span bounds are read before `bare` is moved into the call, so no clone
    // of the expression is needed.
    <start: @L> "{" <bare: BareExpression> "}" <end: @R> => {
        let (call_start, call_end) = (bare.span.start, bare.span.end);
        let call = ExpressionBuilder::spanned_call(bare, call_start, call_end);
        ExpressionBuilder::spanned_block(call, start, end)
    }
}
// An `Expression` is the most general kind of expression. It can go anywhere, even right next to another expression, and
// even as the first part of a call.
//
// A `MemberHeadExpression` is the part of an `Expression` that may be followed by member accesses: a leaf, a block, or
// a parenthesized call, bare word (treated as a call with no arguments) or binary expression.
MemberHeadExpression: Expression = {
    <LeafExpression>,
    <BlockExpression>,
    <start: @L> "(" <inner: Call> ")" <end: @R> =>
        ExpressionBuilder::spanned_call(inner, start, end),
    <start: @L> "(" <inner: BareExpression> ")" <end: @R> =>
        ExpressionBuilder::spanned_call((inner, vec![]), start, end),
    <start: @L> "(" <inner: Binary> ")" <end: @R> =>
        ExpressionBuilder::spanned_parens(inner, start, end),
}
// Either a plain `MemberHeadExpression`, or one followed by one or more
// `"???." Member` segments, which is built as a path expression.
Expression: Expression = {
    <MemberHeadExpression>,
    <start: @L> <head: MemberHeadExpression> <members: ( "???." <Member> )+> <end: @R> =>
        ExpressionBuilder::spanned_path((head, members), start, end),
}
// An `ArgumentExpression` is an expression that appears in an argument list. It
// includes all of `Expression`, plus bare words, which in this position are
// interpreted as strings.
ArgumentExpression: Expression = {
    <Expression> => <>,
    <BareExpression> => <>,
}
// Anything that may appear as one argument among several in a call: an argument
// expression or a flag.
CallArgument: Expression = {
    <ArgumentExpression> => <>,
    <Flag> => <>,
}
// The argument of a call that takes exactly one argument. In addition to a
// regular `CallArgument`, an unparenthesized binary expression is accepted.
SingleCallArgument: Expression = {
    <CallArgument> => <>,
    <Binary> => <>,
}
// A `SingleExpression` is a special case of `Expression` for situations where expressions do not appear side-by-side.
// Because expression lists in nu are not comma-separated, composite expressions (like binary expressions) must be
// parenthesized in lists. If only a single expression appears alone, the parentheses may be left out.
//
// `SingleExpression` does not include `Bare`, because the rules that use `SingleExpression` must decide for themselves
// how to interpret a single bare word (`foreach { ls }` vs `cd ls`).
SingleExpression: Expression = {
    <Expression> => <>,
    <Call> => <>,
    <Binary> => <>,
}
// A bare word wrapped as an expression, carrying its span.
BareExpression: Expression = {
    <start: @L> <word: Bare> <end: @R> => ExpressionBuilder::spanned_bare(word, start, end),
}
// A comparison operator together with its source span.
SpannedOperator: Spanned<Operator> = {
    <start: @L> <operator: Operator> <end: @R> =>
        Spanned::from_item(operator, Span::from((start, end))),
}
// One or more "newline" tokens. The tokens themselves are discarded.
Newlines: () = {
    ("newline")+ => (),
}
// Zero or more "newline" tokens. The tokens themselves are discarded.
OptionalNewlines: () = {
    ("newline")* => (),
}
// === LOGICAL TOKENS === //
// A logical token may be composed of more than one raw token, but the tokens must be emitted
// from the stream in exactly one sequence. This allows us to use parser infrastructure to
// compose tokens without the risk that these logical tokens will introduce ambiguities.
// A bare word, built from the text of a single "bare" token.
Bare: Bare = {
    <word: "bare"> => Bare::from_string(word.as_slice()),
}
// A member is a special token that represents a bare word or string literal
// immediately following a dot. The "function" keyword token is also accepted
// here, so it can be used as a member name.
Member: Spanned<String> = {
    <plain: "member"> => plain.to_spanned_string(),
    <double_quoted: "dqmember"> => double_quoted.to_spanned_string(),
    <single_quoted: "sqmember"> => single_quoted.to_spanned_string(),
    <keyword: "function"> => keyword.to_spanned_string(),
}
// The name in a function definition: a "command-name" token turned into a
// spanned bare word.
CommandName: Spanned<Bare> = {
    <start: @L> <token: "command-name"> <end: @R> =>
        Spanned::from_item(Bare::from_string(token.as_slice()), (start, end)),
}
// Maps each comparison operator token to the corresponding `Operator` variant.
Operator: Operator = {
    // Equality
    "==" => Operator::Equal,
    "!=" => Operator::NotEqual,

    // Ordering
    "<"  => Operator::LessThan,
    ">"  => Operator::GreaterThan,
    "<=" => Operator::LessThanOrEqual,
    ">=" => Operator::GreaterThanOrEqual,
}
// An integer literal, parsed from the text of a "num" token.
//
// NOTE(review): the `unwrap` panics if the token text is not a valid `i64`
// (for example, a literal that is out of range). Confirm that the lexer rules
// this out, or make this action fallible.
Int: i64 = {
    <digits: "num"> => i64::from_str(digits.as_slice()).unwrap(),
}
// An integer immediately followed by a "unit" token.
//
// NOTE(review): the `unwrap` panics if `Unit::from_str` rejects the token text;
// presumably the lexer only emits "unit" tokens that `Unit` understands. Confirm.
UnitsNum: Expression = {
    <start: @L> <amount: Int> <suffix: "unit"> <end: @R> => {
        let unit = Unit::from_str(suffix.as_slice()).unwrap();
        ExpressionBuilder::spanned_unit((amount, unit), start, end)
    }
}
// A single- or double-quoted string literal. The first and last byte of the
// token text (the quote characters) are sliced off; the rest is used verbatim.
String: Expression = {
    <start: @L> <quoted: "sqstring"> <end: @R> => {
        let text = quoted.as_slice();
        ExpressionBuilder::spanned_string(&text[1..(text.len() - 1)], start, end)
    },
    <start: @L> <quoted: "dqstring"> <end: @R> => {
        let text = quoted.as_slice();
        ExpressionBuilder::spanned_string(&text[1..(text.len() - 1)], start, end)
    },
}
// A flag argument: "-" plus a bare word is a shorthand flag, "--" plus a bare
// word is a long flag.
Flag: Expression = {
    <start: @L> "-" <name: Bare> <end: @R> =>
        ExpressionBuilder::spanned_shorthand(name.to_string(), start, end),
    <start: @L> "--" <name: Bare> <end: @R> =>
        ExpressionBuilder::spanned_flag(name.to_string(), start, end),
}
// A variable reference: "$" followed by a "variable" token. The span includes
// the "$".
Var: Expression = {
    <start: @L> "$" <name: "variable"> <end: @R> =>
        ExpressionBuilder::spanned_var(name.as_slice(), start, end),
}
// A bare word together with its source span.
SpannedBare: Spanned<Bare> = {
    <start: @L> <word: Bare> <end: @R> => Spanned::from_item(word, (start, end)),
}
// === MACROS === //
// A comma-separated list of `T`, possibly empty. Newlines are allowed on either
// side of each comma, and the last element may or may not be followed by a comma.
Comma<T>: Vec<T> = {
    <leading: (<T> OptionalNewlines "," OptionalNewlines)*> <last: T?> => {
        // `leading` holds every element that was followed by a comma; append the
        // final comma-less element when there is one.
        let mut items = leading;
        items.extend(last);
        items
    }
};
// Binds the grammar to the external lexer: locations are `usize`, user errors
// are `ShellError`, and each terminal name used in the rules above is matched
// against the `token` field of a `SpannedToken`.
extern {
    type Location = usize;
    type Error = ShellError;

    enum SpannedToken<'input> {
        // Type keywords
        "any" => SpannedToken { token: Token::TyAny, .. },
        "int" => SpannedToken { token: Token::TyInt, .. },
        "decimal" => SpannedToken { token: Token::TyDecimal, .. },
        "bytes" => SpannedToken { token: Token::TyBytes, .. },
        "text" => SpannedToken { token: Token::TyText, .. },
        "boolean" => SpannedToken { token: Token::TyBoolean, .. },
        "date" => SpannedToken { token: Token::TyDate, .. },
        "object" => SpannedToken { token: Token::TyObject, .. },
        "list" => SpannedToken { token: Token::TyList, .. },
        "block" => SpannedToken { token: Token::TyBlock, .. },

        // Punctuation
        "->" => SpannedToken { token: Token::ReturnArrow, .. },
        "," => SpannedToken { token: Token::Comma, .. },
        ":" => SpannedToken { token: Token::Colon, .. },
        "|" => SpannedToken { token: Token::Pipe, .. },
        "(" => SpannedToken { token: Token::OpenParen, .. },
        ")" => SpannedToken { token: Token::CloseParen, .. },
        "{" => SpannedToken { token: Token::OpenBrace, .. },
        "}" => SpannedToken { token: Token::CloseBrace, .. },

        // Comparison operators
        "==" => SpannedToken { token: Token::OpEq, .. },
        "!=" => SpannedToken { token: Token::OpNeq, .. },
        "<" => SpannedToken { token: Token::OpLt, .. },
        "<=" => SpannedToken { token: Token::OpLte, .. },
        ">" => SpannedToken { token: Token::OpGt, .. },
        ">=" => SpannedToken { token: Token::OpGte, .. },

        // Sigils
        "-" => SpannedToken { token: Token::Dash, .. },
        "--" => SpannedToken { token: Token::DashDash, .. },
        "$" => SpannedToken { token: Token::Dollar, .. },
        "???." => SpannedToken { token: Token::PathDot, .. },

        // Function definition markers
        "command-name" => SpannedToken { token: Token::CommandName, .. },
        "start-params" => SpannedToken { token: Token::StartParamList, .. },
        "end-params" => SpannedToken { token: Token::EndParamList, .. },

        // Literals, members and words
        "num" => SpannedToken { token: Token::Num, .. },
        "member" => SpannedToken { token: Token::Member, .. },
        "sqmember" => SpannedToken { token: Token::SQMember, .. },
        // A double-quoted member gets its own token kind, distinct from the
        // single-quoted one above.
        // NOTE(review): assumes the lexer's `Token` enum has a `DQMember`
        // variant, mirroring `SQString`/`DQString` — confirm against the lexer.
        "dqmember" => SpannedToken { token: Token::DQMember, .. },
        "variable" => SpannedToken { token: Token::Variable, .. },
        "bare" => SpannedToken { token: Token::Bare, .. },
        "dqstring" => SpannedToken { token: Token::DQString, .. },
        "sqstring" => SpannedToken { token: Token::SQString, .. },
        "unit" => SpannedToken { token: Token::Unit, .. },
        "newline" => SpannedToken { token: Token::Newline, .. },

        // Keywords
        "function" => SpannedToken { token: Token::KeywordFunction, .. },
    }
}