From e3abadd6864c7d17213587676200f65eb5bb8115 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 1 Jul 2021 12:01:04 +1200 Subject: [PATCH] Add stmt parsing --- src/main.rs | 14 +- src/parse_error.rs | 2 + src/parser.rs | 320 ++++++++++++++++++++++++++++++++++++++++++-- src/parser_state.rs | 46 +++++-- src/span.rs | 8 ++ 5 files changed, 354 insertions(+), 36 deletions(-) diff --git a/src/main.rs b/src/main.rs index 889531d5b1..794df50f14 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,19 +2,11 @@ use engine_q::{lex, lite_parse, LexMode, ParserWorkingSet}; fn main() -> std::io::Result<()> { if let Some(path) = std::env::args().nth(1) { - let file = std::fs::read(&path)?; - - // let (output, err) = lex(&file, 0, 0, LexMode::Normal); - - // println!("{:?} tokens, error: {:?}", output, err); - - // let (output, err) = lite_parse(&output); - - // println!("{:?}, error: {:?}", output, err); - let mut working_set = ParserWorkingSet::new(None); - let (output, err) = working_set.parse_file(&path, &file); + //let file = std::fs::read(&path)?; + //let (output, err) = working_set.parse_file(&path, &file); + let (output, err) = working_set.parse_source(path.as_bytes()); println!("{:?} {:?}", output, err); Ok(()) diff --git a/src/parse_error.rs b/src/parse_error.rs index 40cc6ed675..c61f01d851 100644 --- a/src/parse_error.rs +++ b/src/parse_error.rs @@ -4,4 +4,6 @@ pub use crate::Span; pub enum ParseError { ExtraTokens(Span), UnexpectedEof(String, Span), + UnknownStatement(Span), + Mismatch(String, Span), } diff --git a/src/parser.rs b/src/parser.rs index fd86d624b7..dea68accd4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,9 +1,71 @@ use std::str::Utf8Error; -use crate::{lex, lite_parse, LiteBlock, LiteStatement, ParseError, ParserWorkingSet, Span}; +use crate::{ + lex, lite_parse, + parser_state::{Type, VarId}, + LiteBlock, LiteCommand, LiteStatement, ParseError, ParserWorkingSet, Span, +}; + +/// The syntactic shapes that values must match to be passed into a command. You can think of this as the type-checking that occurs when you call a function. +#[derive(Debug, Copy, Clone)] +pub enum SyntaxShape { + /// Any syntactic form is allowed + Any, + /// Strings and string-like bare words are allowed + String, + /// A dotted path to navigate the table + ColumnPath, + /// A dotted path to navigate the table (including variable) + FullColumnPath, + /// Only a numeric (integer or decimal) value is allowed + Number, + /// A range is allowed (eg, `1..3`) + Range, + /// Only an integer value is allowed + Int, + /// A filepath is allowed + FilePath, + /// A glob pattern is allowed, eg `foo*` + GlobPattern, + /// A block is allowed, eg `{start this thing}` + Block, + /// A table is allowed, eg `[first second]` + Table, + /// A filesize value is allowed, eg `10kb` + Filesize, + /// A duration value is allowed, eg `19day` + Duration, + /// An operator + Operator, + /// A math expression which expands shorthand forms on the lefthand side, eg `foo > 1` + /// The shorthand allows us to more easily reach columns inside of the row being passed in + RowCondition, + /// A general math expression, eg the `1 + 2` of `= 1 + 2` + MathExpression, +} #[derive(Debug)] -pub enum Expression {} +pub enum Expr { + Int(i64), + Var(VarId), + Garbage, +} + +#[derive(Debug)] +pub struct Expression { + expr: Expr, + ty: Type, + span: Span, +} +impl Expression { + pub fn garbage(span: Span) -> Expression { + Expression { + expr: Expr::Garbage, + span, + ty: Type::Unknown, + } + } +} #[derive(Debug)] pub enum Import {} @@ -13,6 +75,12 @@ pub struct Block { stmts: Vec, } +impl Default for Block { + fn default() -> Self { + Self::new() + } +} + impl Block { pub fn new() -> Self { Self { stmts: vec![] } @@ -21,8 +89,8 @@ impl Block { #[derive(Debug)] pub struct VarDecl { - name: String, - value: Expression, + var_id: VarId, + expression: Expression, } #[derive(Debug)] @@ -30,19 +98,64 @@ pub enum Statement { Pipeline(Pipeline), VarDecl(VarDecl), Import(Import), + Expression(Expression), None, } #[derive(Debug)] pub struct Pipeline {} +impl Default for Pipeline { + fn default() -> Self { + Self::new() + } +} + impl Pipeline { pub fn new() -> Self { Self {} } } +fn garbage(span: Span) -> Expression { + Expression::garbage(span) +} + +fn span(spans: &[Span]) -> Span { + let length = spans.len(); + + if length == 0 { + Span::unknown() + } else if length == 1 || spans[0].file_id != spans[length - 1].file_id { + spans[0] + } else { + Span { + start: spans[0].start, + end: spans[length - 1].end, + file_id: spans[0].file_id, + } + } +} + impl ParserWorkingSet { + /* + fn parse_let(&mut self, command: &LiteCommand) -> (Statement, Option) { + + } + fn parse_special_command(&mut self, command: &LiteCommand) -> (Statement, Option) { + let command_name = self.get_span_contents(command.parts[0]); + println!("{:?}", command_name); + match command_name { + b"let" => self.parse_let(command), + b"def" => self.parse_def(command), + b"source" => self.parse_source(command), + _ => ( + Statement::None, + Some(ParseError::UnknownStatement(command.parts[0])), + ), + } + } + fn parse_statement( &mut self, block: &mut Block, @@ -50,20 +163,182 @@ impl ParserWorkingSet { ) -> Option { match lite_pipeline.commands.len() { 0 => None, - 1 => { - let command_name = self.get_span_contents(lite_pipeline.commands[0].parts[0]); - println!("{:?}", command_name); - if command_name == b"let" { - println!("found let") - } - None - } + 1 => None, _ => { // pipeline None } } } + */ + + pub fn parse_int(&mut self, token: &str, span: Span) -> (Expression, Option) { + if let Some(token) = token.strip_prefix("0x") { + if let Ok(v) = i64::from_str_radix(token, 16) { + ( + Expression { + expr: Expr::Int(v), + ty: Type::Int, + span, + }, + None, + ) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("int".into(), span)), + ) + } + } else if let Some(token) = token.strip_prefix("0b") { + if let Ok(v) = i64::from_str_radix(token, 2) { + ( + Expression { + expr: Expr::Int(v), + ty: Type::Int, + span, + }, + None, + ) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("int".into(), span)), + ) + } + } else if let Some(token) = token.strip_prefix("0o") { + if let Ok(v) = i64::from_str_radix(token, 8) { + ( + Expression { + expr: Expr::Int(v), + ty: Type::Int, + span, + }, + None, + ) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("int".into(), span)), + ) + } + } else if let Ok(x) = token.parse::() { + ( + Expression { + expr: Expr::Int(x), + ty: Type::Int, + span, + }, + None, + ) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("int".into(), span)), + ) + } + } + + pub fn parse_number(&mut self, token: &str, span: Span) -> (Expression, Option) { + if let (x, None) = self.parse_int(token, span) { + (x, None) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("number".into(), span)), + ) + } + } + + pub fn parse_arg( + &mut self, + span: Span, + shape: SyntaxShape, + ) -> (Expression, Option) { + match shape { + SyntaxShape::Number => { + if let Ok(token) = String::from_utf8(self.get_span_contents(span).into()) { + self.parse_number(&token, span) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("number".into(), span)), + ) + } + } + _ => ( + garbage(span), + Some(ParseError::Mismatch("number".into(), span)), + ), + } + } + + pub fn parse_math_expression(&mut self, spans: &[Span]) -> (Expression, Option) { + self.parse_arg(spans[0], SyntaxShape::Number) + } + + pub fn parse_expression(&mut self, spans: &[Span]) -> (Expression, Option) { + self.parse_math_expression(spans) + } + + pub fn parse_variable(&mut self, span: Span) -> Option { + let contents = self.get_span_contents(span); + + if !contents.is_empty() && contents[0] == b'$' { + None + } else { + Some(ParseError::Mismatch("variable".into(), span)) + } + } + + pub fn parse_keyword(&self, span: Span, keyword: &[u8]) -> Option { + if self.get_span_contents(span) == keyword { + None + } else { + Some(ParseError::Mismatch( + String::from_utf8_lossy(keyword).to_string(), + span, + )) + } + } + + pub fn parse_let(&mut self, spans: &[Span]) -> (Statement, Option) { + let mut error = None; + if spans.len() >= 4 && self.parse_keyword(spans[0], b"let").is_none() { + let err = self.parse_variable(spans[1]); + error = error.or(err); + + let err = self.parse_keyword(spans[2], b"="); + error = error.or(err); + + let (expression, err) = self.parse_expression(&spans[3..]); + error = error.or(err); + + let var_name: Vec<_> = self.get_span_contents(spans[1]).into(); + let var_id = self.add_variable(var_name, expression.ty); + + (Statement::VarDecl(VarDecl { var_id, expression }), error) + } else { + let span = span(spans); + ( + Statement::Expression(garbage(span)), + Some(ParseError::Mismatch("let".into(), span)), + ) + } + } + + pub fn parse_statement(&mut self, spans: &[Span]) -> (Statement, Option) { + if let (stmt, None) = self.parse_let(spans) { + (stmt, None) + } else if let (expr, None) = self.parse_expression(spans) { + (Statement::Expression(expr), None) + } else { + let span = span(spans); + ( + Statement::Expression(garbage(span)), + Some(ParseError::Mismatch("statement".into(), span)), + ) + } + } pub fn parse_block(&mut self, lite_block: &LiteBlock) -> (Block, Option) { let mut error = None; @@ -72,8 +347,10 @@ impl ParserWorkingSet { let mut block = Block::new(); for pipeline in &lite_block.block { - let err = self.parse_statement(&mut block, pipeline); + let (stmt, err) = self.parse_statement(&pipeline.commands[0].parts); error = error.or(err); + + block.stmts.push(stmt); } self.exit_scope(); @@ -99,4 +376,21 @@ impl ParserWorkingSet { (output, error) } + + pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option) { + let mut error = None; + + let file_id = self.add_file("source".into(), source.into()); + + let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal); + error = error.or(err); + + let (output, err) = lite_parse(&output); + error = error.or(err); + + let (output, err) = self.parse_block(&output); + error = error.or(err); + + (output, error) + } } diff --git a/src/parser_state.rs b/src/parser_state.rs index 5f0aa82f77..955da059fb 100644 --- a/src/parser_state.rs +++ b/src/parser_state.rs @@ -1,4 +1,4 @@ -use crate::Span; +use crate::{ParseError, Span}; use std::{collections::HashMap, sync::Arc}; pub struct ParserState { @@ -10,11 +10,16 @@ pub enum VarLocation { OuterScope, } -#[derive(Clone, Copy)] -pub enum Type {} +#[derive(Clone, Copy, Debug)] +pub enum Type { + Int, + Unknown, +} + +pub type VarId = usize; struct ScopeFrame { - vars: HashMap, + vars: HashMap, VarId>, } impl ScopeFrame { @@ -27,6 +32,7 @@ impl ScopeFrame { pub struct ParserWorkingSet { files: Vec<(String, Vec)>, + vars: HashMap, permanent_state: Option>, scope: Vec, } @@ -73,6 +79,7 @@ impl ParserWorkingSet { pub fn new(permanent_state: Option>) -> Self { Self { files: vec![], + vars: HashMap::new(), permanent_state, scope: vec![], } @@ -115,23 +122,38 @@ impl ParserWorkingSet { self.scope.push(ScopeFrame::new()); } - pub fn find_variable(&self, name: &str) -> Option<(VarLocation, Type)> { + pub fn find_variable(&self, name: &[u8]) -> Option<(VarLocation, Type)> { for scope in self.scope.iter().rev().enumerate() { if let Some(result) = scope.1.vars.get(name) { - if scope.0 == 0 { - // Top level - return Some((VarLocation::CurrentScope, result.clone())); - } else { - return Some((VarLocation::OuterScope, result.clone())); + if let Some(result) = self.vars.get(result) { + if scope.0 == 0 { + // Top level + return Some((VarLocation::CurrentScope, result.clone())); + } else { + return Some((VarLocation::OuterScope, result.clone())); + } } } } None } -} -fn main() {} + pub fn add_variable(&mut self, name: Vec, ty: Type) -> VarId { + let last = self + .scope + .last_mut() + .expect("internal error: missing stack frame"); + + let next_id = self.vars.len(); + + last.vars.insert(name, next_id); + + self.vars.insert(next_id, ty); + + next_id + } +} #[cfg(test)] mod parser_state_tests { diff --git a/src/span.rs b/src/span.rs index 344344a4b4..8c3f8664e4 100644 --- a/src/span.rs +++ b/src/span.rs @@ -13,4 +13,12 @@ impl Span { file_id, } } + + pub fn unknown() -> Span { + Span { + start: usize::MAX, + end: usize::MAX, + file_id: usize::MAX, + } + } }