Start rebuilding lite parser using nom

This commit is contained in:
Yehuda Katz 2019-06-10 22:53:04 -07:00
parent a3c3c4d776
commit 4291e31dc7
11 changed files with 352 additions and 45 deletions

55
Cargo.lock generated
View file

@ -166,6 +166,11 @@ dependencies = [
"regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bytecount"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.1"
@ -1238,17 +1243,6 @@ name = "lazycell"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lexical-core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"stackvector 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"static_assertions 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.58"
@ -1530,13 +1524,13 @@ dependencies = [
]
[[package]]
name = "nom"
version = "5.0.0-beta1"
name = "nom_locate"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lexical-core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -1573,7 +1567,8 @@ dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
"logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 5.0.0-beta1 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"nom_locate 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"pancurses 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2539,19 +2534,6 @@ name = "stable_deref_trait"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "stackvector"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "static_assertions"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "string"
version = "0.2.0"
@ -2989,14 +2971,6 @@ name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unreachable"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "url"
version = "1.7.2"
@ -3195,6 +3169,7 @@ dependencies = [
"checksum block 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a"
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
"checksum byte-unit 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6754bb4703aa167bed5381f0c6842f1cc31a9ecde3b9443f726dde3ad3afb841"
"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8"
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
"checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d"
@ -3312,7 +3287,6 @@ dependencies = [
"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum lazycell 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f"
"checksum lexical-core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3e82e023e062f1d25f807ad182008fba1b46538e999f908a08cc0c29e084462e"
"checksum libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)" = "6281b86796ba5e4366000be6e9e18bf35580adf9e63fbe2294aadb587613a319"
"checksum libgit2-sys 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)" = "48441cb35dc255da8ae72825689a95368bf510659ae1ad55dc4aa88cb1789bf1"
"checksum libssh2-sys 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "126a1f4078368b163bfdee65fbab072af08a1b374a5551b21e87ade27b1fbf9d"
@ -3343,7 +3317,7 @@ dependencies = [
"checksum nix 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6c722bee1037d430d0f8e687bbdbf222f27cc6e4e68d5caf630857bb2b6dbdce"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
"checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
"checksum nom 5.0.0-beta1 (registry+https://github.com/rust-lang/crates.io-index)" = "6527f311b2baba609e980e008460ab5ebff6d6da15213bb8eb193b7746eefa24"
"checksum nom_locate 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6a47c112b3861d81f7fbf73892b9271af933af32bd5dee6889aa3c3fa9caed7e"
"checksum num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cf4825417e1e1406b3782a8ce92f4d53f26ec055e3622e1881ca8e9f5f9e08db"
"checksum num-complex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "107b9be86cd2481930688277b675b0114578227f034674726605b8a482d8baf8"
"checksum num-integer 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "8b8af8caa3184078cd419b430ff93684cb13937970fcb7639f728992f33ce674"
@ -3448,8 +3422,6 @@ dependencies = [
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8"
"checksum stackvector 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c049c77bf85fbc036484c97b008276d539d9ebff9dfbde37b632ebcd5b8746b6"
"checksum static_assertions 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c19be23126415861cb3a23e501d34a708f7f9b2183c5252d690941c2e69199d5"
"checksum string 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0bbfb8937e38e34c3444ff00afb28b0811d9554f15c5ad64d12b0308d1d1995"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
@ -3496,7 +3468,6 @@ dependencies = [
"checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9"
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a"
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
"checksum utf8parse 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"

View file

@ -19,7 +19,7 @@ prettytable-rs = "0.8.0"
itertools = "0.8.0"
ansi_term = "0.11.0"
conch-parser = "0.1.1"
nom = "5.0.0-beta1"
nom = "4.2.3"
dunce = "1.0.0"
indexmap = { version = "1.0.2", features = ["serde-1"] }
chrono-humanize = "0.0.11"
@ -61,6 +61,7 @@ clipboard = "0.5"
reqwest = "0.9"
roxmltree = "0.6.0"
pretty = "0.5.2"
nom_locate = "0.3.1"
[dependencies.pancurses]
version = "0.16"

View file

@ -177,8 +177,8 @@ impl std::convert::From<subprocess::PopenError> for ShellError {
}
}
impl std::convert::From<nom::Err<(&str, nom::error::ErrorKind)>> for ShellError {
fn from(input: nom::Err<(&str, nom::error::ErrorKind)>) -> ShellError {
impl std::convert::From<nom::Err<(&str, nom::ErrorKind)>> for ShellError {
fn from(input: nom::Err<(&str, nom::ErrorKind)>) -> ShellError {
ShellError::String(StringError {
title: format!("{:?}", input),
error: Value::nothing(),

View file

@ -4,6 +4,7 @@ crate mod lexer;
crate mod parser;
crate mod registry;
crate mod span;
crate mod parse2;
crate use ast::Pipeline;
crate use registry::{Args, CommandConfig};

6
src/parser/parse2.rs Normal file
View file

@ -0,0 +1,6 @@
crate mod operator;
crate mod parser;
crate mod span;
crate mod token_tree;
crate mod tokens;
crate mod util;

View file

@ -0,0 +1,50 @@
use serde_derive::{Deserialize, Serialize};
use std::str::FromStr;
/// The comparison operators the parser recognises.
///
/// Each variant corresponds to exactly one spelling (see `Operator::as_str`
/// and the `FromStr` impl). The declaration order of the variants determines
/// the derived `PartialOrd`/`Ord` ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)]
pub enum Operator {
/// `==`
Equal,
/// `!=`
NotEqual,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `<=`
LessThanOrEqual,
/// `>=`
GreaterThanOrEqual,
}
impl Operator {
    /// Returns the operator's source spelling as an owned `String`.
    pub fn print(&self) -> String {
        self.as_str().to_string()
    }

    /// Returns the operator's source spelling, e.g. `"<="` for
    /// `Operator::LessThanOrEqual`.
    ///
    /// Every spelling is a string literal, so the result is `'static` and may
    /// outlive the operator value it was obtained from.
    pub fn as_str(&self) -> &'static str {
        match self {
            Operator::Equal => "==",
            Operator::NotEqual => "!=",
            Operator::LessThan => "<",
            Operator::GreaterThan => ">",
            Operator::LessThanOrEqual => "<=",
            Operator::GreaterThanOrEqual => ">=",
        }
    }
}
impl From<&str> for Operator {
fn from(input: &str) -> Operator {
Operator::from_str(input).unwrap()
}
}
/// Parses an operator from its exact spelling.
///
/// The match is literal: surrounding whitespace or any other text yields
/// `Err(())`.
impl FromStr for Operator {
    type Err = ();

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let operator = match input {
            "==" => Operator::Equal,
            "!=" => Operator::NotEqual,
            "<" => Operator::LessThan,
            ">" => Operator::GreaterThan,
            "<=" => Operator::LessThanOrEqual,
            ">=" => Operator::GreaterThanOrEqual,
            _ => return Err(()),
        };

        Ok(operator)
    }
}

146
src/parser/parse2/parser.rs Normal file
View file

@ -0,0 +1,146 @@
use crate::parser::parse2::{operator::*, span::*, tokens::*};
use nom::types::CompleteStr;
use nom::*;
use nom_locate::{position, LocatedSpan};
use std::str::FromStr;
// Input type shared by every parser in this file: a `CompleteStr` wrapped in
// nom_locate's `LocatedSpan`, which `position!()` and `Spanned::from_nom` use
// to read offsets.
type NomSpan<'a> = LocatedSpan<CompleteStr<'a>>;
// Defines a nom parser called `$name` that matches the literal text of
// `$token` (via `stringify!`) and yields a `Token` holding
// `RawToken::Operator`, spanned from the position before the tag to the
// position after it.
macro_rules! operator {
($name:tt : $token:tt ) => {
named!($name( NomSpan ) -> Token,
do_parse!(
l: position!()
>> t: tag!(stringify!($token))
>> r: position!()
// `t` is exactly the text `tag!` matched, so the `unwrap` only holds when
// `$token` is one of the spellings `Operator::from_str` accepts.
>> (Spanned::from_nom(RawToken::Operator(Operator::from_str(t.fragment.0).unwrap()), l, r))
)
);
};
}
// One parser per comparison operator. The text after the colon is both the
// tag the parser matches and the string handed to `Operator::from_str`.
operator! { gt: > }
operator! { lt: < }
operator! { gte: >= }
operator! { lte: <= }
operator! { eq: == }
operator! { neq: != }
// Parses an integer literal: an optional `-` followed by `digit1`.
// Yields `RawToken::Integer`, spanned from before the sign (if any) to after
// the last digit.
// The numeric value is computed by `int`, which panics instead of producing a
// parse error when the digits cannot be represented as an `i64`.
named!(integer( NomSpan ) -> Token,
do_parse!(
l: position!()
>> neg: opt!(tag!("-"))
>> num: digit1
>> r: position!()
>> (Spanned::from_nom(RawToken::Integer(int(num.fragment.0, neg)), l, r))
)
);
// Parses any one comparison operator.
// `alt!` takes the first alternative that succeeds, so the two-character
// operators are listed before `gt`/`lt`; otherwise `>=` would be consumed as
// `>` with `=` left over.
named!(operator( NomSpan ) -> Token,
alt!(
gte | lte | neq | gt | lt | eq
)
);
// Parses a double-quoted string.
// The token's own span (`l..r`) includes both quotes; the `Span` stored in
// `RawToken::String` (`l1..r1`) covers only the text between them.
// There is no escape handling: every character other than `"` is accepted,
// so the string ends at the first `"` after the opening quote.
named!(dq_string( NomSpan ) -> Token,
do_parse!(
l: position!()
>> char!('"')
>> l1: position!()
// The matched characters are discarded; only the positions are kept.
>> many0!(none_of!("\""))
>> r1: position!()
>> char!('"')
>> r: position!()
>> (Spanned::from_nom(RawToken::String(Span::from((l1, r1))), l, r))
)
);
// Parses a single-quoted string.
// The token's own span (`l..r`) includes both quotes; the `Span` stored in
// `RawToken::String` (`l1..r1`) covers only the text between them.
// There is no escape handling: every character other than `'` is accepted,
// so the string ends at the first `'` after the opening quote.
named!(sq_string( NomSpan ) -> Token,
do_parse!(
l: position!()
>> char!('\'')
>> l1: position!()
// The matched characters are discarded; only the positions are kept.
>> many0!(none_of!("'"))
>> r1: position!()
>> char!('\'')
>> r: position!()
>> (Spanned::from_nom(RawToken::String(Span::from((l1, r1))), l, r))
)
);
/// Converts the text of an integer literal into an `i64`.
///
/// `frag` holds the literal's digits without the sign. `neg` is `Some(_)`
/// when a leading `-` was matched (its payload is ignored), in which case the
/// parsed value is negated.
///
/// # Panics
///
/// Panics if `frag` is not an integer literal, or if the signed value does
/// not fit in an `i64`.
fn int<T>(frag: &str, neg: Option<T>) -> i64 {
    // Parse into a wider type first: the magnitude of `i64::MIN` does not fit
    // in an `i64`, so parsing the digits as `i64` and negating afterwards
    // would reject the valid literal `-9223372036854775808`.
    let magnitude = frag
        .parse::<i128>()
        .unwrap_or_else(|err| panic!("invalid integer literal {:?}: {}", frag, err));
    let value = if neg.is_some() { -magnitude } else { magnitude };

    let min = i128::from(i64::min_value());
    let max = i128::from(i64::max_value());
    if value < min || value > max {
        panic!("integer literal {} does not fit in an i64", value);
    }

    // Lossless: the range check above guarantees `value` fits.
    value as i64
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `source` in the located-span input type the parsers take.
    fn input(source: &str) -> NomSpan<'_> {
        NomSpan::new(CompleteStr(source))
    }

    #[test]
    fn test_integer() {
        let (_, positive) = integer(input("123")).unwrap();
        assert_eq!(
            positive,
            Spanned::from_item(RawToken::Integer(123), (0, 3))
        );

        let (_, negative) = integer(input("-123")).unwrap();
        assert_eq!(
            negative,
            Spanned::from_item(RawToken::Integer(-123), (0, 4))
        );
    }

    #[test]
    fn test_operator() {
        // (source text, operator it must parse to); the span covers the
        // whole input in every case.
        let cases = [
            (">", Operator::GreaterThan),
            (">=", Operator::GreaterThanOrEqual),
            ("<", Operator::LessThan),
            ("<=", Operator::LessThanOrEqual),
            ("==", Operator::Equal),
            ("!=", Operator::NotEqual),
        ];

        for &(source, expected) in cases.iter() {
            let (_, token) = operator(input(source)).unwrap();
            assert_eq!(
                token,
                Spanned::from_item(RawToken::Operator(expected), (0, source.len()))
            );
        }
    }

    #[test]
    fn test_string() {
        // The inner span excludes the quotes, the outer span includes them.
        let expected = Spanned::from_item(RawToken::String(Span::from((1, 12))), (0, 13));

        let (_, double_quoted) = dq_string(input(r#""hello world""#)).unwrap();
        assert_eq!(double_quoted, expected);

        let (_, single_quoted) = sq_string(input(r#"'hello world'"#)).unwrap();
        assert_eq!(single_quoted, expected);
    }
}

114
src/parser/parse2/span.rs Normal file
View file

@ -0,0 +1,114 @@
use derive_new::new;
use std::ops::Range;
/// A value of type `T` paired with a `Span`.
///
/// `derive(new)` (from the `derive_new` crate) generates
/// `Spanned::new(span, item)`.
#[derive(new, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Spanned<T> {
/// The span associated with `item`.
crate span: Span,
/// The wrapped value; also reachable through the `Deref` impl.
crate item: T,
}
impl<T> std::ops::Deref for Spanned<T> {
type Target = T;
fn deref(&self) -> &T {
&self.item
}
}
impl<T> Spanned<T> {
    /// Wraps `item` in a span running from the offset of `start` to the
    /// offset of `end`.
    crate fn from_nom<U>(
        item: T,
        start: nom_locate::LocatedSpan<U>,
        end: nom_locate::LocatedSpan<U>,
    ) -> Spanned<T> {
        Spanned::from_item(item, (start.offset, end.offset))
    }

    /// Wraps `item` in anything convertible into a `Span`, such as a
    /// `(start, end)` pair.
    crate fn from_item(item: T, span: impl Into<Span>) -> Spanned<T> {
        let span = span.into();
        Spanned { span, item }
    }

    /// Applies `input` to the wrapped item and keeps the span unchanged.
    crate fn map<U>(self, input: impl FnOnce(T) -> U) -> Spanned<U> {
        Spanned {
            span: self.span,
            item: input(self.item),
        }
    }
}
/// A pair of `usize` positions delimiting a region of the input.
///
/// NOTE(review): the parser tests treat these as offsets into the input with
/// an exclusive `end` (`123` at the start of the input is `(0, 3)`) — confirm
/// that every producer follows the same convention.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct Span {
/// Position where the region begins.
crate start: usize,
/// Position where the region ends.
crate end: usize,
// source: &'source str,
}
/// Builds a `Span` from a `(start, end)` pair of nom_locate positions, using
/// each position's `offset`.
impl<T> From<(nom_locate::LocatedSpan<T>, nom_locate::LocatedSpan<T>)> for Span {
    fn from(
        (start, end): (nom_locate::LocatedSpan<T>, nom_locate::LocatedSpan<T>),
    ) -> Span {
        Span {
            start: start.offset,
            end: end.offset,
        }
    }
}
/// Builds a `Span` from a `(start, end)` pair.
impl From<(usize, usize)> for Span {
    fn from((start, end): (usize, usize)) -> Span {
        Span { start, end }
    }
}
impl From<&std::ops::Range<usize>> for Span {
fn from(input: &std::ops::Range<usize>) -> Span {
Span {
start: input.start,
end: input.end,
}
}
}
impl Span {
fn new(range: &Range<usize>) -> Span {
Span {
start: range.start,
end: range.end,
// source,
}
}
}
/// Lets `Span` be used as the span type of `language_reporting` diagnostics.
impl language_reporting::ReportingSpan for Span {
    /// Returns a copy of this span with `start` replaced.
    fn with_start(&self, start: usize) -> Self {
        Span { start, ..*self }
    }

    /// Returns a copy of this span with `end` replaced.
    fn with_end(&self, end: usize) -> Self {
        Span { end, ..*self }
    }

    /// Returns the span's start position.
    fn start(&self) -> usize {
        let Span { start, .. } = *self;
        start
    }

    /// Returns the span's end position.
    fn end(&self) -> usize {
        let Span { end, .. } = *self;
        end
    }
}

View file

@ -0,0 +1,6 @@
use crate::parser::parse2::{span::*, tokens::*};
/// A node of the token tree. At present the only kind of node is a single
/// `Token`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
pub enum TokenNode {
/// A leaf holding one token.
Token(Token),
}

View file

@ -0,0 +1,11 @@
use crate::parser::parse2::operator::*;
use crate::parser::parse2::span::*;
/// The payload of a token, without the span of the token itself (see `Token`
/// for the spanned form).
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum RawToken {
/// An integer literal, stored as its value.
Integer(i64),
/// A comparison operator.
Operator(Operator),
/// A string literal, stored as a `Span` rather than as text.
/// NOTE(review): the string parsers in `parser.rs` store the span of the
/// text between the quotes here — confirm for any other producer.
String(Span),
}
/// A `RawToken` together with the `Span` it occupies.
pub type Token = Spanned<RawToken>;

View file

@ -0,0 +1 @@