From 16910bb637d17aa9f2af3eb5f92371bb6c9a64d4 Mon Sep 17 00:00:00 2001
From: David Stotijn
Date: Fri, 13 Nov 2020 21:33:49 +0100
Subject: [PATCH] Add lexer for reqlog search

---
 go.mod                   |   1 +
 go.sum                   |   3 +
 pkg/reqlog/lexer.go      | 144 +++++++++++++++++++++++++++++
 pkg/reqlog/lexer_test.go | 194 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 342 insertions(+)
 create mode 100644 pkg/reqlog/lexer.go
 create mode 100644 pkg/reqlog/lexer_test.go

diff --git a/go.mod b/go.mod
index beada7f..5ce68bb 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,7 @@ require (
 	github.com/99designs/gqlgen v0.13.0
 	github.com/GeertJohan/go.rice v1.0.0
 	github.com/Masterminds/squirrel v1.4.0
+	github.com/db47h/lex v1.2.1
 	github.com/gorilla/mux v1.7.4
 	github.com/hashicorp/golang-lru v0.5.1 // indirect
 	github.com/jmoiron/sqlx v1.2.0
diff --git a/go.sum b/go.sum
index 5f9e531..2981218 100644
--- a/go.sum
+++ b/go.sum
@@ -26,6 +26,8 @@ github.com/daaku/go.zipexe v1.0.0/go.mod h1:z8IiR6TsVLEYKwXAoE/I+8ys/sDkgTzSL0CL
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/db47h/lex v1.2.1 h1:OPXYd/WUiM8UbB65L26nA/NI4Y9jlCFOJIHGDPvYn2M=
+github.com/db47h/lex v1.2.1/go.mod h1:c1/b2FVNgxTLDASa/K0yVU07pAFiYx5Iiihijlvokqg=
 github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c h1:TUuUh0Xgj97tLMNtWtNvI9mIV6isjEb9lBMNv+77IGM=
 github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
 github.com/go-chi/chi v3.3.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
@@ -128,6 +130,7 @@ golang.org/x/sys v0.0.0-20200116001909-b77594299b42 h1:vEOn+mP2zCOVzKckCZy6YsCtD
 golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190125232054-d66bd3c5d5a6/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/pkg/reqlog/lexer.go b/pkg/reqlog/lexer.go
new file mode 100644
index 0000000..3167d86
--- /dev/null
+++ b/pkg/reqlog/lexer.go
@@ -0,0 +1,144 @@
+package reqlog
+
+import (
+	"unicode"
+
+	"github.com/db47h/lex"
+	"github.com/db47h/lex/state"
+)
+
+const (
+	tokEOF = iota
+	tokString
+	tokOpNot
+	tokOpAnd
+	tokOpOr
+	tokOpEq
+	tokOpNeq
+	tokOpGt
+	tokOpGteq
+	tokOpLt
+	tokOpLteq
+	tokOpHas
+	tokOpRe
+	tokOpNre
+	tokParenOpen
+	tokParenClose
+)
+
+type lexItem struct {
+	token lex.Token
+	value string
+}
+
+func lexQuery(s *lex.State) lex.StateFn {
+	str := lexString()
+	quotedString := state.QuotedString(tokString)
+
+	return func(s *lex.State) lex.StateFn {
+		r := s.Next()
+		pos := s.Pos()
+		switch r {
+		case lex.EOF:
+			s.Emit(pos, tokEOF, nil)
+			return nil
+		case '"':
+			return quotedString
+		case '=':
+			if next := s.Next(); next == '~' {
+				s.Emit(pos, tokOpRe, nil)
+			} else {
+				s.Backup()
+				s.Emit(pos, tokOpEq, nil)
+			}
+			return nil
+		case '!':
+			switch next := s.Next(); next {
+			case '=':
+				s.Emit(pos, tokOpNeq, nil)
+				return nil
+			case '~':
+				s.Emit(pos, tokOpNre, nil)
+				return nil
+			default:
+				s.Backup()
+			}
+		case '>':
+			if next := s.Next(); next == '=' {
+				s.Emit(pos, tokOpGteq, nil)
+			} else {
+				s.Backup()
+				s.Emit(pos, tokOpGt, nil)
+			}
+			return nil
+		case '<':
+			if next := s.Next(); next == '=' {
+				s.Emit(pos, tokOpLteq, nil)
+			} else {
+				s.Backup()
+				s.Emit(pos, tokOpLt, nil)
+			}
+			return nil
+		case ':':
+			s.Emit(pos, tokOpHas, nil)
+			return nil
+		case '(':
+			s.Emit(pos, tokParenOpen, nil)
+			return nil
+		case ')':
+			s.Emit(pos, tokParenClose, nil)
+			return nil
+		}
+
+		switch {
+		case unicode.IsSpace(r):
+			// Absorb spaces.
+			for r = s.Next(); unicode.IsSpace(r); r = s.Next() {
+			}
+			s.Backup()
+			return nil
+		default:
+			return str
+		}
+	}
+}
+
+func lexString() lex.StateFn {
+	// Preallocate a buffer to store the value. It will end up being at
+	// least as large as the largest value scanned.
+	b := make([]rune, 0, 64)
+
+	isStringChar := func(r rune) bool {
+		switch r {
+		case '=', '!', '<', '>', ':', '(', ')':
+			return false
+		}
+		return !(unicode.IsSpace(r) || r == lex.EOF)
+	}
+
+	return func(l *lex.State) lex.StateFn {
+		pos := l.Pos()
+		// Reset buffer and add first char.
+		b = append(b[:0], l.Current())
+		// Read identifier.
+		for r := l.Next(); isStringChar(r); r = l.Next() {
+			b = append(b, r)
+		}
+		// The character returned by the last call to `l.Next` is not part of
+		// the value. Undo it.
+		l.Backup()
+
+		switch {
+		case string(b) == "NOT":
+			l.Emit(pos, tokOpNot, nil)
+		case string(b) == "AND":
+			l.Emit(pos, tokOpAnd, nil)
+		case string(b) == "OR":
+			l.Emit(pos, tokOpOr, nil)
+		default:
+			l.Emit(pos, tokString, string(b))
+		}
+
+		return nil
+	}
+}
diff --git a/pkg/reqlog/lexer_test.go b/pkg/reqlog/lexer_test.go
new file mode 100644
index 0000000..32a244b
--- /dev/null
+++ b/pkg/reqlog/lexer_test.go
@@ -0,0 +1,194 @@
+package reqlog
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/db47h/lex"
+)
+
+func TestLex(t *testing.T) {
+	lexTests := []struct {
+		name     string
+		input    string
+		expected []lexItem
+	}{
+		{
+			name:  "empty query",
+			input: "",
+			expected: []lexItem{
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "single unquoted value",
+			input: "foobar",
+			expected: []lexItem{
+				{tokString, "foobar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "single unquoted value with non letter",
+			input: "foob*",
+			expected: []lexItem{
+				{tokString, "foob*"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "multiple unquoted values",
+			input: "foo bar",
+			expected: []lexItem{
+				{tokString, "foo"},
+				{tokString, "bar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "quoted value",
+			input: `"foo bar"`,
+			expected: []lexItem{
+				{tokString, "foo bar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "comparison with negation operator",
+			input: "NOT foobar",
+			expected: []lexItem{
+				{tokOpNot, ""},
+				{tokString, "foobar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "comparison with and operator",
+			input: "foo AND bar",
+			expected: []lexItem{
+				{tokString, "foo"},
+				{tokOpAnd, ""},
+				{tokString, "bar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "comparison with or operator",
+			input: "foo OR bar",
+			expected: []lexItem{
+				{tokString, "foo"},
+				{tokOpOr, ""},
+				{tokString, "bar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "comparison with equals operator",
+			input: "foo = bar",
+			expected: []lexItem{
+				{tokString, "foo"},
+				{tokOpEq, ""},
+				{tokString, "bar"},
+				{tokEOF, ""},
+			},
+		},
+		{
+			name:  "comparison with greater than operator",
+			input: "foo > 42",
+			expected: []lexItem{
+				{tokString, "foo"},
+				{tokOpGt, ""},
+				{tokString, "42"},
+				{tokEOF, ""},
+			},
+		},
+		{
name: "comparison with greater than or equal operator", + input: "foo >= 42", + expected: []lexItem{ + {tokString, "foo"}, + {tokOpGteq, ""}, + {tokString, "42"}, + {tokEOF, ""}, + }, + }, + { + name: "comparison with less than operator", + input: "foo < 42", + expected: []lexItem{ + {tokString, "foo"}, + {tokOpLt, ""}, + {tokString, "42"}, + {tokEOF, ""}, + }, + }, + { + name: "comparison with less than or equal operator", + input: "foo <= 42", + expected: []lexItem{ + {tokString, "foo"}, + {tokOpLteq, ""}, + {tokString, "42"}, + {tokEOF, ""}, + }, + }, + { + name: "comparison with regular expression operator", + input: "foo =~ 42", + expected: []lexItem{ + {tokString, "foo"}, + {tokOpRe, ""}, + {tokString, "42"}, + {tokEOF, ""}, + }, + }, + { + name: "comparison with not regular expression operator", + input: "foo !~ 42", + expected: []lexItem{ + {tokString, "foo"}, + {tokOpNre, ""}, + {tokString, "42"}, + {tokEOF, ""}, + }, + }, + { + name: "comparison with parentheses", + input: "(foo OR bar) AND baz", + expected: []lexItem{ + {tokParenOpen, ""}, + {tokString, "foo"}, + {tokOpOr, ""}, + {tokString, "bar"}, + {tokParenClose, ""}, + {tokOpAnd, ""}, + {tokString, "baz"}, + {tokEOF, ""}, + }, + }, + } + + for _, tt := range lexTests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + file := lex.NewFile(tt.name, strings.NewReader(tt.input)) + l := lex.NewLexer(file, lexQuery) + + for i, exp := range tt.expected { + token, _, value := l.Lex() + if err, isErr := value.(error); isErr { + t.Fatalf("unexpected error: %v", err) + } + valueStr, _ := value.(string) + got := lexItem{ + token: token, + value: valueStr, + } + if got != exp { + t.Errorf("%v: got: %+v, expected: %+v", i, got, exp) + } + } + }) + } +}