This commit is contained in:
Zachary Rice 2023-07-24 19:09:57 -05:00 committed by GitHub
parent 302c3ce8d1
commit 85f363f093
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 8 additions and 15 deletions

2
go.mod
View file

@ -8,6 +8,7 @@ require (
cloud.google.com/go/secretmanager v1.11.1
cloud.google.com/go/storage v1.31.0
github.com/Azure/go-autorest/autorest/azure/auth v0.5.11
github.com/BobuSumisu/aho-corasick v1.0.3
github.com/TheZeroSlave/zapsentry v1.17.0
github.com/aws/aws-sdk-go v1.44.83
github.com/bill-rich/disk-buffer-reader v0.1.7
@ -44,7 +45,6 @@ require (
github.com/mholt/archiver/v4 v4.0.0-alpha.8
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/paulbellamy/ratecounter v0.2.0
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.16.0
github.com/rabbitmq/amqp091-go v1.8.1

4
go.sum
View file

@ -51,6 +51,8 @@ github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUM
github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g=
github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
@ -383,8 +385,6 @@ github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaR
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs=
github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE=
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 h1:lL+y4Xv20pVlCGyLzNHRC0I0rIHhIL1lTvHizoS/dU8=
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9/go.mod h1:EHPiTAKtiFmrMldLUNswFwfZ2eJIYBHktdaUTZxYWRw=
github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0=
github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=

View file

@ -36,7 +36,6 @@ func (s Scanner) Keywords() []string {
// FromData will find and optionally verify NpmToken secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
for _, match := range matches {
if len(match) != 2 {

View file

@ -10,7 +10,7 @@ import (
"sync/atomic"
"time"
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
ahocorasick "github.com/BobuSumisu/aho-corasick"
"golang.org/x/sync/errgroup"
"google.golang.org/protobuf/proto"
@ -43,7 +43,7 @@ type Engine struct {
// prefilter is an ahocorasick struct used for doing efficient string
// matching given a set of words (keywords from the rules in the config)
prefilter ahocorasick.AhoCorasick
prefilter ahocorasick.Trie
}
type EngineOption func(*Engine)
@ -150,13 +150,7 @@ func Start(ctx context.Context, options ...EngineOption) *Engine {
for _, d := range e.detectors[true] {
keywords = append(keywords, d.Keywords()...)
}
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{
AsciiCaseInsensitive: true,
MatchOnlyWholeWords: false,
MatchKind: ahocorasick.LeftMostLongestMatch,
DFA: true,
})
e.prefilter = builder.Build(keywords)
e.prefilter = *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build()
ctx.Logger().Info("loaded decoders", "count", len(e.decoders))
ctx.Logger().Info("loaded detectors",
@ -297,8 +291,8 @@ func (e *Engine) detectorWorker(ctx context.Context) {
}
// build a map of all keywords that were matched in the chunk
for _, m := range e.prefilter.FindAll(string(decoded.Data)) {
matchedKeywords[strings.ToLower(string(decoded.Data[m.Start():m.End()]))] = struct{}{}
for _, m := range e.prefilter.MatchString(string(decoded.Data)) {
matchedKeywords[strings.ToLower(m.MatchString())] = struct{}{}
}
for verify, detectorsSet := range e.detectors {