2022-01-13 20:02:24 +00:00
|
|
|
package engine
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"runtime"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
2022-01-19 06:24:56 +00:00
|
|
|
"sync/atomic"
|
2022-01-13 20:02:24 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
|
|
|
|
"github.com/trufflesecurity/trufflehog/pkg/decoders"
|
|
|
|
"github.com/trufflesecurity/trufflehog/pkg/detectors"
|
|
|
|
"github.com/trufflesecurity/trufflehog/pkg/sources"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Engine orchestrates secret scanning: chunks submitted on the chunks
// channel are decoded and run through the configured detectors by a pool
// of worker goroutines, with findings emitted on the results channel.
type Engine struct {
	// concurrency is the number of detector worker goroutines.
	concurrency int
	// chunks carries raw data from sources to the detector workers.
	chunks chan *sources.Chunk
	// results carries detector findings back to the consumer.
	results chan detectors.ResultWithMetadata
	// decoders are applied to each chunk before detection.
	decoders []decoders.Decoder
	// detectors maps a verification flag to the detectors run with that
	// flag (true = findings are verified, false = unverified).
	detectors map[bool][]detectors.Detector
	// chunksScanned counts processed chunks; incremented by workers via
	// sync/atomic.
	chunksScanned uint64
	// detectorAvgTime records per-detector scan durations keyed by
	// detector name. NOTE(review): appended to by multiple workers —
	// confirm access is synchronized or read only after scanning ends.
	detectorAvgTime map[string][]time.Duration
}
|
|
|
|
|
|
|
|
// EngineOption configures an Engine created by Start.
type EngineOption func(*Engine)
|
|
|
|
|
|
|
|
func WithConcurrency(concurrency int) EngineOption {
|
|
|
|
return func(e *Engine) {
|
|
|
|
e.concurrency = concurrency
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func WithDetectors(verify bool, d ...detectors.Detector) EngineOption {
|
|
|
|
return func(e *Engine) {
|
|
|
|
if e.detectors == nil {
|
|
|
|
e.detectors = make(map[bool][]detectors.Detector)
|
|
|
|
}
|
|
|
|
if e.detectors[verify] == nil {
|
2022-01-19 06:24:56 +00:00
|
|
|
e.detectors[true] = []detectors.Detector{}
|
|
|
|
e.detectors[false] = []detectors.Detector{}
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
e.detectors[verify] = append(e.detectors[verify], d...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func WithDecoders(decoders ...decoders.Decoder) EngineOption {
|
|
|
|
return func(e *Engine) {
|
|
|
|
e.decoders = decoders
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func Start(ctx context.Context, options ...EngineOption) *Engine {
|
|
|
|
e := &Engine{
|
2022-02-07 18:29:06 +00:00
|
|
|
chunks: make(chan *sources.Chunk),
|
|
|
|
results: make(chan detectors.ResultWithMetadata),
|
|
|
|
detectorAvgTime: map[string][]time.Duration{},
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, option := range options {
|
|
|
|
option(e)
|
|
|
|
}
|
|
|
|
|
|
|
|
// set defaults
|
|
|
|
|
|
|
|
if e.concurrency == 0 {
|
|
|
|
numCPU := runtime.NumCPU()
|
|
|
|
logrus.Warn("No concurrency specified, defaulting to ", numCPU)
|
|
|
|
e.concurrency = numCPU
|
|
|
|
}
|
2022-01-27 04:38:15 +00:00
|
|
|
logrus.Debugf("running with up to %d workers", e.concurrency)
|
2022-01-13 20:02:24 +00:00
|
|
|
|
|
|
|
var workerWg sync.WaitGroup
|
|
|
|
for i := 0; i < e.concurrency; i++ {
|
|
|
|
workerWg.Add(1)
|
|
|
|
go func() {
|
|
|
|
e.detectorWorker(ctx)
|
|
|
|
workerWg.Done()
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(e.decoders) == 0 {
|
|
|
|
e.decoders = decoders.DefaultDecoders()
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(e.detectors) == 0 {
|
|
|
|
e.detectors = map[bool][]detectors.Detector{}
|
|
|
|
e.detectors[true] = DefaultDetectors()
|
2022-01-19 06:24:56 +00:00
|
|
|
e.detectors[false] = []detectors.Detector{}
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
|
2022-01-19 06:24:56 +00:00
|
|
|
logrus.Debugf("loaded %d decoders", len(e.decoders))
|
|
|
|
logrus.Debugf("loaded %d detectors total, %d with verification enabled. %d with verification disabled",
|
|
|
|
len(e.detectors[true])+len(e.detectors[false]),
|
|
|
|
len(e.detectors[true]),
|
|
|
|
len(e.detectors[false]))
|
|
|
|
|
|
|
|
// start the workers
|
|
|
|
go func() {
|
|
|
|
// close results chan when all workers are done
|
|
|
|
workerWg.Wait()
|
|
|
|
// not entirely sure why results don't get processed without this pause
|
|
|
|
// since we've put all results on the channel at this point.
|
|
|
|
time.Sleep(time.Second)
|
|
|
|
close(e.ResultsChan())
|
|
|
|
}()
|
|
|
|
|
2022-01-13 20:02:24 +00:00
|
|
|
return e
|
|
|
|
}
|
|
|
|
|
|
|
|
// ChunksChan returns the channel on which callers submit chunks for
// scanning. Close it to signal the workers that no more work is coming.
func (e *Engine) ChunksChan() chan *sources.Chunk {
	return e.chunks
}
|
|
|
|
|
|
|
|
// ResultsChan returns the channel on which detector findings are
// delivered. It is closed once all workers have finished.
func (e *Engine) ResultsChan() chan detectors.ResultWithMetadata {
	return e.results
}
|
|
|
|
|
2022-01-19 06:24:56 +00:00
|
|
|
func (e *Engine) ChunksScanned() uint64 {
|
|
|
|
return e.chunksScanned
|
|
|
|
}
|
|
|
|
|
2022-02-07 18:29:06 +00:00
|
|
|
// DetectorAvgTime returns the per-detector scan durations collected by
// the workers, keyed by detector name.
// NOTE(review): the returned map is the live internal map that workers
// append to — confirm callers only read it after scanning has finished.
func (e *Engine) DetectorAvgTime() map[string][]time.Duration {
	return e.detectorAvgTime
}
|
|
|
|
|
2022-01-13 20:02:24 +00:00
|
|
|
func (e *Engine) detectorWorker(ctx context.Context) {
|
|
|
|
for chunk := range e.chunks {
|
|
|
|
for _, decoder := range e.decoders {
|
|
|
|
decoded := decoder.FromChunk(chunk)
|
|
|
|
if decoded == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
dataLower := strings.ToLower(string(decoded.Data))
|
|
|
|
for verify, detectorsSet := range e.detectors {
|
|
|
|
for _, detector := range detectorsSet {
|
2022-02-07 18:29:06 +00:00
|
|
|
start := time.Now()
|
2022-01-13 20:02:24 +00:00
|
|
|
foundKeyword := false
|
|
|
|
for _, kw := range detector.Keywords() {
|
|
|
|
if strings.Contains(dataLower, strings.ToLower(kw)) {
|
|
|
|
foundKeyword = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !foundKeyword {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
results, err := detector.FromData(ctx, verify, decoded.Data)
|
|
|
|
if err != nil {
|
|
|
|
logrus.WithError(err).Error("could not scan chunk")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for _, result := range results {
|
|
|
|
e.results <- detectors.CopyMetadata(chunk, result)
|
|
|
|
}
|
2022-02-07 18:29:06 +00:00
|
|
|
if len(results) > 0 {
|
|
|
|
elasped := time.Since(start)
|
|
|
|
detectorName := results[0].DetectorType.String()
|
|
|
|
e.detectorAvgTime[detectorName] = append(e.detectorAvgTime[detectorName], elasped)
|
|
|
|
}
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-01-19 06:24:56 +00:00
|
|
|
atomic.AddUint64(&e.chunksScanned, 1)
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
}
|