Mirror of https://github.com/trufflesecurity/trufflehog.git, synced 2024-11-10 07:04:24 +00:00
Modularize scanning engine (#2887)
* POC: Modularize scanning engine.
* fix typo
* update interface name
* fix tests
* update test
* fix moar tests
* fix bug
* fixes.
* fix merge
* add detector verification overrides
* handle --no-verification flag
* support fp
* add test
* update name
* filter
* update test
* explicit use of detector
* updates
This commit is contained in:
parent 4addd81e29
commit cb072603dc
6 changed files with 765 additions and 555 deletions
279 main.go
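To orient the diff that follows: the commit replaces main.go's functional-options engine setup with an engine.Config struct and an explicit NewEngine/Start pair. A minimal sketch of how a caller might drive the new API, using only names that appear in this diff; the concurrency value and error handling are placeholders, and flag parsing, source configuration, and waiting for completion are omitted:

package main

import (
	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
	"github.com/trufflesecurity/trufflehog/v3/pkg/output"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

func main() {
	ctx := context.Background()
	concurrency := 8 // placeholder; the real CLI takes this from a flag

	cfg := engine.Config{
		Concurrency: concurrency,
		Verify:      true,
		Dispatcher:  engine.NewPrinterDispatcher(new(output.PlainPrinter)),
		SourceManager: sources.NewManager(
			sources.WithConcurrentSources(concurrency),
			sources.WithConcurrentUnits(concurrency),
			sources.WithSourceUnits(),
			sources.WithBufferedOutput(64),
		),
	}

	eng, err := engine.NewEngine(ctx, &cfg)
	if err != nil {
		ctx.Logger().Error(err, "error initializing engine")
		return
	}
	// Start kicks off the scanner, detector, verification-overlap and notifier workers.
	eng.Start(ctx)
	// ... enumerate a source through the SourceManager, then wait for the engine to finish.
}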
@@ -1,6 +1,7 @@
package main
import (
"encoding/json"
"fmt"
"io"
"net/http"

@@ -24,8 +25,6 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/config"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
@@ -352,88 +351,6 @@ func run(state overseer.State) {
handlers.SetArchiveMaxTimeout(*archiveTimeout)
}
// Build include and exclude detector sets for filtering on engine initialization.
// Exit if there was an error to inform the user of the misconfiguration.
var includeDetectorSet, excludeDetectorSet map[config.DetectorID]struct{}
var detectorsWithCustomVerifierEndpoints map[config.DetectorID][]string
{
includeList, err := config.ParseDetectors(*includeDetectors)
if err != nil {
logFatal(err, "invalid include list detector configuration")
}
excludeList, err := config.ParseDetectors(*excludeDetectors)
if err != nil {
logFatal(err, "invalid exclude list detector configuration")
}
detectorsWithCustomVerifierEndpoints, err = config.ParseVerifierEndpoints(*verifiers)
if err != nil {
logFatal(err, "invalid verifier detector configuration")
}
includeDetectorSet = detectorTypeToSet(includeList)
excludeDetectorSet = detectorTypeToSet(excludeList)
}
// Verify that all the user-provided detectors support the optional
// detector features.
{
if id, err := verifyDetectorsAreVersioner(includeDetectorSet); err != nil {
logFatal(err, "invalid include list detector configuration", "detector", id)
}
if id, err := verifyDetectorsAreVersioner(excludeDetectorSet); err != nil {
logFatal(err, "invalid exclude list detector configuration", "detector", id)
}
if id, err := verifyDetectorsAreVersioner(detectorsWithCustomVerifierEndpoints); err != nil {
logFatal(err, "invalid verifier detector configuration", "detector", id)
}
// Extra check for endpoint customization.
isEndpointCustomizer := engine.DefaultDetectorTypesImplementing[detectors.EndpointCustomizer]()
for id := range detectorsWithCustomVerifierEndpoints {
if _, ok := isEndpointCustomizer[id.ID]; !ok {
logFatal(
fmt.Errorf("endpoint provided but detector does not support endpoint customization"),
"invalid custom verifier endpoint detector configuration",
"detector", id,
)
}
}
}
includeFilter := func(d detectors.Detector) bool {
_, ok := getWithDetectorID(d, includeDetectorSet)
return ok
}
excludeFilter := func(d detectors.Detector) bool {
_, ok := getWithDetectorID(d, excludeDetectorSet)
return !ok
}
// Abuse filter to cause a side-effect.
endpointCustomizer := func(d detectors.Detector) bool {
urls, ok := getWithDetectorID(d, detectorsWithCustomVerifierEndpoints)
if !ok {
return true
}
id := config.GetDetectorID(d)
customizer, ok := d.(detectors.EndpointCustomizer)
if !ok {
// NOTE: We should never reach here due to validation above.
logFatal(
fmt.Errorf("failed to configure a detector endpoint"),
"the provided detector does not support endpoint configuration",
"detector", id,
)
}
if !*customVerifiersOnly || len(urls) == 0 {
urls = append(urls, customizer.DefaultEndpoint())
}
if err := customizer.SetEndpoints(urls...); err != nil {
logFatal(err, "failed configuring custom endpoint for detector", "detector", id)
}
logger.Info("configured detector with verification urls",
"detector", id, "urls", urls,
)
return true
}
// Set how the engine will print its results.
var printer engine.Printer
switch {
@@ -451,11 +368,6 @@ func run(state overseer.State) {
fmt.Fprintf(os.Stderr, "🐷🔑🐷 TruffleHog. Unearth your secrets. 🐷🔑🐷\n\n")
}
var jobReportWriter io.WriteCloser
if *jobReportFile != nil {
jobReportWriter = *jobReportFile
}
// Parse --results flag.
if *onlyVerified {
r := "verified"
@@ -466,34 +378,31 @@ func run(state overseer.State) {
logFatal(err, "failed to configure results flag")
}
scanConfig := scanConfig{
Command: cmd,
Concurrency: *concurrency,
Decoders: decoders.DefaultDecoders(),
Conf: conf,
IncludeFilter: includeFilter,
ExcludeFilter: excludeFilter,
EndpointCustomizer: endpointCustomizer,
NoVerification: *noVerification,
PrintAvgDetectorTime: *printAvgDetectorTime,
FilterUnverified: *filterUnverified,
FilterEntropy: *filterEntropy,
ScanEntireChunk: *scanEntireChunk,
JobReportWriter: jobReportWriter,
AllowVerificationOverlap: *allowVerificationOverlap,
ParsedResults: parsedResults,
Printer: printer,
engConf := engine.Config{
Concurrency: *concurrency,
Detectors: conf.Detectors,
Verify: !*noVerification,
IncludeDetectors: *includeDetectors,
ExcludeDetectors: *excludeDetectors,
CustomVerifiersOnly: *customVerifiersOnly,
VerifierEndpoints: *verifiers,
Dispatcher: engine.NewPrinterDispatcher(printer),
FilterUnverified: *filterUnverified,
FilterEntropy: *filterEntropy,
VerificationOverlap: *allowVerificationOverlap,
Results: parsedResults,
PrintAvgDetectorTime: *printAvgDetectorTime,
ShouldScanEntireChunk: *scanEntireChunk,
}
if *compareDetectionStrategies {
err := compareScans(ctx, scanConfig)
if err != nil {
if err := compareScans(ctx, cmd, engConf); err != nil {
logFatal(err, "error comparing detection strategies")
}
return
}
metrics, err := runSingleScan(ctx, scanConfig, *scanEntireChunk)
metrics, err := runSingleScan(ctx, cmd, engConf)
if err != nil {
logFatal(err, "error running scan")
}
@@ -514,26 +423,7 @@ func run(state overseer.State) {
}
}
type scanConfig struct {
Command string
Concurrency int
Decoders []decoders.Decoder
Conf *config.Config
IncludeFilter func(detectors.Detector) bool
ExcludeFilter func(detectors.Detector) bool
EndpointCustomizer func(detectors.Detector) bool
NoVerification bool
PrintAvgDetectorTime bool
FilterUnverified bool
FilterEntropy float64
ScanEntireChunk bool
JobReportWriter io.WriteCloser
AllowVerificationOverlap bool
ParsedResults map[string]struct{}
Printer engine.Printer
}
func compareScans(ctx context.Context, cfg scanConfig) error {
func compareScans(ctx context.Context, cmd string, cfg engine.Config) error {
var (
entireMetrics metrics
maxLengthMetrics metrics
@@ -546,14 +436,15 @@ func compareScans(ctx context.Context, cfg scanConfig) error {
go func() {
defer wg.Done()
// Run scan with entire chunk span calculator.
entireMetrics, err = runSingleScan(ctx, cfg, true)
cfg.ShouldScanEntireChunk = true
entireMetrics, err = runSingleScan(ctx, cmd, cfg)
if err != nil {
ctx.Logger().Error(err, "error running scan with entire chunk span calculator")
}
}()
// Run scan with max-length span calculator.
maxLengthMetrics, err = runSingleScan(ctx, cfg, false)
maxLengthMetrics, err = runSingleScan(ctx, cmd, cfg)
if err != nil {
return fmt.Errorf("error running scan with custom span calculator: %v", err)
}
@@ -585,28 +476,65 @@ type metrics struct {
hasFoundResults bool
}
func runSingleScan(ctx context.Context, cfg scanConfig, scanEntireChunk bool) (metrics, error) {
eng, err := engine.Start(ctx,
engine.WithConcurrency(cfg.Concurrency),
engine.WithDecoders(cfg.Decoders...),
engine.WithDetectors(engine.DefaultDetectors()...),
engine.WithDetectors(cfg.Conf.Detectors...),
engine.WithVerify(!cfg.NoVerification),
engine.WithFilterDetectors(cfg.IncludeFilter),
engine.WithFilterDetectors(cfg.ExcludeFilter),
engine.WithFilterDetectors(cfg.EndpointCustomizer),
engine.WithFilterUnverified(cfg.FilterUnverified),
engine.WithResults(cfg.ParsedResults),
engine.WithPrintAvgDetectorTime(cfg.PrintAvgDetectorTime),
engine.WithPrinter(cfg.Printer),
engine.WithFilterEntropy(cfg.FilterEntropy),
engine.WithVerificationOverlap(cfg.AllowVerificationOverlap),
engine.WithEntireChunkScan(scanEntireChunk),
)
if err != nil {
return metrics{}, fmt.Errorf("error initializing engine: %v", err)
func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics, error) {
var scanMetrics metrics
// Setup job report writer if provided
var jobReportWriter io.WriteCloser
if *jobReportFile != nil {
jobReportWriter = *jobReportFile
}
handleFinishedMetrics := func(ctx context.Context, finishedMetrics <-chan sources.UnitMetrics, jobReportWriter io.WriteCloser) {
go func() {
defer func() {
jobReportWriter.Close()
if namer, ok := jobReportWriter.(interface{ Name() string }); ok {
ctx.Logger().Info("report written", "path", namer.Name())
} else {
ctx.Logger().Info("report written")
}
}()
for metrics := range finishedMetrics {
metrics.Errors = common.ExportErrors(metrics.Errors...)
details, err := json.Marshal(map[string]any{
"version": 1,
"data": metrics,
})
if err != nil {
ctx.Logger().Error(err, "error marshalling job details")
continue
}
if _, err := jobReportWriter.Write(append(details, '\n')); err != nil {
ctx.Logger().Error(err, "error writing to file")
}
}
}()
}
const defaultOutputBufferSize = 64
opts := []func(*sources.SourceManager){
sources.WithConcurrentSources(cfg.Concurrency),
sources.WithConcurrentUnits(cfg.Concurrency),
sources.WithSourceUnits(),
sources.WithBufferedOutput(defaultOutputBufferSize),
}
if jobReportWriter != nil {
unitHook, finishedMetrics := sources.NewUnitHook(ctx)
opts = append(opts, sources.WithReportHook(unitHook))
handleFinishedMetrics(ctx, finishedMetrics, jobReportWriter)
}
cfg.SourceManager = sources.NewManager(opts...)
eng, err := engine.NewEngine(ctx, &cfg)
if err != nil {
return scanMetrics, fmt.Errorf("error initializing engine: %v", err)
}
eng.Start(ctx)
defer func() {
// Clean up temporary artifacts.
if err := cleantemp.CleanTempArtifacts(ctx); err != nil {

@@ -614,8 +542,7 @@ func runSingleScan(ctx context.Context, cfg scanConfig, scanEntireChunk bool) (m
}
}()
var scanMetrics metrics
switch cfg.Command {
switch cmd {
case gitScan.FullCommand():
gitCfg := sources.GitConfig{
URI: *gitScanURI,

@@ -812,7 +739,7 @@ func runSingleScan(ctx context.Context, cfg scanConfig, scanEntireChunk bool) (m
return scanMetrics, fmt.Errorf("failed to scan Jenkins: %v", err)
}
default:
return scanMetrics, fmt.Errorf("invalid command: %s", cfg.Command)
return scanMetrics, fmt.Errorf("invalid command: %s", cmd)
}
// Wait for all workers to finish.

@@ -887,47 +814,3 @@ func printAverageDetectorTime(e *engine.Engine) {
fmt.Fprintf(os.Stderr, "%s: %s\n", detectorName, duration)
}
}
// detectorTypeToSet is a helper function to convert a slice of detector IDs into a set.
func detectorTypeToSet(detectors []config.DetectorID) map[config.DetectorID]struct{} {
out := make(map[config.DetectorID]struct{}, len(detectors))
for _, d := range detectors {
out[d] = struct{}{}
}
return out
}
// getWithDetectorID is a helper function to get a value from a map using a
// detector's ID. This function behaves like a normal map lookup, with an extra
// step of checking for the non-specific version of a detector.
func getWithDetectorID[T any](d detectors.Detector, data map[config.DetectorID]T) (T, bool) {
key := config.GetDetectorID(d)
// Check if the specific ID is provided.
if t, ok := data[key]; ok || key.Version == 0 {
return t, ok
}
// Check if the generic type is provided without a version.
// This means "all" versions of a type.
key.Version = 0
t, ok := data[key]
return t, ok
}
// verifyDetectorsAreVersioner checks all keys in a provided map to verify the
// provided type is actually a Versioner.
func verifyDetectorsAreVersioner[T any](data map[config.DetectorID]T) (config.DetectorID, error) {
isVersioner := engine.DefaultDetectorTypesImplementing[detectors.Versioner]()
for id := range data {
if id.Version == 0 {
// Version not provided.
continue
}
if _, ok := isVersioner[id.ID]; ok {
// Version provided for a Versioner detector.
continue
}
// Version provided on a non-Versioner detector.
return id, fmt.Errorf("version provided but detector does not have a version")
}
return config.DetectorID{}, nil
}
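The versioned-lookup fallback in getWithDetectorID above is easiest to see with concrete values. A self-contained, simplified replica (the key type and "gitlab" value are illustrative; the real helper keys on config.DetectorID and a detectors.Detector):

package main

import "fmt"

// Simplified stand-in for config.DetectorID.
type detectorID struct {
	ID      string
	Version int
}

// Same lookup logic as getWithDetectorID: try the exact version first,
// then fall back to the versionless entry, which means "all versions".
func getWithID[T any](key detectorID, data map[detectorID]T) (T, bool) {
	if t, ok := data[key]; ok || key.Version == 0 {
		return t, ok
	}
	key.Version = 0
	t, ok := data[key]
	return t, ok
}

func main() {
	endpoints := map[detectorID][]string{
		{ID: "gitlab", Version: 0}: {"https://gitlab.example.com"}, // wildcard for every version
	}
	urls, ok := getWithID(detectorID{ID: "gitlab", Version: 2}, endpoints)
	fmt.Println(urls, ok) // [https://gitlab.example.com] true
}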
@@ -121,10 +121,17 @@ func unwrapToLast(err error) error {
}
type ResultWithMetadata struct {
// IsWordlistFalsePositive indicates whether this secret was flagged as a false positive based on a wordlist check
IsWordlistFalsePositive bool
// SourceMetadata contains source-specific contextual information.
SourceMetadata *source_metadatapb.MetaData
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
SourceID sources.SourceID
// JobID is the ID of the job that the API uses to map secrets to specific jobs.
JobID sources.JobID
// SecretID is the ID of the secret, if it exists.
// Only secrets that are being reverified will have a SecretID.
SecretID int64
// SourceType is the type of Source.
SourceType sourcespb.SourceType
// SourceName is the name of the Source.

@@ -139,6 +146,8 @@ func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
return ResultWithMetadata{
SourceMetadata: chunk.SourceMetadata,
SourceID: chunk.SourceID,
JobID: chunk.JobID,
SecretID: chunk.SecretID,
SourceType: chunk.SourceType,
SourceName: chunk.SourceName,
Result: result,
@@ -2,10 +2,8 @@ package engine
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"runtime"
"sync"
"sync/atomic"
@@ -54,24 +52,106 @@ type runtimeMetrics struct {
detectorAvgTime sync.Map
}
// getScanDuration returns the duration of the scan.
// If the scan is still running, it returns the time since the scan started.
func (m *Metrics) getScanDuration() time.Duration {
if m.ScanDuration == 0 {
return time.Since(m.scanStartTime)
}
return m.ScanDuration
}
// ResultsDispatcher is an interface for dispatching findings of detected results.
// Implementations can vary from printing results to the console to sending results to an external system.
type ResultsDispatcher interface {
Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error
}
// Printer is used to format found results and output them to the user. Ex JSON, plain text, etc.
// Please note printer implementations SHOULD BE thread safe.
type Printer interface {
Print(ctx context.Context, r *detectors.ResultWithMetadata) error
}
// PrinterDispatcher wraps an existing Printer implementation and adapts it to the ResultsDispatcher interface.
type PrinterDispatcher struct{ printer Printer }
// NewPrinterDispatcher creates a new PrinterDispatcher instance with the provided Printer.
func NewPrinterDispatcher(printer Printer) *PrinterDispatcher { return &PrinterDispatcher{printer} }
// Dispatch sends the result to the printer.
func (p *PrinterDispatcher) Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error {
return p.printer.Print(ctx, &result)
}
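The ResultsDispatcher interface above is the seam that lets findings go somewhere other than a console Printer. A minimal sketch of a custom implementation; the webhook URL, the JSON shape, and the package name are invented for illustration, while the interface signature and the result fields come from this diff:

package dispatch

import (
	"bytes"
	"encoding/json"
	"net/http"

	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
)

// webhookDispatcher posts each finding to an HTTP endpoint. It satisfies
// engine.ResultsDispatcher and could be supplied as engine.Config.Dispatcher
// in place of NewPrinterDispatcher.
type webhookDispatcher struct{ url string }

func (w *webhookDispatcher) Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error {
	body, err := json.Marshal(map[string]any{
		"detector": result.DetectorType.String(),
		"verified": result.Verified,
	})
	if err != nil {
		return err
	}
	resp, err := http.Post(w.url, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	return resp.Body.Close()
}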
// Config used to configure the engine.
type Config struct {
// Number of concurrent scanner workers,
// also serves as a multiplier for other worker types (e.g., detector workers, notifier workers)
Concurrency int
Decoders []decoders.Decoder
Detectors []detectors.Detector
DetectorVerificationOverrides map[config.DetectorID]bool
IncludeDetectors string
ExcludeDetectors string
CustomVerifiersOnly bool
VerifierEndpoints map[string]string
// Verify determines whether the scanner will verify candidate secrets.
Verify bool
// Defines which results will be notified by the engine
// (e.g., verified, unverified, unknown)
Results map[string]struct{}
LogFilteredUnverified bool
// FilterEntropy filters out unverified results using Shannon entropy.
FilterEntropy float64
// FilterUnverified sets the filterUnverified flag on the engine. If set to
// true, the engine will only return the first unverified result for a chunk for a detector.
FilterUnverified bool
ShouldScanEntireChunk bool
Dispatcher ResultsDispatcher
// SourceManager is used to manage the sources and units.
// TODO (ahrav): Update this comment, i'm dumb and don't really know what else it does.
SourceManager *sources.SourceManager
// PrintAvgDetectorTime sets the printAvgDetectorTime flag on the engine. If set to
// true, the engine will print the average time taken by each detector.
// This option allows us to measure the time taken for each detector ONLY if
// the engine is configured to print the results.
// Calculating the average time taken by each detector is an expensive operation
// and should be avoided unless specified by the user.
PrintAvgDetectorTime bool
// VerificationOverlap determines whether the scanner will attempt to verify candidate secrets
// that have been detected by multiple detectors.
// By default, it is set to true.
VerificationOverlap bool
}
// Engine represents the core scanning engine responsible for detecting secrets in input data.
// It manages the lifecycle of the scanning process, including initialization, worker management,
// and result notification. The engine is designed to be flexible and configurable, allowing for
// customization through various options and configurations.
type Engine struct {
// CLI flags.
concurrency int
decoders []decoders.Decoder
detectors []detectors.Detector
jobReportWriter io.WriteCloser
concurrency int
decoders []decoders.Decoder
detectors []detectors.Detector
// Any detectors configured to override sources' verification flags
detectorVerificationOverrides map[config.DetectorID]bool
// filterUnverified is used to reduce the number of unverified results.
// If there are multiple unverified results for the same chunk for the same detector,
// only the first one will be kept.
filterUnverified bool
// entropyFilter is used to filter out unverified results using Shannon entropy.
filterEntropy *float64
filterEntropy float64
notifyVerifiedResults bool
notifyUnverifiedResults bool
notifyUnknownResults bool
@@ -100,152 +180,242 @@ type Engine struct {
// numFoundResults is used to keep track of the number of results found.
numFoundResults uint32
// printer provides a method for formatting and outputting search results.
// The specific implementation (e.g., JSON, plain text)
// should be set during initialization based on user preference or program requirements.
printer Printer
// ResultsDispatcher is used to send results.
dispatcher ResultsDispatcher
// dedupeCache is used to deduplicate results by comparing the
// detector type, raw result, and source metadata
dedupeCache *lru.Cache[string, detectorspb.DecoderType]
// verify determines whether the scanner will attempt to verify candidate secrets
// verify determines whether the scanner will attempt to verify candidate secrets.
verify bool
// Note: bad hack only used for testing
// Note: bad hack only used for testing.
verificationOverlapTracker *verificationOverlapTracker
}
type verificationOverlapTracker struct {
verificationOverlapDuplicateCount int
mu sync.Mutex
}
func (r *verificationOverlapTracker) increment() {
r.mu.Lock()
r.verificationOverlapDuplicateCount++
r.mu.Unlock()
}
// Option is used to configure the engine during initialization using functional options.
type Option func(*Engine)
func WithJobReportWriter(w io.WriteCloser) Option {
return func(e *Engine) {
e.jobReportWriter = w
// NewEngine creates a new Engine instance with the provided configuration.
func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) {
engine := &Engine{
concurrency: cfg.Concurrency,
decoders: cfg.Decoders,
detectors: cfg.Detectors,
dispatcher: cfg.Dispatcher,
verify: cfg.Verify,
filterUnverified: cfg.FilterUnverified,
filterEntropy: cfg.FilterEntropy,
printAvgDetectorTime: cfg.PrintAvgDetectorTime,
logFilteredUnverified: cfg.LogFilteredUnverified,
verificationOverlap: cfg.VerificationOverlap,
sourceManager: cfg.SourceManager,
scanEntireChunk: cfg.ShouldScanEntireChunk,
detectorVerificationOverrides: cfg.DetectorVerificationOverrides,
}
}
func WithConcurrency(concurrency int) Option {
return func(e *Engine) {
e.concurrency = concurrency
if engine.sourceManager == nil {
return nil, fmt.Errorf("source manager is required")
}
}
const ignoreTag = "trufflehog:ignore"
engine.setDefaults(ctx)
func WithDetectors(d ...detectors.Detector) Option {
return func(e *Engine) {
e.detectors = append(e.detectors, d...)
// Build include and exclude detector sets for filtering on engine initialization.
includeDetectorSet, excludeDetectorSet, err := buildDetectorSets(cfg)
if err != nil {
return nil, err
}
}
func WithDecoders(decoders ...decoders.Decoder) Option {
return func(e *Engine) {
e.decoders = decoders
// Apply include/exclude filters.
var filters []func(detectors.Detector) bool
if len(includeDetectorSet) > 0 {
filters = append(filters, func(d detectors.Detector) bool {
_, ok := getWithDetectorID(d, includeDetectorSet)
return ok
})
}
}
// WithFilterUnverified sets the filterUnverified flag on the engine. If set to
// true, the engine will only return the first unverified result for a chunk for a detector.
func WithFilterUnverified(filter bool) Option {
return func(e *Engine) {
e.filterUnverified = filter
if len(excludeDetectorSet) > 0 {
filters = append(filters, func(d detectors.Detector) bool {
_, ok := getWithDetectorID(d, excludeDetectorSet)
return !ok
})
}
}
// WithFilterEntropy filters out unverified results using Shannon entropy.
func WithFilterEntropy(entropy float64) Option {
return func(e *Engine) {
if entropy > 0 {
e.filterEntropy = &entropy
}
// Apply custom verifier endpoints to detectors that support it.
detectorsWithCustomVerifierEndpoints, err := parseCustomVerifierEndpoints(cfg.VerifierEndpoints)
if err != nil {
return nil, err
}
}
// WithResults defines which results will be printed by the engine.
func WithResults(results map[string]struct{}) Option {
return func(e *Engine) {
if len(results) == 0 {
return
}
if len(detectorsWithCustomVerifierEndpoints) > 0 {
filters = append(filters, func(d detectors.Detector) bool {
urls, ok := getWithDetectorID(d, detectorsWithCustomVerifierEndpoints)
if !ok {
return true
}
customizer, ok := d.(detectors.EndpointCustomizer)
if !ok {
return false
}
if !cfg.CustomVerifiersOnly || len(urls) == 0 {
urls = append(urls, customizer.DefaultEndpoint())
}
if err := customizer.SetEndpoints(urls...); err != nil {
return false
}
return true
})
}
engine.applyFilters(filters...)
if results := cfg.Results; len(results) > 0 {
_, ok := results["verified"]
e.notifyVerifiedResults = ok
engine.notifyVerifiedResults = ok
_, ok = results["unknown"]
e.notifyUnknownResults = ok
engine.notifyUnknownResults = ok
_, ok = results["unverified"]
e.notifyUnverifiedResults = ok
engine.notifyUnverifiedResults = ok
_, ok = results["filtered_unverified"]
e.logFilteredUnverified = ok
engine.logFilteredUnverified = ok
}
if err := engine.initialize(ctx); err != nil {
return nil, err
}
return engine, nil
}
// WithPrintAvgDetectorTime sets the printAvgDetectorTime flag on the engine. If set to
// true, the engine will print the average time taken by each detector.
// This option allows us to measure the time taken for each detector ONLY if
// the engine is configured to print the results.
// Calculating the average time taken by each detector is an expensive operation
// and should be avoided unless specified by the user.
func WithPrintAvgDetectorTime(printAvgDetectorTime bool) Option {
return func(e *Engine) {
e.printAvgDetectorTime = printAvgDetectorTime
// setDefaults ensures that if specific engine properties aren't provided,
// they're set to reasonable default values. It makes the engine robust to
// incomplete configuration.
func (e *Engine) setDefaults(ctx context.Context) {
if e.concurrency == 0 {
numCPU := runtime.NumCPU()
ctx.Logger().Info("No concurrency specified, defaulting to max", "cpu", numCPU)
e.concurrency = numCPU
}
ctx.Logger().V(3).Info("engine started", "workers", e.concurrency)
// Default decoders handle common encoding formats.
if len(e.decoders) == 0 {
e.decoders = decoders.DefaultDecoders()
}
if len(e.detectors) == 0 {
e.detectors = DefaultDetectors()
}
if e.dispatcher == nil {
e.dispatcher = NewPrinterDispatcher(new(output.PlainPrinter))
}
e.notifyVerifiedResults = true
e.notifyUnverifiedResults = true
e.notifyUnknownResults = true
ctx.Logger().V(4).Info("default engine options set")
}
// WithFilterDetectors applies a filter to the configured list of detectors. If
// the filterFunc returns true, the detector will be included for scanning.
// This option applies to the existing list of detectors configured, so the
// order this option appears matters. All filtering happens before scanning.
func WithFilterDetectors(filterFunc func(detectors.Detector) bool) Option {
return func(e *Engine) {
// If no detectors are configured, do nothing.
if e.detectors == nil {
return
}
e.detectors = filterDetectors(filterFunc, e.detectors)
func buildDetectorSets(cfg *Config) (map[config.DetectorID]struct{}, map[config.DetectorID]struct{}, error) {
includeList, err := config.ParseDetectors(cfg.IncludeDetectors)
if err != nil {
return nil, nil, fmt.Errorf("invalid include list detector configuration: %w", err)
}
excludeList, err := config.ParseDetectors(cfg.ExcludeDetectors)
if err != nil {
return nil, nil, fmt.Errorf("invalid exclude list detector configuration: %w", err)
}
includeDetectorSet := detectorTypeToSet(includeList)
excludeDetectorSet := detectorTypeToSet(excludeList)
// Verify that all the user-provided detectors support the optional
// detector features.
if id, err := verifyDetectorsAreVersioner(includeDetectorSet); err != nil {
return nil, nil, fmt.Errorf("invalid include list detector configuration id %v: %w", id, err)
}
if id, err := verifyDetectorsAreVersioner(excludeDetectorSet); err != nil {
return nil, nil, fmt.Errorf("invalid exclude list detector configuration id %v: %w", id, err)
}
return includeDetectorSet, excludeDetectorSet, nil
}
// WithPrinter sets the Printer on the engine.
func WithPrinter(printer Printer) Option {
return func(e *Engine) {
e.printer = printer
func parseCustomVerifierEndpoints(endpoints map[string]string) (map[config.DetectorID][]string, error) {
if len(endpoints) == 0 {
return nil, nil
}
}
// WithVerify configures whether the scanner will verify candidate secrets.
func WithVerify(verify bool) Option {
return func(e *Engine) {
e.verify = verify
customVerifierEndpoints, err := config.ParseVerifierEndpoints(endpoints)
if err != nil {
return nil, fmt.Errorf("invalid verifier detector configuration: %w", err)
}
}
func withVerificationOverlapTracking() Option {
return func(e *Engine) {
e.verificationOverlapTracker = &verificationOverlapTracker{
verificationOverlapDuplicateCount: 0,
if id, err := verifyDetectorsAreVersioner(customVerifierEndpoints); err != nil {
return nil, fmt.Errorf("invalid verifier detector configuration id %v: %w", id, err)
}
// Extra check for endpoint customization.
isEndpointCustomizer := DefaultDetectorTypesImplementing[detectors.EndpointCustomizer]()
for id := range customVerifierEndpoints {
if _, ok := isEndpointCustomizer[id.ID]; !ok {
return nil, fmt.Errorf("endpoint provided but detector does not support endpoint customization: %w", err)
}
}
return customVerifierEndpoints, nil
}
// WithVerificationOverlap
func WithVerificationOverlap(verificationOverlap bool) Option {
return func(e *Engine) {
e.verificationOverlap = verificationOverlap
// detectorTypeToSet is a helper function to convert a slice of detector IDs into a set.
func detectorTypeToSet(detectors []config.DetectorID) map[config.DetectorID]struct{} {
out := make(map[config.DetectorID]struct{}, len(detectors))
for _, d := range detectors {
out[d] = struct{}{}
}
return out
}
// getWithDetectorID is a helper function to get a value from a map using a
// detector's ID. This function behaves like a normal map lookup, with an extra
// step of checking for the non-specific version of a detector.
func getWithDetectorID[T any](d detectors.Detector, data map[config.DetectorID]T) (T, bool) {
key := config.GetDetectorID(d)
// Check if the specific ID is provided.
if t, ok := data[key]; ok || key.Version == 0 {
return t, ok
}
// Check if the generic type is provided without a version.
// This means "all" versions of a type.
key.Version = 0
t, ok := data[key]
return t, ok
}
// verifyDetectorsAreVersioner checks all keys in a provided map to verify the
// provided type is actually a Versioner.
func verifyDetectorsAreVersioner[T any](data map[config.DetectorID]T) (config.DetectorID, error) {
isVersioner := DefaultDetectorTypesImplementing[detectors.Versioner]()
for id := range data {
if id.Version == 0 {
// Version not provided.
continue
}
if _, ok := isVersioner[id.ID]; ok {
// Version provided for a Versioner detector.
continue
}
// Version provided on a non-Versioner detector.
return id, fmt.Errorf("version provided but detector does not have a version")
}
return config.DetectorID{}, nil
}
// applyFilters applies a variable number of filters to the detectors.
func (e *Engine) applyFilters(filters ...func(detectors.Detector) bool) {
for _, filter := range filters {
e.detectors = filterDetectors(filter, e.detectors)
}
}
@@ -259,11 +429,68 @@ func filterDetectors(filterFunc func(detectors.Detector) bool, input []detectors
return out
}
// WithEntireChunkScan sets the flag to configure AhoCorasickCore to scan entire chunks.
func WithEntireChunkScan(enabled bool) Option {
return func(e *Engine) { e.scanEntireChunk = enabled }
// initialize prepares the engine's internal structures. The LRU cache optimizes
// deduplication efforts, allowing the engine to quickly check if a chunk has
// been processed before, thereby saving computational overhead.
func (e *Engine) initialize(ctx context.Context) error {
// TODO (ahrav): Determine the optimal cache size.
const cacheSize = 512 // number of entries in the LRU cache
cache, err := lru.New[string, detectorspb.DecoderType](cacheSize)
if err != nil {
return fmt.Errorf("failed to initialize LRU cache: %w", err)
}
const (
// detectableChunksChanMultiplier is set to accommodate a high number of concurrent worker goroutines.
// This multiplier ensures that the detectableChunksChan channel has sufficient buffer capacity
// to hold messages from multiple worker groups (detector workers/ verificationOverlap workers) without blocking.
// A large buffer helps accommodate for the fact workers are producing data at a faster rate
// than it can be consumed.
detectableChunksChanMultiplier = 50
// verificationOverlapChunksChanMultiplier uses a smaller buffer compared to detectableChunksChanMultiplier.
// This reflects the anticipated lower volume of data that needs re-verification.
// The buffer size is a trade-off between memory usage and the need to prevent blocking.
verificationOverlapChunksChanMultiplier = 25
)
// Channels are used for communication between different parts of the engine,
// ensuring that data flows smoothly without race conditions.
// The buffer sizes for these channels are set to multiples of defaultChannelBuffer,
// considering the expected concurrency and workload in the system.
e.detectableChunksChan = make(chan detectableChunk, defaultChannelBuffer*detectableChunksChanMultiplier)
e.verificationOverlapChunksChan = make(
chan verificationOverlapChunk, defaultChannelBuffer*verificationOverlapChunksChanMultiplier,
)
e.results = make(chan detectors.ResultWithMetadata, defaultChannelBuffer)
e.dedupeCache = cache
ctx.Logger().V(4).Info("engine initialized")
// Configure the EntireChunkSpanCalculator if the engine is set to scan the entire chunk.
var ahoCOptions []ahocorasick.CoreOption
if e.scanEntireChunk {
ahoCOptions = append(ahoCOptions, ahocorasick.WithSpanCalculator(new(ahocorasick.EntireChunkSpanCalculator)))
}
ctx.Logger().V(4).Info("setting up aho-corasick core")
e.ahoCorasickCore = ahocorasick.NewAhoCorasickCore(e.detectors, ahoCOptions...)
ctx.Logger().V(4).Info("set up aho-corasick core")
return nil
}
type verificationOverlapTracker struct {
verificationOverlapDuplicateCount int
mu sync.Mutex
}
func (r *verificationOverlapTracker) increment() {
r.mu.Lock()
r.verificationOverlapDuplicateCount++
r.mu.Unlock()
}
const ignoreTag = "trufflehog:ignore"
// HasFoundResults returns true if any results are found.
func (e *Engine) HasFoundResults() bool {
return atomic.LoadUint32(&e.numFoundResults) > 0
@@ -310,16 +537,6 @@ func (e *Engine) GetDetectorsMetrics() map[string]time.Duration {
return result
}
// getScanDuration returns the duration of the scan.
// If the scan is still running, it returns the time since the scan started.
func (m *Metrics) getScanDuration() time.Duration {
if m.ScanDuration == 0 {
return time.Since(m.scanStartTime)
}
return m.ScanDuration
}
// DetectorAvgTime returns the average time taken by each detector.
func (e *Engine) DetectorAvgTime() map[string][]time.Duration {
logger := context.Background().Logger()
@@ -344,147 +561,16 @@ func (e *Engine) DetectorAvgTime() map[string][]time.Duration {
// Start initializes and activates the engine's processing pipeline.
// It sets up various default configurations, prepares lookup structures for
// detectors, conducts basic sanity checks, and kickstarts all necessary workers.
// Once started, the engine begins processing input data to identify secrets.
func Start(ctx context.Context, options ...Option) (*Engine, error) {
e := &Engine{}
if err := e.initialize(ctx, options...); err != nil {
return nil, err
}
e.initSourceManager(ctx)
e.setDefaults(ctx)
// detectors, and kickstarts all necessary workers. Once started, the engine
// begins processing input data to identify secrets.
func (e *Engine) Start(ctx context.Context) {
e.metrics = runtimeMetrics{Metrics: Metrics{scanStartTime: time.Now()}}
e.sanityChecks(ctx)
e.startWorkers(ctx)
return e, nil
}
var defaultChannelBuffer = runtime.NumCPU()
// initialize prepares the engine's internal structures. The LRU cache optimizes
// deduplication efforts, allowing the engine to quickly check if a chunk has
// been processed before, thereby saving computational overhead.
func (e *Engine) initialize(ctx context.Context, options ...Option) error {
// TODO (ahrav): Determine the optimal cache size.
const cacheSize = 512 // number of entries in the LRU cache
cache, err := lru.New[string, detectorspb.DecoderType](cacheSize)
if err != nil {
return fmt.Errorf("failed to initialize LRU cache: %w", err)
}
const (
// detectableChunksChanMultiplier is set to accommodate a high number of concurrent worker goroutines.
// This multiplier ensures that the detectableChunksChan channel has sufficient buffer capacity
// to hold messages from multiple worker groups (detector workers/ verificationOverlap workers) without blocking.
// A large buffer helps accommodate for the fact workers are producing data at a faster rate
// than it can be consumed.
detectableChunksChanMultiplier = 50
// verificationOverlapChunksChanMultiplier uses a smaller buffer compared to detectableChunksChanMultiplier.
// This reflects the anticipated lower volume of data that needs re-verification.
// The buffer size is a trade-off between memory usage and the need to prevent blocking.
verificationOverlapChunksChanMultiplier = 25
)
// Channels are used for communication between different parts of the engine,
// ensuring that data flows smoothly without race conditions.
// The buffer sizes for these channels are set to multiples of defaultChannelBuffer,
// considering the expected concurrency and workload in the system.
e.detectableChunksChan = make(chan detectableChunk, defaultChannelBuffer*detectableChunksChanMultiplier)
e.notifyVerifiedResults = true
e.notifyUnknownResults = true
e.notifyUnverifiedResults = true
e.verificationOverlapChunksChan = make(
chan verificationOverlapChunk, defaultChannelBuffer*verificationOverlapChunksChanMultiplier,
)
e.results = make(chan detectors.ResultWithMetadata, defaultChannelBuffer)
e.dedupeCache = cache
e.printer = new(output.PlainPrinter)
e.metrics = runtimeMetrics{Metrics: Metrics{scanStartTime: time.Now()}}
for _, option := range options {
option(e)
}
ctx.Logger().V(4).Info("engine initialized")
// Configure the EntireChunkSpanCalculator if the engine is set to scan the entire chunk.
var ahoCOptions []ahocorasick.CoreOption
if e.scanEntireChunk {
ahoCOptions = append(ahoCOptions, ahocorasick.WithSpanCalculator(new(ahocorasick.EntireChunkSpanCalculator)))
}
ctx.Logger().V(4).Info("setting up aho-corasick core")
e.ahoCorasickCore = ahocorasick.NewAhoCorasickCore(e.detectors, ahoCOptions...)
ctx.Logger().V(4).Info("set up aho-corasick core")
return nil
}
func (e *Engine) initSourceManager(ctx context.Context) {
const defaultOutputBufferSize = 64
opts := []func(*sources.SourceManager){
sources.WithConcurrentSources(e.concurrency),
sources.WithConcurrentUnits(e.concurrency),
sources.WithSourceUnits(),
sources.WithBufferedOutput(defaultOutputBufferSize),
}
if e.jobReportWriter != nil {
unitHook, finishedMetrics := sources.NewUnitHook(ctx)
opts = append(opts, sources.WithReportHook(unitHook))
e.wgDetectorWorkers.Add(1)
go func() {
defer e.wgDetectorWorkers.Done()
defer func() {
e.jobReportWriter.Close()
// Add a bit of extra information if it's a *os.File.
if namer, ok := e.jobReportWriter.(interface{ Name() string }); ok {
ctx.Logger().Info("report written", "path", namer.Name())
} else {
ctx.Logger().Info("report written")
}
}()
for metrics := range finishedMetrics {
metrics.Errors = common.ExportErrors(metrics.Errors...)
details, err := json.Marshal(map[string]any{
"version": 1,
"data": metrics,
})
if err != nil {
ctx.Logger().Error(err, "error marshalling job details")
continue
}
if _, err := e.jobReportWriter.Write(append(details, '\n')); err != nil {
ctx.Logger().Error(err, "error writing to file")
}
}
}()
}
e.sourceManager = sources.NewManager(opts...)
}
// setDefaults ensures that if specific engine properties aren't provided,
// they're set to reasonable default values. It makes the engine robust to
// incomplete configuration.
func (e *Engine) setDefaults(ctx context.Context) {
if e.concurrency == 0 {
numCPU := runtime.NumCPU()
ctx.Logger().Info("No concurrency specified, defaulting to max", "cpu", numCPU)
e.concurrency = numCPU
}
ctx.Logger().V(3).Info("engine started", "workers", e.concurrency)
// Default decoders handle common encoding formats.
if len(e.decoders) == 0 {
e.decoders = decoders.DefaultDecoders()
}
if len(e.detectors) == 0 {
e.detectors = DefaultDetectors()
}
ctx.Logger().V(4).Info("default engine options set")
}
// Sanity check detectors for duplicate configuration. Only log in case
// a detector has been configured in a way that isn't represented by
// the DetectorID (type and version).
@@ -504,19 +590,36 @@ func (e *Engine) sanityChecks(ctx context.Context) {
// workers helps in scalability and makes it easier to diagnose issues.
func (e *Engine) startWorkers(ctx context.Context) {
// Scanner workers process input data and extract chunks for detectors.
e.startScannerWorkers(ctx)
// Detector workers apply keyword matching, regexes and API calls to detect secrets in chunks.
e.startDetectorWorkers(ctx)
// verificationOverlap workers handle verification of chunks that have been detected by multiple detectors.
// They ensure that verification is disabled for any secrets that have been detected by multiple detectors.
e.startVerificationOverlapWorkers(ctx)
// ResultsDispatcher workers communicate detected issues to the user or any downstream systems.
// We want 1/4th of the notifier workers as the number of scanner workers.
e.startNotifierWorkers(ctx)
}
func (e *Engine) startScannerWorkers(ctx context.Context) {
ctx.Logger().V(2).Info("starting scanner workers", "count", e.concurrency)
for worker := uint64(0); worker < uint64(e.concurrency); worker++ {
e.workersWg.Add(1)
go func() {
ctx := context.WithValue(ctx, "secret_worker_id", common.RandomID(5))
ctx := context.WithValue(ctx, "scanner_worker_id", common.RandomID(5))
defer common.Recover(ctx)
defer e.workersWg.Done()
e.detectorWorker(ctx)
e.scannerWorker(ctx)
}()
}
}
// Detector workers apply keyword matching, regexes and API calls to detect secrets in chunks.
const detectorWorkerMultiplier = 50
const detectorWorkerMultiplier = 50
func (e *Engine) startDetectorWorkers(ctx context.Context) {
ctx.Logger().V(2).Info("starting detector workers", "count", e.concurrency*detectorWorkerMultiplier)
for worker := uint64(0); worker < uint64(e.concurrency*detectorWorkerMultiplier); worker++ {
e.wgDetectorWorkers.Add(1)

@@ -524,12 +627,12 @@ func (e *Engine) startWorkers(ctx context.Context) {
ctx := context.WithValue(ctx, "detector_worker_id", common.RandomID(5))
defer common.Recover(ctx)
defer e.wgDetectorWorkers.Done()
e.detectChunks(ctx)
e.detectorWorker(ctx)
}()
}
}
// verificationOverlap workers handle verification of chunks that have been detected by multiple detectors.
// They ensure that verification is disabled for any secrets that have been detected by multiple detectors.
func (e *Engine) startVerificationOverlapWorkers(ctx context.Context) {
const verificationOverlapWorkerMultiplier = detectorWorkerMultiplier
ctx.Logger().V(2).Info("starting verificationOverlap workers", "count", e.concurrency)
for worker := uint64(0); worker < uint64(e.concurrency*verificationOverlapWorkerMultiplier); worker++ {

@@ -541,9 +644,9 @@ func (e *Engine) startWorkers(ctx context.Context) {
e.verificationOverlapWorker(ctx)
}()
}
}
// Notifier workers communicate detected issues to the user or any downstream systems.
// We want 1/4th of the notifier workers as the number of scanner workers.
func (e *Engine) startNotifierWorkers(ctx context.Context) {
const notifierWorkerRatio = 4
maxNotifierWorkers := 1
if numWorkers := e.concurrency / notifierWorkerRatio; numWorkers > 0 {

@@ -556,7 +659,7 @@ func (e *Engine) startWorkers(ctx context.Context) {
ctx := context.WithValue(ctx, "notifier_worker_id", common.RandomID(5))
defer common.Recover(ctx)
defer e.WgNotifier.Done()
e.notifyResults(ctx)
e.notifierWorker(ctx)
}()
}
}
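As a worked example of the worker sizing in the hunks above, assuming Concurrency is 8: the engine starts 8 scanner workers, 8 × 50 = 400 detector workers (detectorWorkerMultiplier), the same 400 verificationOverlap workers (that multiplier aliases detectorWorkerMultiplier), and 8 / 4 = 2 notifier workers (notifierWorkerRatio, with a floor of one worker). The exact notifier cap is partly outside the visible hunk, so treat these numbers as an illustration of the multipliers rather than a guarantee.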
@@ -618,11 +721,12 @@ type verificationOverlapChunk struct {
verificationOverlapWgDoneFn func()
}
func (e *Engine) detectorWorker(ctx context.Context) {
func (e *Engine) scannerWorker(ctx context.Context) {
var wgDetect sync.WaitGroup
var wgVerificationOverlap sync.WaitGroup
for chunk := range e.ChunksChan() {
sourceVerify := chunk.Verify
atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data)))
for _, decoder := range e.decoders {
decoded := decoder.FromChunk(chunk)

@@ -644,7 +748,7 @@ func (e *Engine) detectorWorker(ctx context.Context) {
}
for _, detector := range matchingDetectors {
decoded.Chunk.Verify = e.verify
decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides)
wgDetect.Add(1)
e.detectableChunksChan <- detectableChunk{
chunk: *decoded.Chunk,

@@ -664,6 +768,41 @@ func (e *Engine) detectorWorker(ctx context.Context) {
ctx.Logger().V(4).Info("finished scanning chunks")
}
func (e *Engine) shouldVerifyChunk(
sourceVerify bool,
detector detectors.Detector,
detectorVerificationOverrides map[config.DetectorID]bool,
) bool {
// The verify flag takes precedence over the detector's verification flag.
if !e.verify {
return false
}
detectorId := config.DetectorID{ID: detector.Type(), Version: 0}
if v, ok := detector.(detectors.Versioner); ok {
detectorId.Version = v.Version()
}
if detectorVerify, ok := detectorVerificationOverrides[detectorId]; ok {
return detectorVerify
}
// If the user is running with a detector verification override that does not specify a particular detector version,
// then its override map entry will have version 0. We should check for that too, but if the detector being checked
// doesn't have any version information then its version is 0, so we've already done the check, and we don't need to
// do it a second time.
if detectorId.Version != 0 {
detectorId.Version = 0
if detectorVerify, ok := detectorVerificationOverrides[detectorId]; ok {
return detectorVerify
}
}
return sourceVerify
}
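To make the override semantics added above concrete, a hedged fragment; everything here except shouldVerifyChunk and config.DetectorID is hypothetical (gitlabDetector, awsDetector, chunk, and detector are stand-in variables, and the map values are invented):

// Hypothetical overrides, e.g. built from detector configuration:
// force verification on for version 2 of one detector, and off for
// every version of another (Version: 0 acts as a wildcard).
overrides := map[config.DetectorID]bool{
	{ID: gitlabDetector.Type(), Version: 2}: true,
	{ID: awsDetector.Type(), Version: 0}:    false,
}
// Resolution order inside shouldVerifyChunk: the global verify flag wins,
// then an exact {type, version} entry, then the {type, 0} wildcard,
// and finally the source's own chunk.Verify value.
verify := e.shouldVerifyChunk(chunk.Verify, detector, overrides)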
// chunkSecretKey ties secrets to the specific detector that found them. This allows identifying identical
// credentials extracted by multiple different detectors processing the same chunk. Or duplicates found
// by the same detector in the chunk. Exact matches on lookup indicate a duplicate secret for a detector
@@ -720,6 +859,8 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
for chunk := range e.verificationOverlapChunksChan {
for _, detector := range chunk.detectors {
isFalsePositive := detectors.GetFalsePositiveCheck(detector.Detector)
// DO NOT VERIFY at this stage of the pipeline.
matchedBytes := detector.Matches()
for _, match := range matchedBytes {

@@ -760,12 +901,17 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
e.verificationOverlapTracker.increment()
}
res.SetVerificationError(errOverlap)
e.processResult(ctx, detectableChunk{
chunk: chunk.chunk,
detector: detector,
decoder: chunk.decoder,
wgDoneFn: wgDetect.Done,
}, res)
e.processResult(
ctx,
detectableChunk{
chunk: chunk.chunk,
detector: detector,
decoder: chunk.decoder,
wgDoneFn: wgDetect.Done,
},
res,
isFalsePositive,
)
// Remove the detector key from the list of detector keys with results.
// This is to ensure that the chunk is not reprocessed with verification enabled
@ -779,7 +925,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
|
|||
|
||||
for _, detector := range detectorKeysWithResults {
|
||||
wgDetect.Add(1)
|
||||
chunk.chunk.Verify = e.verify
|
||||
chunk.chunk.Verify = e.shouldVerifyChunk(chunk.chunk.Verify, detector, e.detectorVerificationOverrides)
|
||||
e.detectableChunksChan <- detectableChunk{
|
||||
chunk: chunk.chunk,
|
||||
detector: detector,
|
||||
|
@ -802,7 +948,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
|
|||
wgDetect.Wait()
|
||||
}
|
||||
|
||||
func (e *Engine) detectChunks(ctx context.Context) {
|
||||
func (e *Engine) detectorWorker(ctx context.Context) {
|
||||
for data := range e.detectableChunksChan {
|
||||
e.detectChunk(ctx, data)
|
||||
}
|
||||
|
@ -817,6 +963,8 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
|
|||
defer common.Recover(ctx)
|
||||
defer cancel()
|
||||
|
||||
isFalsePositive := detectors.GetFalsePositiveCheck(data.detector)
|
||||
|
||||
// To reduce the overhead of regex calls in the detector,
|
||||
// we limit the amount of data passed to each detector.
|
||||
// The matches field of the DetectorMatch struct contains the
|
||||
|
@ -824,7 +972,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
|
|||
// This avoids the need for additional regex processing on the entire chunk data.
|
||||
matchedBytes := data.detector.Matches()
|
||||
for _, match := range matchedBytes {
|
||||
results, err := data.detector.FromData(ctx, data.chunk.Verify, match)
|
||||
results, err := data.detector.Detector.FromData(ctx, data.chunk.Verify, match)
|
||||
if err != nil {
|
||||
ctx.Logger().Error(err, "error scanning chunk")
|
||||
continue
|
||||
|
@ -848,16 +996,12 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
|
|||
results = e.filterResults(ctx, data.detector, results, e.logFilteredUnverified)
|
||||
|
||||
for _, res := range results {
|
||||
e.processResult(ctx, data, res)
|
||||
e.processResult(ctx, data, res, isFalsePositive)
|
||||
}
|
||||
}
|
||||
data.wgDoneFn()
|
||||
}
|
||||
|
||||
// filterResults applies multiple filters to the detection results to reduce false positives
// and ensure the results meet specific criteria such as verification status and entropy level.
// This function centralizes the filtering logic, making it reusable across different stages
// of the detection pipeline.
func (e *Engine) filterResults(
    ctx context.Context,
    detector detectors.Detector,

@@ -868,13 +1012,18 @@ func (e *Engine) filterResults(
        results = detectors.CleanResults(results)
    }
    results = detectors.FilterKnownFalsePositives(ctx, detector, results, logFilteredUnverified)
    if e.filterEntropy != nil {
        results = detectors.FilterResultsWithEntropy(ctx, results, *e.filterEntropy, logFilteredUnverified)
    if e.filterEntropy != 0 {
        results = detectors.FilterResultsWithEntropy(ctx, results, e.filterEntropy, logFilteredUnverified)
    }
    return results
}
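For background on the entropy filter invoked above (the engine's --filter-entropy option drops low-randomness matches), here is a self-contained sketch of a Shannon-entropy threshold. The helper names and the 3.0 cutoff are illustrative, not the signature or defaults of detectors.FilterResultsWithEntropy:

// Illustration only: an entropy threshold of the kind --filter-entropy applies.
package main

import (
	"fmt"
	"math"
)

// shannonEntropy returns bits of entropy per byte for s.
func shannonEntropy(s []byte) float64 {
	if len(s) == 0 {
		return 0
	}
	var freq [256]float64
	for _, b := range s {
		freq[b]++
	}
	var h float64
	for _, c := range freq {
		if c == 0 {
			continue
		}
		p := c / float64(len(s))
		h -= p * math.Log2(p)
	}
	return h
}

// filterByEntropy keeps only candidates whose raw bytes look random enough.
func filterByEntropy(raws [][]byte, min float64) [][]byte {
	var kept [][]byte
	for _, r := range raws {
		if shannonEntropy(r) >= min {
			kept = append(kept, r)
		}
	}
	return kept
}

func main() {
	candidates := [][]byte{[]byte("aaaaaaaaaaaaaaaa"), []byte("q9X2vL0pZr7KsYdW")}
	for _, r := range filterByEntropy(candidates, 3.0) {
		fmt.Printf("kept %q\n", r) // the repeated-character candidate is dropped
	}
}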
func (e *Engine) processResult(ctx context.Context, data detectableChunk, res detectors.Result) {
func (e *Engine) processResult(
    ctx context.Context,
    data detectableChunk,
    res detectors.Result,
    isFalsePositive func(detectors.Result) bool,
) {
    ignoreLinePresent := false
    if SupportsLineNumbers(data.chunk.SourceType) {
        copyChunk := data.chunk

@@ -896,14 +1045,19 @@ func (e *Engine) processResult(ctx context.Context, data detectableChunk, res de

    secret := detectors.CopyMetadata(&data.chunk, res)
    secret.DecoderType = data.decoder

    if !res.Verified && res.Raw != nil {
        secret.IsWordlistFalsePositive = isFalsePositive(res)
    }

    e.results <- secret
}
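The IsWordlistFalsePositive flag set above is computed by the predicate detectChunk obtains from detectors.GetFalsePositiveCheck and threads through to processResult. For illustration only, a sketch of the kind of wordlist check such a predicate might perform; the word list and helper below are invented for this example, not trufflehog's:

// Illustration only: a wordlist-style false-positive predicate.
package main

import (
	"fmt"
	"strings"
)

// Common placeholder strings that show up in docs and fixtures rather than
// in real credentials (hypothetical list).
var falsePositiveWords = []string{"example", "test", "placeholder", "xxxxxx"}

func isWordlistFalsePositive(raw string) bool {
	lower := strings.ToLower(raw)
	for _, w := range falsePositiveWords {
		if strings.Contains(lower, w) {
			return true
		}
	}
	return false
}

func main() {
	for _, candidate := range []string{"ghp_exampletoken123", "ghp_9f8e7d6c5b4a"} {
		fmt.Printf("%s -> false positive? %v\n", candidate, isWordlistFalsePositive(candidate))
	}
}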
func (e *Engine) notifyResults(ctx context.Context) {
    for r := range e.ResultsChan() {
func (e *Engine) notifierWorker(ctx context.Context) {
    for result := range e.ResultsChan() {
        // Filter unwanted results, based on `--results`.
        if !r.Verified {
            if r.VerificationError() != nil {
        if !result.Verified {
            if result.VerificationError() != nil {
                if !e.notifyUnknownResults {
                    // Skip results with verification errors.
                    continue

@@ -925,21 +1079,21 @@ func (e *Engine) notifyResults(ctx context.Context) {
        // Duplicate results with the same decoder type SHOULD have their own entry in the
        // results list, this would happen if the same secret is found multiple times.
        // Note: If the source type is postman, we dedupe the results regardless of decoder type.
        key := fmt.Sprintf("%s%s%s%+v", r.DetectorType.String(), r.Raw, r.RawV2, r.SourceMetadata)
        if val, ok := e.dedupeCache.Get(key); ok && (val != r.DecoderType ||
            r.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {
        key := fmt.Sprintf("%s%s%s%+v", result.DetectorType.String(), result.Raw, result.RawV2, result.SourceMetadata)
        if val, ok := e.dedupeCache.Get(key); ok && (val != result.DecoderType ||
            result.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {
            continue
        }
        e.dedupeCache.Add(key, r.DecoderType)
        e.dedupeCache.Add(key, result.DecoderType)

        if r.Verified {
        if result.Verified {
            atomic.AddUint64(&e.metrics.VerifiedSecretsFound, 1)
        } else {
            atomic.AddUint64(&e.metrics.UnverifiedSecretsFound, 1)
        }

        if err := e.printer.Print(ctx, &r); err != nil {
            ctx.Logger().Error(err, "error printing result")
        if err := e.dispatcher.Dispatch(ctx, result); err != nil {
            ctx.Logger().Error(err, "error notifying result")
        }
    }
}
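To make the dedupe rule above concrete: a result is dropped when the same detector/raw/source key was already cached under a different decoder, or under any decoder for Postman sources; repeats under the same decoder are reported again. For illustration only, a standalone sketch with a plain map standing in for the engine's LRU dedupe cache and a trimmed-down result type:

// Illustration only: the dedupe-key pattern used by notifierWorker.
package main

import "fmt"

type decoderType int

const (
	decoderPlain decoderType = iota
	decoderBase64
)

type result struct {
	DetectorType string
	Raw          string
	Source       string
	Decoder      decoderType
	FromPostman  bool
}

func main() {
	seen := map[string]decoderType{} // stand-in for e.dedupeCache
	results := []result{
		{"Gitlab", "glpat-abc", "repo.git", decoderPlain, false},
		{"Gitlab", "glpat-abc", "repo.git", decoderBase64, false}, // same secret, different decoder: dropped
		{"Gitlab", "glpat-abc", "repo.git", decoderPlain, false},  // same secret, same decoder: reported again
	}
	for _, r := range results {
		key := fmt.Sprintf("%s%s%s", r.DetectorType, r.Raw, r.Source)
		if prev, ok := seen[key]; ok && (prev != r.Decoder || r.FromPostman) {
			continue // duplicate under a different decoder, or any Postman duplicate
		}
		seen[key] = r.Decoder
		fmt.Printf("notify %+v\n", r)
	}
}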
@@ -13,6 +13,7 @@ import (

    "github.com/stretchr/testify/assert"

    "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/gitlab/v2"
    "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"

    "github.com/trufflesecurity/trufflehog/v3/pkg/config"

@@ -243,14 +244,27 @@ func TestEngine_DuplicateSecrets(t *testing.T) {
    ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
    defer cancel()

    e, err := Start(ctx,
        WithConcurrency(1),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(DefaultDetectors()...),
        WithVerify(false),
        WithPrinter(new(discardPrinter)),
    )
    assert.Nil(t, err)
    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    conf := Config{
        Concurrency:   1,
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     DefaultDetectors(),
        Verify:        false,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &conf)
    assert.NoError(t, err)

    e.Start(ctx)

    cfg := sources.FilesystemConfig{Paths: []string{absPath}}
    if err := e.ScanFileSystem(ctx, cfg); err != nil {
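This is the recurring migration in the test files: the functional-options Start call gives way to building a Config, constructing the engine with NewEngine, and starting it explicitly. A condensed sketch of the new pattern, written as it might appear inside the engine package; error handling is trimmed, the scan path is a placeholder, and discardPrinter is the tests' no-op printer:

// Sketch of the post-refactor construction pattern used throughout these tests.
package engine

import (
	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

func exampleScan(ctx context.Context) error {
	// A source manager is now passed in explicitly instead of being created by Start.
	sourceManager := sources.NewManager(
		sources.WithSourceUnits(),
		sources.WithBufferedOutput(64),
	)

	conf := Config{
		Concurrency:   1,
		Decoders:      decoders.DefaultDecoders(),
		Detectors:     DefaultDetectors(),
		Verify:        false,
		SourceManager: sourceManager,
		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
	}

	e, err := NewEngine(ctx, &conf)
	if err != nil {
		return err
	}
	e.Start(ctx)

	cfg := sources.FilesystemConfig{Paths: []string{"/path/to/scan"}}
	return e.ScanFileSystem(ctx, cfg)
}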
@@ -277,15 +291,28 @@ func TestEngine_VersionedDetectorsVerifiedSecrets(t *testing.T) {
    _, err = tmpFile.WriteString(fmt.Sprintf("test data using keyword %s", fakeDetectorKeyword))
    assert.NoError(t, err)

    e, err := Start(ctx,
        WithConcurrency(1),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(&fakeDetectorV1{}, &fakeDetectorV2{}),
        WithVerify(true),
        WithPrinter(new(discardPrinter)),
    )
    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    conf := Config{
        Concurrency:   1,
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     []detectors.Detector{new(fakeDetectorV1), new(fakeDetectorV2)},
        Verify:        true,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &conf)
    assert.NoError(t, err)

    e.Start(ctx)

    cfg := sources.FilesystemConfig{Paths: []string{tmpFile.Name()}}
    if err := e.ScanFileSystem(ctx, cfg); err != nil {
        return

@@ -333,14 +360,28 @@ func TestEngine_CustomDetectorsDetectorsVerifiedSecrets(t *testing.T) {

    ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
    defer cancel()
    e, err := Start(ctx,
        WithConcurrency(1),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(allDetectors...),
        WithVerify(true),
        WithPrinter(new(discardPrinter)),
    )
    assert.Nil(t, err)

    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    conf := Config{
        Concurrency:   1,
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     allDetectors,
        Verify:        true,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &conf)
    assert.NoError(t, err)

    e.Start(ctx)

    cfg := sources.FilesystemConfig{Paths: []string{tmpFile.Name()}}
    if err := e.ScanFileSystem(ctx, cfg); err != nil {
@@ -367,15 +408,29 @@ func TestVerificationOverlapChunk(t *testing.T) {
    conf, err := config.Read(confPath)
    assert.Nil(t, err)

    e, err := Start(ctx,
        WithConcurrency(1),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(conf.Detectors...),
        WithVerify(false),
        WithPrinter(new(discardPrinter)),
        withVerificationOverlapTracking(),
    )
    assert.Nil(t, err)
    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    c := Config{
        Concurrency:   1,
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     conf.Detectors,
        Verify:        false,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &c)
    assert.NoError(t, err)

    e.verificationOverlapTracker = new(verificationOverlapTracker)

    e.Start(ctx)

    cfg := sources.FilesystemConfig{Paths: []string{absPath}}
    if err := e.ScanFileSystem(ctx, cfg); err != nil {

@@ -407,16 +462,30 @@ func TestVerificationOverlapChunkFalsePositive(t *testing.T) {
    conf, err := config.Read(confPath)
    assert.NoError(t, err)

    e, err := Start(ctx,
        WithConcurrency(1),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(conf.Detectors...),
        WithVerify(false),
        WithPrinter(new(discardPrinter)),
        withVerificationOverlapTracking(),
    )
    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    c := Config{
        Concurrency:   1,
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     conf.Detectors,
        Verify:        false,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &c)
    assert.NoError(t, err)

    e.verificationOverlapTracker = new(verificationOverlapTracker)

    e.Start(ctx)

    cfg := sources.FilesystemConfig{Paths: []string{absPath}}
    err = e.ScanFileSystem(ctx, cfg)
    assert.NoError(t, err)

@@ -757,3 +826,60 @@ func generateRandomDataWithKeywords(size int, detectors []detectors.Detector) st

    return string(data)
}
func TestEngine_ShouldVerifyChunk(t *testing.T) {
    tests := []struct {
        name        string
        detector    detectors.Detector
        overrideKey config.DetectorID
        want        func(sourceVerify, detectorVerify bool) bool
    }{
        {
            name:        "detector override by exact version",
            detector:    &gitlab.Scanner{},
            overrideKey: config.DetectorID{ID: detectorspb.DetectorType_Gitlab, Version: 2},
            want:        func(sourceVerify, detectorVerify bool) bool { return detectorVerify },
        },
        {
            name:        "detector override by versionless config",
            detector:    &gitlab.Scanner{},
            overrideKey: config.DetectorID{ID: detectorspb.DetectorType_Gitlab, Version: 0},
            want:        func(sourceVerify, detectorVerify bool) bool { return detectorVerify },
        },
        {
            name:        "no detector override because of detector type mismatch",
            detector:    &gitlab.Scanner{},
            overrideKey: config.DetectorID{ID: detectorspb.DetectorType_NpmToken, Version: 2},
            want:        func(sourceVerify, detectorVerify bool) bool { return sourceVerify },
        },
        {
            name:        "no detector override because of detector version mismatch",
            detector:    &gitlab.Scanner{},
            overrideKey: config.DetectorID{ID: detectorspb.DetectorType_Gitlab, Version: 1},
            want:        func(sourceVerify, detectorVerify bool) bool { return sourceVerify },
        },
    }

    booleanChoices := [2]bool{true, false}

    engine := &Engine{verify: true}

    for _, tt := range tests {
        for _, sourceVerify := range booleanChoices {
            for _, detectorVerify := range booleanChoices {

                t.Run(fmt.Sprintf("%s (source verify = %v, detector verify = %v)", tt.name, sourceVerify, detectorVerify), func(t *testing.T) {
                    overrides := map[config.DetectorID]bool{
                        tt.overrideKey: detectorVerify,
                    }

                    want := tt.want(sourceVerify, detectorVerify)

                    got := engine.shouldVerifyChunk(sourceVerify, tt.detector, overrides)

                    assert.Equal(t, want, got)
                })
            }
        }
    }
}
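The table-driven test above pins down the override semantics: an override applies when it names the detector's exact type and version, or when it is versionless for that type; otherwise the chunk's source-level verify flag wins. For illustration only, logic consistent with those four cases, not necessarily the engine's exact implementation, assuming detectors.Versioner is the interface that exposes a detector's version:

// Rough sketch of override resolution consistent with the test cases above.
package engine

import (
	"github.com/trufflesecurity/trufflehog/v3/pkg/config"
	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
)

func shouldVerifyChunkSketch(
	sourceVerify bool,
	detector detectors.Detector,
	overrides map[config.DetectorID]bool,
) bool {
	detectorID := config.DetectorID{ID: detector.Type(), Version: 0}
	if versioned, ok := detector.(detectors.Versioner); ok {
		detectorID.Version = versioned.Version()
	}

	// An override keyed on the exact type and version takes precedence.
	if verify, ok := overrides[detectorID]; ok {
		return verify
	}
	// A versionless override applies to every version of that detector type.
	if verify, ok := overrides[config.DetectorID{ID: detectorID.ID, Version: 0}]; ok {
		return verify
	}
	// No override: fall back to the source-level verify setting.
	return sourceVerify
}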
@@ -61,13 +61,27 @@ func TestScanGCS(t *testing.T) {
    ctx, cancel := context.WithCancel(context.TODO())
    defer cancel()

    e, err := Start(ctx,
        WithConcurrency(1),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(DefaultDetectors()...),
        WithVerify(false),
    )
    assert.Nil(t, err)
    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    conf := Config{
        Concurrency:   1,
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     DefaultDetectors(),
        Verify:        false,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &conf)
    assert.NoError(t, err)

    e.Start(ctx)

    go func() {
        resultCount := 0
@@ -62,14 +62,27 @@ func TestGitEngine(t *testing.T) {
        },
    } {
        t.Run(tName, func(t *testing.T) {
            e, err := Start(ctx,
                WithConcurrency(1),
                WithDecoders(decoders.DefaultDecoders()...),
                WithDetectors(DefaultDetectors()...),
                WithVerify(true),
                WithPrinter(new(discardPrinter)),
            )
            assert.Nil(t, err)
            const defaultOutputBufferSize = 64
            opts := []func(*sources.SourceManager){
                sources.WithSourceUnits(),
                sources.WithBufferedOutput(defaultOutputBufferSize),
            }

            sourceManager := sources.NewManager(opts...)

            conf := Config{
                Concurrency:   1,
                Decoders:      decoders.DefaultDecoders(),
                Detectors:     DefaultDetectors(),
                Verify:        true,
                SourceManager: sourceManager,
                Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
            }

            e, err := NewEngine(ctx, &conf)
            assert.NoError(t, err)

            e.Start(ctx)

            cfg := sources.GitConfig{
                URI: path,

@@ -111,14 +124,25 @@ func BenchmarkGitEngine(b *testing.B) {
    ctx, cancel := context.WithCancel(ctx)
    defer cancel()

    e, err := Start(ctx,
        WithConcurrency(runtime.NumCPU()),
        WithDecoders(decoders.DefaultDecoders()...),
        WithDetectors(DefaultDetectors()...),
        WithVerify(false),
        WithPrinter(new(discardPrinter)),
    )
    assert.Nil(b, err)
    const defaultOutputBufferSize = 64
    opts := []func(*sources.SourceManager){
        sources.WithSourceUnits(),
        sources.WithBufferedOutput(defaultOutputBufferSize),
    }

    sourceManager := sources.NewManager(opts...)

    conf := Config{
        Concurrency:   runtime.NumCPU(),
        Decoders:      decoders.DefaultDecoders(),
        Detectors:     DefaultDetectors(),
        Verify:        false,
        SourceManager: sourceManager,
        Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
    }

    e, err := NewEngine(ctx, &conf)
    assert.NoError(b, err)

    go func() {
        resultCount := 0