diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index ffe23e181..74ec912e9 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -735,6 +735,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) { detectorKeysWithResults[detector.Key] = detector } + results = e.filterResults(ctx, detector, results, e.logFilteredUnverified) for _, res := range results { var val []byte if res.RawV2 != nil { @@ -844,15 +845,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { e.metrics.detectorAvgTime.Store(detectorName, avgTime) } - if e.filterUnverified { - results = detectors.CleanResults(results) - } - - results = detectors.FilterKnownFalsePositives(ctx, data.detector, results, e.logFilteredUnverified) - - if e.filterEntropy != nil { - results = detectors.FilterResultsWithEntropy(ctx, results, *e.filterEntropy, e.logFilteredUnverified) - } + results = e.filterResults(ctx, data.detector, results, e.logFilteredUnverified) for _, res := range results { e.processResult(ctx, data, res) @@ -861,6 +854,26 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { data.wgDoneFn() } +// filterResults runs the engine's result filters in a fixed order: detectors.CleanResults +// when e.filterUnverified is set, then detectors.FilterKnownFalsePositives unconditionally, +// then detectors.FilterResultsWithEntropy when e.filterEntropy is non-nil. logFilteredUnverified +// is forwarded to the last two filters. Called by detectChunk and verificationOverlapWorker. 
+func (e *Engine) filterResults( + ctx context.Context, + detector detectors.Detector, + results []detectors.Result, + logFilteredUnverified bool, +) []detectors.Result { + if e.filterUnverified { + results = detectors.CleanResults(results) + } + results = detectors.FilterKnownFalsePositives(ctx, detector, results, logFilteredUnverified) + if e.filterEntropy != nil { + results = detectors.FilterResultsWithEntropy(ctx, results, *e.filterEntropy, logFilteredUnverified) + } + return results +} + func (e *Engine) processResult(ctx context.Context, data detectableChunk, res detectors.Result) { ignoreLinePresent := false if SupportsLineNumbers(data.chunk.SourceType) { diff --git a/pkg/engine/engine_test.go b/pkg/engine/engine_test.go index 67dfccd81..a523fb835 100644 --- a/pkg/engine/engine_test.go +++ b/pkg/engine/engine_test.go @@ -393,6 +393,41 @@ func TestVerificationOverlapChunk(t *testing.T) { assert.Equal(t, wantDupe, e.verificationOverlapTracker.verificationOverlapDuplicateCount) } +func TestVerificationOverlapChunkFalsePositive(t *testing.T) { + ctx := context.Background() + + absPath, err := filepath.Abs("./testdata/verificationoverlap_secrets_fp.txt") + assert.NoError(t, err) + + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + confPath, err := filepath.Abs("./testdata/verificationoverlap_detectors_fp.yaml") + assert.NoError(t, err) + conf, err := config.Read(confPath) + assert.NoError(t, err) + + e, err := Start(ctx, + WithConcurrency(1), + WithDecoders(decoders.DefaultDecoders()...), + WithDetectors(conf.Detectors...), + WithVerify(false), + WithPrinter(new(discardPrinter)), + withVerificationOverlapTracking(), + ) + assert.NoError(t, err) + + cfg := sources.FilesystemConfig{Paths: []string{absPath}} + err = e.ScanFileSystem(ctx, cfg) + assert.NoError(t, err) + + // Wait for all the chunks to be processed. + assert.NoError(t, e.Finish(ctx)) + // Expect zero unverified findings because the fixture's secret is a false positive. 
+ want := uint64(0) + assert.Equal(t, want, e.GetMetrics().UnverifiedSecretsFound) +} + func TestFragmentFirstLineAndLink(t *testing.T) { tests := []struct { name string diff --git a/pkg/engine/testdata/verificationoverlap_detectors_fp.yaml b/pkg/engine/testdata/verificationoverlap_detectors_fp.yaml new file mode 100644 index 000000000..5e4403a7c --- /dev/null +++ b/pkg/engine/testdata/verificationoverlap_detectors_fp.yaml @@ -0,0 +1,13 @@ +# config.yaml +detectors: + - name: detector1 + keywords: + - sample + regex: + api_key: \b(sample-[a-zA-Z-0-9]{59})\b + + - name: detector2 + keywords: + - ample + regex: + api_key: \b(ssample-[a-zA-Z-0-9]{59})\b \ No newline at end of file diff --git a/pkg/engine/testdata/verificationoverlap_secrets_fp.txt b/pkg/engine/testdata/verificationoverlap_secrets_fp.txt new file mode 100644 index 000000000..03c34d478 --- /dev/null +++ b/pkg/engine/testdata/verificationoverlap_secrets_fp.txt @@ -0,0 +1,2 @@ + +POSTMAN_API_KEY="ssample-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r"