mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
[feat] - Add Option to Retain False Positives During Detection (#2967)
* provide a mechanism to retain false positive findings * update * reorganize * revert comment * update test * typo * fix test * fix test * update * update
This commit is contained in:
parent
a0108df67a
commit
347e8a6683
4 changed files with 113 additions and 25 deletions
|
@ -2,6 +2,7 @@ package detectors
|
|||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
@ -151,22 +152,26 @@ func FilterResultsWithEntropy(ctx context.Context, results []Result, entropy flo
|
|||
}
|
||||
|
||||
// FilterKnownFalsePositives filters out known false positives from the results.
|
||||
func FilterKnownFalsePositives(ctx context.Context, detector Detector, results []Result, shouldLog bool) []Result {
|
||||
func FilterKnownFalsePositives(ctx context.Context, detector Detector, results []Result) []Result {
|
||||
var filteredResults []Result
|
||||
|
||||
isFalsePositive := GetFalsePositiveCheck(detector)
|
||||
|
||||
for _, result := range results {
|
||||
if !result.Verified && result.Raw != nil {
|
||||
isFp, reason := isFalsePositive(result)
|
||||
if !isFp {
|
||||
filteredResults = append(filteredResults, result)
|
||||
} else if shouldLog {
|
||||
ctx.Logger().Info("Filtered out known false positive", "result", result, "reason", reason)
|
||||
}
|
||||
} else {
|
||||
if len(result.Raw) == 0 {
|
||||
ctx.Logger().Error(fmt.Errorf("empty raw"), "invalid result; skipping")
|
||||
continue
|
||||
}
|
||||
|
||||
if result.Verified {
|
||||
filteredResults = append(filteredResults, result)
|
||||
continue
|
||||
}
|
||||
|
||||
if isFp, _ := isFalsePositive(result); !isFp {
|
||||
filteredResults = append(filteredResults, result)
|
||||
}
|
||||
}
|
||||
|
||||
return filteredResults
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ func TestFilterKnownFalsePositives_DefaultLogic(t *testing.T) {
|
|||
expected := []Result{
|
||||
{Raw: []byte("hga8adshla3434g")},
|
||||
}
|
||||
filtered := FilterKnownFalsePositives(logContext.Background(), fakeDetector{}, results, false)
|
||||
filtered := FilterKnownFalsePositives(logContext.Background(), fakeDetector{}, results)
|
||||
assert.ElementsMatch(t, expected, filtered)
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,7 @@ func TestFilterKnownFalsePositives_CustomLogic(t *testing.T) {
|
|||
{Raw: []byte("number")},
|
||||
{Raw: []byte("hga8adshla3434g")},
|
||||
}
|
||||
filtered := FilterKnownFalsePositives(logContext.Background(), customFalsePositiveChecker{}, results, false)
|
||||
filtered := FilterKnownFalsePositives(logContext.Background(), customFalsePositiveChecker{}, results)
|
||||
assert.ElementsMatch(t, expected, filtered)
|
||||
}
|
||||
|
||||
|
|
|
@ -156,7 +156,7 @@ type Engine struct {
|
|||
notifyVerifiedResults bool
|
||||
notifyUnverifiedResults bool
|
||||
notifyUnknownResults bool
|
||||
logFilteredUnverified bool
|
||||
retainFalsePositives bool
|
||||
verificationOverlap bool
|
||||
printAvgDetectorTime bool
|
||||
// By default, the engine will only scan a subset of the chunk if a detector matches the chunk.
|
||||
|
@ -206,7 +206,7 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) {
|
|||
filterUnverified: cfg.FilterUnverified,
|
||||
filterEntropy: cfg.FilterEntropy,
|
||||
printAvgDetectorTime: cfg.PrintAvgDetectorTime,
|
||||
logFilteredUnverified: cfg.LogFilteredUnverified,
|
||||
retainFalsePositives: cfg.LogFilteredUnverified,
|
||||
verificationOverlap: cfg.VerificationOverlap,
|
||||
sourceManager: cfg.SourceManager,
|
||||
scanEntireChunk: cfg.ShouldScanEntireChunk,
|
||||
|
@ -279,8 +279,10 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) {
|
|||
_, ok = results["unverified"]
|
||||
engine.notifyUnverifiedResults = ok
|
||||
|
||||
_, ok = results["filtered_unverified"]
|
||||
engine.logFilteredUnverified = ok
|
||||
if _, ok = results["filtered_unverified"]; ok {
|
||||
engine.retainFalsePositives = ok
|
||||
engine.notifyUnverifiedResults = ok
|
||||
}
|
||||
}
|
||||
|
||||
if err := engine.initialize(ctx); err != nil {
|
||||
|
@ -893,7 +895,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
|
|||
detectorKeysWithResults[detector.Key] = detector
|
||||
}
|
||||
|
||||
results = e.filterResults(ctx, detector, results, e.logFilteredUnverified)
|
||||
results = e.filterResults(ctx, detector, results)
|
||||
for _, res := range results {
|
||||
var val []byte
|
||||
if res.RawV2 != nil {
|
||||
|
@ -1024,7 +1026,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
|
|||
e.metrics.detectorAvgTime.Store(detectorName, avgTime)
|
||||
}
|
||||
|
||||
results = e.filterResults(ctx, data.detector, results, e.logFilteredUnverified)
|
||||
results = e.filterResults(ctx, data.detector, results)
|
||||
|
||||
for _, res := range results {
|
||||
e.processResult(ctx, data, res, isFalsePositive)
|
||||
|
@ -1038,16 +1040,17 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
|
|||
|
||||
func (e *Engine) filterResults(
|
||||
ctx context.Context,
|
||||
detector detectors.Detector,
|
||||
detector *ahocorasick.DetectorMatch,
|
||||
results []detectors.Result,
|
||||
logFilteredUnverified bool,
|
||||
) []detectors.Result {
|
||||
if e.filterUnverified {
|
||||
results = detectors.CleanResults(results)
|
||||
}
|
||||
results = detectors.FilterKnownFalsePositives(ctx, detector, results, logFilteredUnverified)
|
||||
if !e.retainFalsePositives {
|
||||
results = detectors.FilterKnownFalsePositives(ctx, detector.Detector, results)
|
||||
}
|
||||
if e.filterEntropy != 0 {
|
||||
results = detectors.FilterResultsWithEntropy(ctx, results, e.filterEntropy, logFilteredUnverified)
|
||||
results = detectors.FilterResultsWithEntropy(ctx, results, e.filterEntropy, e.retainFalsePositives)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
|
|
@ -448,6 +448,43 @@ func TestVerificationOverlapChunk(t *testing.T) {
|
|||
assert.Equal(t, wantDupe, e.verificationOverlapTracker.verificationOverlapDuplicateCount)
|
||||
}
|
||||
|
||||
// Detector type identifiers reported by the stub detectors testDetectorV1 and
// testDetectorV2 defined in this test file.
// NOTE(review): presumably negative so they cannot collide with real
// detectorspb.DetectorType values — confirm.
const (
|
||||
TestDetectorType = -1
|
||||
TestDetectorType2 = -2
|
||||
)
|
||||
|
||||
// Compile-time check that testDetectorV1 satisfies detectors.Detector.
var _ detectors.Detector = (*testDetectorV1)(nil)
|
||||

||||
// testDetectorV1 is a stateless stub detector used by the engine tests.
type testDetectorV1 struct{}
|
||||

||||
// FromData ignores its context, verify flag and input bytes and always
// returns exactly one Result with a nil error. The Result is tagged
// TestDetectorType and carries a fixed Raw value starting with "ssample-".
func (testDetectorV1) FromData(_ aCtx.Context, _ bool, _ []byte) ([]detectors.Result, error) {
|
||||
result := detectors.Result{
|
||||
DetectorType: TestDetectorType,
|
||||
// Fixed raw secret; it equals testDetectorV2's raw value with one extra leading "s".
Raw: []byte("ssample-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r"),
|
||||
}
|
||||
return []detectors.Result{result}, nil
|
||||
}
|
||||

||||
// Keywords returns "sample" as this detector's only keyword.
func (testDetectorV1) Keywords() []string { return []string{"sample"} }
|
||||

||||
// Type returns TestDetectorType as this detector's type.
func (testDetectorV1) Type() detectorspb.DetectorType { return TestDetectorType }
|
||||
|
||||
// Compile-time check that testDetectorV2 satisfies detectors.Detector.
var _ detectors.Detector = (*testDetectorV2)(nil)
|
||||

||||
// testDetectorV2 is a second stateless stub detector used by the engine tests.
type testDetectorV2 struct{}
|
||||

||||
// FromData ignores its context, verify flag and input bytes and always
// returns exactly one Result with a nil error. The Result carries a fixed Raw
// value starting with "sample-".
func (testDetectorV2) FromData(_ aCtx.Context, _ bool, _ []byte) ([]detectors.Result, error) {
|
||||
result := detectors.Result{
|
||||
// NOTE(review): the result is tagged TestDetectorType, while Type() on this
// detector returns TestDetectorType2 — confirm the mismatch is intentional.
DetectorType: TestDetectorType,
|
||||
Raw: []byte("sample-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r"),
|
||||
}
|
||||
return []detectors.Result{result}, nil
|
||||
}
|
||||

||||
// Keywords returns "ample" as this detector's only keyword.
func (testDetectorV2) Keywords() []string { return []string{"ample"} }
|
||||

||||
// Type returns TestDetectorType2 as this detector's type.
func (testDetectorV2) Type() detectorspb.DetectorType { return TestDetectorType2 }
|
||||
|
||||
func TestVerificationOverlapChunkFalsePositive(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
|
@ -457,6 +494,50 @@ func TestVerificationOverlapChunkFalsePositive(t *testing.T) {
|
|||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
const defaultOutputBufferSize = 64
|
||||
opts := []func(*sources.SourceManager){
|
||||
sources.WithSourceUnits(),
|
||||
sources.WithBufferedOutput(defaultOutputBufferSize),
|
||||
}
|
||||
|
||||
sourceManager := sources.NewManager(opts...)
|
||||
|
||||
c := Config{
|
||||
Concurrency: 1,
|
||||
Decoders: decoders.DefaultDecoders(),
|
||||
Detectors: []detectors.Detector{testDetectorV1{}, testDetectorV2{}},
|
||||
Verify: false,
|
||||
SourceManager: sourceManager,
|
||||
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
|
||||
}
|
||||
|
||||
e, err := NewEngine(ctx, &c)
|
||||
assert.NoError(t, err)
|
||||
|
||||
e.verificationOverlapTracker = new(verificationOverlapTracker)
|
||||
|
||||
e.Start(ctx)
|
||||
|
||||
cfg := sources.FilesystemConfig{Paths: []string{absPath}}
|
||||
err = e.ScanFileSystem(ctx, cfg)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for all the chunks to be processed.
|
||||
assert.NoError(t, e.Finish(ctx))
|
||||
// We want 0 because the secret is a false positive.
|
||||
want := uint64(0)
|
||||
assert.Equal(t, want, e.GetMetrics().UnverifiedSecretsFound)
|
||||
}
|
||||
|
||||
func TestRetainFalsePositives(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
absPath, err := filepath.Abs("./testdata/verificationoverlap_secrets_fp.txt")
|
||||
assert.NoError(t, err)
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
confPath, err := filepath.Abs("./testdata/verificationoverlap_detectors_fp.yaml")
|
||||
assert.NoError(t, err)
|
||||
conf, err := config.Read(confPath)
|
||||
|
@ -477,13 +558,12 @@ func TestVerificationOverlapChunkFalsePositive(t *testing.T) {
|
|||
Verify: false,
|
||||
SourceManager: sourceManager,
|
||||
Dispatcher: NewPrinterDispatcher(new(discardPrinter)),
|
||||
Results: map[string]struct{}{"filtered_unverified": {}},
|
||||
}
|
||||
|
||||
e, err := NewEngine(ctx, &c)
|
||||
assert.NoError(t, err)
|
||||
|
||||
e.verificationOverlapTracker = new(verificationOverlapTracker)
|
||||
|
||||
e.Start(ctx)
|
||||
|
||||
cfg := sources.FilesystemConfig{Paths: []string{absPath}}
|
||||
|
@ -492,8 +572,8 @@ func TestVerificationOverlapChunkFalsePositive(t *testing.T) {
|
|||
|
||||
// Wait for all the chunks to be processed.
|
||||
assert.NoError(t, e.Finish(ctx))
|
||||
// We want 0 because the secret is a false positive.
|
||||
want := uint64(0)
|
||||
// We want 1 because the secret is a false positive and we are retaining it.
|
||||
want := uint64(1)
|
||||
assert.Equal(t, want, e.GetMetrics().UnverifiedSecretsFound)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue