cache dupes w/ different decoders (#1754)

* only cache dupes that have different decoders.

* add test.

* remove file.

* update comment.
This commit is contained in:
ahrav 2023-09-11 08:18:48 -07:00 committed by GitHub
parent 70cdff915b
commit fdeccf06a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 13 deletions

20
main.go
View file

@ -105,11 +105,11 @@ var (
filesystemScanIncludePaths = filesystemScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
filesystemScanExcludePaths = filesystemScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
s3Scan = cli.Command("s3", "Find credentials in S3 buckets.")
s3ScanKey = s3Scan.Flag("key", "S3 key used to authenticate. Can be provided with environment variable AWS_ACCESS_KEY_ID.").Envar("AWS_ACCESS_KEY_ID").String()
s3ScanRoleArns = s3Scan.Flag("role-arn", "Specify the ARN of an IAM role to assume for scanning. You can repeat this flag.").Strings()
s3ScanSecret = s3Scan.Flag("secret", "S3 secret used to authenticate. Can be provided with environment variable AWS_SECRET_ACCESS_KEY.").Envar("AWS_SECRET_ACCESS_KEY").String()
s3ScanSessionToken = s3Scan.Flag("session-token", "S3 session token used to authenticate temporary credentials. Can be provided with environment variable AWS_SESSION_TOKEN.").Envar("AWS_SESSION_TOKEN").String()
s3Scan = cli.Command("s3", "Find credentials in S3 buckets.")
s3ScanKey = s3Scan.Flag("key", "S3 key used to authenticate. Can be provided with environment variable AWS_ACCESS_KEY_ID.").Envar("AWS_ACCESS_KEY_ID").String()
s3ScanRoleArns = s3Scan.Flag("role-arn", "Specify the ARN of an IAM role to assume for scanning. You can repeat this flag.").Strings()
s3ScanSecret = s3Scan.Flag("secret", "S3 secret used to authenticate. Can be provided with environment variable AWS_SECRET_ACCESS_KEY.").Envar("AWS_SECRET_ACCESS_KEY").String()
s3ScanSessionToken = s3Scan.Flag("session-token", "S3 session token used to authenticate temporary credentials. Can be provided with environment variable AWS_SESSION_TOKEN.").Envar("AWS_SESSION_TOKEN").String()
s3ScanCloudEnv = s3Scan.Flag("cloud-environment", "Use IAM credentials in cloud environment.").Bool()
s3ScanBuckets = s3Scan.Flag("bucket", "Name of S3 bucket to scan. You can repeat this flag.").Strings()
s3ScanMaxObjectSize = s3Scan.Flag("max-object-size", "Maximum size of objects to scan. Objects larger than this will be skipped. (Byte units eg. 512B, 2KB, 4MB)").Default("250MB").Bytes()
@ -354,6 +354,10 @@ func run(state overseer.State) {
printer = new(output.PlainPrinter)
}
if !*jsonLegacy && !*jsonOut {
fmt.Fprintf(os.Stderr, "🐷🔑🐷 TruffleHog. Unearth your secrets. 🐷🔑🐷\n\n")
}
e, err := engine.Start(ctx,
engine.WithConcurrency(uint8(*concurrency)),
engine.WithDecoders(decoders.DefaultDecoders()...),
@ -469,7 +473,7 @@ func run(state overseer.State) {
Secret: *s3ScanSecret,
SessionToken: *s3ScanSessionToken,
Buckets: *s3ScanBuckets,
Roles: *s3ScanRoleArns,
Roles: *s3ScanRoleArns,
CloudCred: *s3ScanCloudEnv,
MaxObjectSize: int64(*s3ScanMaxObjectSize),
}
@ -525,10 +529,6 @@ func run(state overseer.State) {
}
}
if !*jsonLegacy && !*jsonOut {
fmt.Fprintf(os.Stderr, "🐷🔑🐷 TruffleHog. Unearth your secrets. 🐷🔑🐷\n\n")
}
// Wait for all workers to finish.
if err = e.Finish(ctx); err != nil {
logFatal(err, "engine failed to finish execution")

View file

@ -565,11 +565,18 @@ func (e *Engine) notifyResults(ctx context.Context) {
}
atomic.AddUint32(&e.numFoundResults, 1)
// Dedupe results by comparing the detector type, raw result, and source metadata.
// We want to avoid duplicate results with different decoder types, but we also
// want to include duplicate results with the same decoder type.
// Duplicate results with the same decoder type SHOULD have their own entry in the
// results list, this would happen if the same secret is found multiple times.
key := fmt.Sprintf("%s%s%s%+v", r.DetectorType.String(), r.Raw, r.RawV2, r.SourceMetadata)
if _, ok := e.dedupeCache.Get(key); ok {
continue
if val, ok := e.dedupeCache.Get(key); ok {
if res, ok := val.(detectorspb.DecoderType); ok && res != r.DecoderType {
continue
}
}
e.dedupeCache.Add(key, struct{}{})
e.dedupeCache.Add(key, r.DecoderType)
if r.Verified {
atomic.AddUint64(&e.metrics.VerifiedSecretsFound, 1)

View file

@ -2,8 +2,13 @@ package engine
import (
"fmt"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
@ -171,3 +176,32 @@ func BenchmarkSupportsLineNumbersLoop(b *testing.B) {
_ = SupportsLineNumbers(sourceType)
}
}
// TestEngine_DuplicatSecrets is a test that detects ALL duplicate secrets with the same decoder.
func TestEngine_DuplicatSecrets(t *testing.T) {
ctx := context.Background()
absPath, err := filepath.Abs("./testdata")
assert.Nil(t, err)
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
e, err := Start(ctx,
WithConcurrency(1),
WithDecoders(decoders.DefaultDecoders()...),
WithDetectors(true, DefaultDetectors()...),
WithPrinter(new(discardPrinter)),
)
assert.Nil(t, err)
cfg := sources.FilesystemConfig{Paths: []string{absPath}}
if err := e.ScanFileSystem(ctx, cfg); err != nil {
return
}
// Wait for all the chunks to be processed.
assert.Nil(t, e.Finish(ctx))
want := uint64(5)
assert.Equal(t, want, e.GetMetrics().UnverifiedSecretsFound)
}

6
pkg/engine/testdata/secrets.txt vendored Normal file
View file

@ -0,0 +1,6 @@
1 aws AKIAWARWQKZNHMZBLY4I
2 secret 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
sentry 27ac84f4bcdb4fca9701f4d6f6f58cd7d96b69c9d9754d40800645a51d668f90
sentry 27ac84f4bcdb4fca9701f4d6f6f58cd7d96b69c9d9754d40800645a51d668f90
sentry 27ac84f4bcdb4fca9701f4d6f6f58cd7d96b69c9d9754d40800645a51d668f90
sentry 27ac84f4bcdb4fca9701f4d6f6f58cd7d96b69c9d9754d40800645a51d668f90