feat(detectors): log falsepositive reason (#2969)

This commit is contained in:
Richard Gomez 2024-06-14 11:26:05 -04:00 committed by GitHub
parent 235b27964b
commit 2964b3b2d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 52 additions and 41 deletions

View file

@ -8,11 +8,12 @@ import (
"regexp"
"strings"
"golang.org/x/sync/errgroup"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"golang.org/x/sync/errgroup"
)
// The maximum number of matches from one chunk. This const is used when
@ -110,8 +111,8 @@ func (c *CustomRegexWebhook) FromData(ctx context.Context, verify bool, data []b
return results, nil
}
func (c *CustomRegexWebhook) IsFalsePositive(_ detectors.Result) bool {
return false
func (c *CustomRegexWebhook) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func (c *CustomRegexWebhook) createResults(ctx context.Context, match map[string][]string, verify bool, results chan<- detectors.Result) error {

View file

@ -107,8 +107,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func (s Scanner) Type() detectorspb.DetectorType {

View file

@ -96,8 +96,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func (s Scanner) Type() detectorspb.DetectorType {

View file

@ -17,7 +17,10 @@ var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcd
type FalsePositive string
type CustomFalsePositiveChecker interface {
IsFalsePositive(result Result) bool
// IsFalsePositive returns two values:
// 1. Whether the result is a false positive.
// 2. If #1 is `true`, the reason why.
IsFalsePositive(result Result) (bool, string)
}
//go:embed "badlist.txt"
@ -46,38 +49,40 @@ func init() {
filter = builder.Build()
}
func GetFalsePositiveCheck(detector Detector) func(Result) bool {
func GetFalsePositiveCheck(detector Detector) func(Result) (bool, string) {
checker, ok := detector.(CustomFalsePositiveChecker)
if ok {
return checker.IsFalsePositive
}
return func(res Result) bool {
return func(res Result) (bool, string) {
return IsKnownFalsePositive(string(res.Raw), DefaultFalsePositives, true)
}
}
// IsKnownFalsePositive will not return a valid secret finding if any of the disqualifying conditions are met
// Currently that includes: No number, english word in key, or matches common example patterns.
// IsKnownFalsePositive returns whether a finding is (likely) a known false positive, and the reason for the detection.
//
// Currently, this includes: invalid UTF-8, an English word in the key, or a match against common example patterns.
// Only the secret key material should be passed into this function.
func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordCheck bool) bool {
func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordCheck bool) (bool, string) {
if !utf8.ValidString(match) {
return true
return true, "invalid utf8"
}
lower := strings.ToLower(match)
for _, fp := range falsePositives {
if strings.Contains(lower, string(fp)) {
return true
fps := string(fp)
if strings.Contains(lower, fps) {
return true, "matches term: " + fps
}
}
if wordCheck {
if filter.MatchFirstString(lower) != nil {
return true
if m := filter.MatchFirstString(lower); m != nil {
return true, "matches wordlist: " + m.MatchString()
}
}
return false
return false, ""
}
func HasDigit(key string) bool {
@ -153,10 +158,11 @@ func FilterKnownFalsePositives(ctx context.Context, detector Detector, results [
for _, result := range results {
if !result.Verified && result.Raw != nil {
if !isFalsePositive(result) {
isFp, reason := isFalsePositive(result)
if !isFp {
filteredResults = append(filteredResults, result)
} else if shouldLog {
ctx.Logger().Info("Filtered out known false positive", "result", result)
ctx.Logger().Info("Filtered out known false positive", "result", result, "reason", reason)
}
} else {
filteredResults = append(filteredResults, result)

View file

@ -9,6 +9,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
@ -28,7 +29,7 @@ func (d fakeDetector) Type() detectorspb.DetectorType {
return detectorspb.DetectorType(0)
}
func (d customFalsePositiveChecker) IsFalsePositive(result Result) bool {
func (d customFalsePositiveChecker) IsFalsePositive(result Result) (bool, string) {
return IsKnownFalsePositive(string(result.Raw), []FalsePositive{"a specific magic string"}, false)
}
@ -120,7 +121,7 @@ func TestIsFalsePositive(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
if got, _ := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
}
})

View file

@ -103,7 +103,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(result detectors.Result) bool {
func (s Scanner) IsFalsePositive(result detectors.Result) (bool, string) {
return detectors.IsKnownFalsePositive(string(result.Raw), []detectors.FalsePositive{"@ftp.freebsd.org"}, false)
}

View file

@ -133,8 +133,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func (s Scanner) Type() detectorspb.DetectorType {

View file

@ -74,7 +74,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
// Note that this false positive check happens **before** verification! I don't know why it's written this way
// but that's why this logic wasn't moved into a CustomFalsePositiveChecker implementation.
specificFPs := []detectors.FalsePositive{"github commit"}
if detectors.IsKnownFalsePositive(token, specificFPs, false) {
if isFp, _ := detectors.IsKnownFalsePositive(token, specificFPs, false); isFp {
continue
}

View file

@ -106,8 +106,8 @@ matchLoop:
return
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func tryRedactAnonymousJDBC(conn string) string {

View file

@ -73,8 +73,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func isErrDeterminate(err error) bool {

View file

@ -148,8 +148,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) ([]dete
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func findUriMatches(data []byte) []map[string]string {

View file

@ -151,7 +151,9 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool { return false }
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
type result struct {
CertificateURLs []string

View file

@ -13,7 +13,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}
@ -91,8 +91,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
type shopifyTokenAccessScopes struct {

View file

@ -103,8 +103,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
return false, ""
}
func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, error) {

View file

@ -1056,7 +1056,7 @@ func (e *Engine) processResult(
ctx context.Context,
data detectableChunk,
res detectors.Result,
isFalsePositive func(detectors.Result) bool,
isFalsePositive func(detectors.Result) (bool, string),
) {
ignoreLinePresent := false
if SupportsLineNumbers(data.chunk.SourceType) {
@ -1081,7 +1081,8 @@ func (e *Engine) processResult(
secret.DecoderType = data.decoder
if !res.Verified && res.Raw != nil {
secret.IsWordlistFalsePositive = isFalsePositive(res)
isFp, _ := isFalsePositive(res)
secret.IsWordlistFalsePositive = isFp
}
e.results <- secret