Expose detector-specific false positive logic (#2743)

This PR:

Creates an optional interface that detectors can use to customize their false positive detection
Implements this interface on detectors that have custom logic
In most cases this "custom logic" is simply a no-op because the detector does not participate in false positive detection
Eliminates inline (old-style) false positive exclusion in a few detectors that #2643 missed
This commit is contained in:
Cody Rose 2024-04-30 16:10:26 -04:00 committed by GitHub
parent dc930f9594
commit 2f7029bc4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 135 additions and 71 deletions

View file

@ -29,6 +29,7 @@ type CustomRegexWebhook struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*CustomRegexWebhook)(nil)
var _ detectors.CustomFalsePositiveChecker = (*CustomRegexWebhook)(nil)
// NewWebhookCustomRegex initializes and validates a CustomRegexWebhook. An
// unexported type is intentionally returned here to ensure the values have
@ -109,6 +110,10 @@ func (c *CustomRegexWebhook) FromData(ctx context.Context, verify bool, data []b
return results, nil
}
func (c *CustomRegexWebhook) IsFalsePositive(_ detectors.Result) bool {
return false
}
func (c *CustomRegexWebhook) createResults(ctx context.Context, match map[string][]string, verify bool, results chan<- detectors.Result) error {
if common.IsDone(ctx) {
// TODO: Log we're possibly leaving out results.

View file

@ -23,6 +23,7 @@ type Scanner struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
defaultClient = common.SaneHttpClient()
@ -105,6 +106,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_AzureBatch
}

View file

@ -19,6 +19,7 @@ type Scanner struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
defaultClient = common.SaneHttpClient()
@ -94,6 +95,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_AzureContainerRegistry
}

View file

@ -8,8 +8,6 @@ import (
"unicode/utf8"
ahocorasick "github.com/BobuSumisu/aho-corasick"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
)
@ -17,6 +15,10 @@ var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcd
type FalsePositive string
type CustomFalsePositiveChecker interface {
IsFalsePositive(result Result) bool
}
//go:embed "badlist.txt"
var badList []byte
@ -43,6 +45,17 @@ func init() {
filter = builder.Build()
}
func GetFalsePositiveCheck(detector Detector) func(Result) bool {
checker, ok := detector.(CustomFalsePositiveChecker)
if ok {
return checker.IsFalsePositive
}
return func(res Result) bool {
return IsKnownFalsePositive(string(res.Raw), DefaultFalsePositives, true)
}
}
// IsKnownFalsePositive will not return a valid secret finding if any of the disqualifying conditions are met
// Currently that includes: No number, english word in key, or matches common example pattens.
// Only the secret key material should be passed into this function
@ -132,34 +145,17 @@ func FilterResultsWithEntropy(ctx context.Context, results []Result, entropy flo
}
// FilterKnownFalsePositives filters out known false positives from the results.
func FilterKnownFalsePositives(ctx context.Context, results []Result, falsePositives []FalsePositive, wordCheck bool, shouldLog bool) []Result {
func FilterKnownFalsePositives(ctx context.Context, detector Detector, results []Result, shouldLog bool) []Result {
var filteredResults []Result
isFalsePositive := GetFalsePositiveCheck(detector)
for _, result := range results {
if !result.Verified {
switch result.DetectorType {
case detectorspb.DetectorType_CustomRegex:
if !result.Verified && result.Raw != nil {
if !isFalsePositive(result) {
filteredResults = append(filteredResults, result)
case detectorspb.DetectorType_GCP,
detectorspb.DetectorType_URI,
detectorspb.DetectorType_AzureBatch,
detectorspb.DetectorType_AzureContainerRegistry,
detectorspb.DetectorType_Shopify,
detectorspb.DetectorType_Postgres,
detectorspb.DetectorType_MongoDB,
detectorspb.DetectorType_JDBC:
filteredResults = append(filteredResults, result)
default:
if result.Raw != nil {
if !IsKnownFalsePositive(string(result.Raw), falsePositives, wordCheck) {
filteredResults = append(filteredResults, result)
} else {
if shouldLog {
ctx.Logger().Info("Filtered out known false positive", "result", result)
}
}
} else {
filteredResults = append(filteredResults, result)
}
} else if shouldLog {
ctx.Logger().Info("Filtered out known false positive", "result", result)
}
} else {
filteredResults = append(filteredResults, result)

View file

@ -4,10 +4,63 @@
package detectors
import (
"context"
_ "embed"
"testing"
"github.com/stretchr/testify/assert"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
type fakeDetector struct{}
type customFalsePositiveChecker struct{ fakeDetector }
func (d fakeDetector) FromData(ctx context.Context, verify bool, data []byte) ([]Result, error) {
return nil, nil
}
func (d fakeDetector) Keywords() []string {
return nil
}
func (d fakeDetector) Type() detectorspb.DetectorType {
return detectorspb.DetectorType(0)
}
func (d customFalsePositiveChecker) IsFalsePositive(result Result) bool {
return IsKnownFalsePositive(string(result.Raw), []FalsePositive{"a specific magic string"}, false)
}
func TestFilterKnownFalsePositives_DefaultLogic(t *testing.T) {
results := []Result{
{Raw: []byte("00000")}, // "default" false positive list
{Raw: []byte("number")}, // from wordlist
{Raw: []byte("hga8adshla3434g")}, // real secret
}
expected := []Result{
{Raw: []byte("hga8adshla3434g")},
}
filtered := FilterKnownFalsePositives(logContext.Background(), fakeDetector{}, results, false)
assert.ElementsMatch(t, expected, filtered)
}
func TestFilterKnownFalsePositives_CustomLogic(t *testing.T) {
results := []Result{
{Raw: []byte("a specific magic string")}, // specific target
{Raw: []byte("00000")}, // "default" false positive list
{Raw: []byte("number")}, // from wordlist
{Raw: []byte("hga8adshla3434g")}, // real secret
}
expected := []Result{
{Raw: []byte("00000")},
{Raw: []byte("number")},
{Raw: []byte("hga8adshla3434g")},
}
filtered := FilterKnownFalsePositives(logContext.Background(), customFalsePositiveChecker{}, results, false)
assert.ElementsMatch(t, expected, filtered)
}
func TestIsFalsePositive(t *testing.T) {
type args struct {
match string

View file

@ -30,6 +30,7 @@ type Scanner struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
keyPat = regexp.MustCompile(`\bftp://[\S]{3,50}:([\S]{3,50})@[-.%\w\/:]+\b`)
@ -96,16 +97,16 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
if detectors.IsKnownFalsePositive(string(s1.Raw), []detectors.FalsePositive{"@ftp.freebsd.org"}, false) {
continue
}
results = append(results, s1)
}
return results, nil
}
func (s Scanner) IsFalsePositive(result detectors.Result) bool {
return detectors.IsKnownFalsePositive(string(result.Raw), []detectors.FalsePositive{"@ftp.freebsd.org"}, false)
}
func isErrDeterminate(e error) bool {
ftpErr := &textproto.Error{}
return errors.As(e, &ftpErr) && ftpErr.Code == ftpNotLoggedIn

View file

@ -18,6 +18,7 @@ type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
keyPat = regexp.MustCompile(`\{[^{]+auth_provider_x509_cert_url[^}]+\}`)
@ -120,6 +121,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_GCP
}

View file

@ -82,11 +82,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.SetVerificationError(verificationErr, match)
}
// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
if !s1.Verified && detectors.IsKnownFalsePositive(string(s1.Raw), detectors.DefaultFalsePositives, true) {
continue
}
results = append(results, s1)
}

View file

@ -75,10 +75,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
// Least expensive-> most expensive filters.
// Substrings, then patterns.
if detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) {
continue
}
// toss any that match regexes
if hasReMatch(s.excludeMatchers, token) {
continue

View file

@ -69,6 +69,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
token := match[1]
// Note that this false positive check happens **before** verification! I don't know why it's written this way
// but that's why this logic wasn't moved into a CustomFalsePositiveChecker implementation.
specificFPs := []detectors.FalsePositive{"github commit"}
if detectors.IsKnownFalsePositive(token, specificFPs, false) {
continue

View file

@ -83,9 +83,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
if !s1.Verified && detectors.IsKnownFalsePositive(string(s1.Raw), detectors.DefaultFalsePositives, true) {
continue
}
results = append(results, s1)
}

View file

@ -45,6 +45,7 @@ func WithIgnorePattern(ignoreStrings []string) func(*Scanner) {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
keyPat = regexp.MustCompile(`(?i)jdbc:[\w]{3,10}:[^\s"']{0,512}`)
@ -98,14 +99,16 @@ matchLoop:
// TODO: specialized redaction
}
results = append(results, s)
}
return
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func tryRedactAnonymousJDBC(conn string) string {
if s, ok := tryRedactURLParams(conn); ok {
return s

View file

@ -83,11 +83,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
if !s1.Verified && detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) {
continue
}
results = append(results, s1)
}

View file

@ -24,6 +24,7 @@ type Scanner struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
defaultTimeout = 2 * time.Second
@ -72,6 +73,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func isErrDeterminate(err error) bool {
switch e := err.(type) {
case topology.ConnectionError:

View file

@ -60,11 +60,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.SetVerificationError(verificationErr, match)
}
// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
if !s1.Verified && detectors.IsKnownFalsePositive(match, detectors.DefaultFalsePositives, true) {
continue
}
results = append(results, s1)
}

View file

@ -60,11 +60,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.SetVerificationError(verificationErr, match)
}
// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
if !s1.Verified && detectors.IsKnownFalsePositive(match, detectors.DefaultFalsePositives, true) {
continue
}
results = append(results, s1)
}

View file

@ -55,6 +55,9 @@ type Scanner struct {
detectLoopback bool // Automated tests run against localhost, but we want to ignore those results in the wild
}
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
func (s Scanner) Keywords() []string {
return []string{"postgres"}
}
@ -144,6 +147,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) ([]dete
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func findUriMatches(data []byte) []map[string]string {
var matches []map[string]string
for _, uri := range uriPattern.FindAll(data, -1) {

View file

@ -3,10 +3,11 @@ package shopify
import (
"context"
"encoding/json"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"
regexp "github.com/wasilibs/go-re2"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
@ -16,6 +17,7 @@ type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
client = common.SaneHttpClient()
@ -87,6 +89,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
type shopifyTokenAccessScopes struct {
AccessScopes []struct {
Handle string `json:"handle"`

View file

@ -2,12 +2,13 @@ package uri
import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"net/url"
"strings"
"time"
regexp "github.com/wasilibs/go-re2"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
@ -20,6 +21,7 @@ type Scanner struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var (
keyPat = regexp.MustCompile(`\b(?:https?:)?\/\/[\S]{3,50}:([\S]{3,50})@[-.%\w\/:]+\b`)
@ -101,6 +103,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return results, nil
}
func (s Scanner) IsFalsePositive(_ detectors.Result) bool {
return false
}
func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, error) {
// defuse most SSRF payloads
u.Path = strings.TrimSuffix(u.Path, "/")

View file

@ -69,14 +69,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.SetVerificationError(verificationErr, idMatch, secretMatch)
}
// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
if !s1.Verified {
if detectors.IsKnownFalsePositive(idMatch, detectors.DefaultFalsePositives, true) ||
detectors.IsKnownFalsePositive(secretMatch, detectors.DefaultFalsePositives, true) {
continue
}
}
results = append(results, s1)
// If we've found a verified match with this ID, we don't need to look for anymore. So move on to the next ID.

View file

@ -831,7 +831,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
results = detectors.CleanResults(results)
}
results = detectors.FilterKnownFalsePositives(ctx, results, detectors.DefaultFalsePositives, true, e.logFilteredUnverified)
results = detectors.FilterKnownFalsePositives(ctx, data.detector, results, e.logFilteredUnverified)
if e.filterEntropy != nil {
results = detectors.FilterResultsWithEntropy(ctx, results, *e.filterEntropy, e.logFilteredUnverified)