Improve fp ignore logic (#2351)

* forgot field change

* use aho corasick for filter

* reduce wordlist sensitivity
This commit is contained in:
Dustin Decker 2024-01-29 11:28:46 -08:00 committed by GitHub
parent 303e191f38
commit 7befefd369
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 61 additions and 52 deletions

2
go.mod
View file

@ -39,6 +39,7 @@ require (
github.com/go-sql-driver/mysql v1.7.1
github.com/gobwas/glob v0.2.3
github.com/golang-jwt/jwt v3.2.2+incompatible
github.com/golang-jwt/jwt/v4 v4.5.0
github.com/google/go-cmp v0.6.0
github.com/google/go-containerregistry v0.17.0
github.com/google/go-github/v42 v42.0.0
@ -166,7 +167,6 @@ require (
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/goccy/go-json v0.10.0 // indirect
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect
github.com/golang-sql/sqlexp v0.1.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect

View file

@ -52,7 +52,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_AzureSearchQueryKey,
Raw: []byte(resMatch + resUrlMatch),
Raw: []byte(resMatch),
RawV2: []byte(resMatch + resUrlMatch),
}
if verify {
client := s.client

View file

@ -4,12 +4,7 @@ array
uint
boolean
config
/>
</
\n
\r
parse
()
func
param
cancel
@ -27,7 +22,6 @@ space
ident
block
type
\"
index
case
safe
@ -87,8 +81,6 @@ keyword
trace
truncate
group
a-z
0-9
href
scale
model
@ -106,26 +98,18 @@ close
defer
start
;var
":
storage
blob
cred
${
math
.xml
conflict
];
$(
-{{
hack
-v1
-v2
package
contract
schema
vec<
ed25519
(&
prefix
suffix
compress
@ -177,7 +161,6 @@ error
revoke
encrypt
binary
md5
2018-
2019-
2020-
@ -188,14 +171,12 @@ root
readon
test
2048
1<<
match
private
key_
aes256
aes128
state
...
alloc
proto
term
@ -281,8 +262,6 @@ k8s.
role
application
explic
[[
]]
random
DES3
3DES
@ -295,7 +274,6 @@ tag:
extend
split
option
t=0
fontsize
&quot;
keyboard
@ -307,4 +285,3 @@ develop
master
slave
secondary
---

View file

@ -6,6 +6,8 @@ import (
"strings"
"unicode"
"unicode/utf8"
ahocorasick "github.com/BobuSumisu/aho-corasick"
)
// DefaultFalsePositives is a ready-made list of FalsePositive terms that can
// be passed as the falsePositives argument of IsKnownFalsePositive.
var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
@ -21,16 +23,21 @@ var wordList []byte
//go:embed "programmingbooks.txt"
var programmingBookWords []byte
// Wordlists groups three word lists, each stored as a set: the map keys are
// the words and the empty-struct values carry no data.
type Wordlists struct {
wordList map[string]struct{}
badList map[string]struct{}
programmingBookWords map[string]struct{}
}
// filter is the trie that IsKnownFalsePositive consults when wordCheck is set.
// It is nil until init has run.
var filter *ahocorasick.Trie
var FalsePositiveWordlists = Wordlists{
wordList: bytesToCleanWordList(wordList),
badList: bytesToCleanWordList(badList),
programmingBookWords: bytesToCleanWordList(programmingBookWords),
// init populates the package-level filter: the word list, bad list and
// programming-book word data are each cleaned with bytesToCleanWordList and
// added to a single ahocorasick trie.
func init() {
builder := ahocorasick.NewTrieBuilder()
// Each local below shadows the package-level variable of the same name; the
// right-hand side is evaluated first, so it still reads the raw bytes.
wordList := bytesToCleanWordList(wordList)
builder.AddStrings(wordList)
badList := bytesToCleanWordList(badList)
builder.AddStrings(badList)
programmingBookWords := bytesToCleanWordList(programmingBookWords)
builder.AddStrings(programmingBookWords)
filter = builder.Build()
}
// IsKnownFalsePositive returns true if any of the disqualifying conditions are met, meaning the match is not a valid secret finding.
@ -48,21 +55,11 @@ func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordChec
}
if wordCheck {
// check for any wordlist entry (badlist, dictionary words, programming book tokens) occurring as a substring
if _, ok := FalsePositiveWordlists.badList[lower]; ok {
if filter.MatchFirstString(lower) != nil {
return true
}
}
// check for dictionary word substrings
if _, ok := FalsePositiveWordlists.wordList[lower]; ok {
return true
}
// check for programming book token substrings
if _, ok := FalsePositiveWordlists.programmingBookWords[lower]; ok {
return true
}
}
return false
}
@ -76,14 +73,19 @@ func HasDigit(key string) bool {
return false
}
// bytesToCleanWordList splits data on newlines and collects every non-blank
// entry, trimmed of surrounding whitespace and lower-cased, with duplicates
// collapsed.
func bytesToCleanWordList(data []byte) map[string]struct{} {
func bytesToCleanWordList(data []byte) []string {
// The map is used as a set so that repeated words are stored only once.
words := make(map[string]struct{})
for _, word := range strings.Split(string(data), "\n") {
if strings.TrimSpace(word) != "" {
words[strings.TrimSpace(strings.ToLower(word))] = struct{}{}
}
}
return words
// Flatten the set into a slice; Go does not define map iteration order, so
// the order of the resulting words is unspecified.
wordList := make([]string, 0, len(words))
for word := range words {
wordList = append(wordList, word)
}
return wordList
}
func StringShannonEntropy(input string) float64 {

View file

@ -12,6 +12,7 @@ func TestIsFalsePositive(t *testing.T) {
type args struct {
match string
falsePositives []FalsePositive
useWordlist bool
}
tests := []struct {
name string
@ -23,21 +24,50 @@ func TestIsFalsePositive(t *testing.T) {
args: args{
match: "example",
falsePositives: DefaultFalsePositives,
useWordlist: false,
},
want: true,
},
{
name: "fp - in wordlist",
args: args{
match: "sdfdsfprivatesfsdfd",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: true,
},
{
name: "fp - not in wordlist",
args: args{
match: "sdfdsfsfsdfd",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: false,
},
{
name: "not fp",
args: args{
match: "notafp123",
falsePositives: DefaultFalsePositives,
useWordlist: false,
},
want: false,
},
{
name: "fp - in wordlist exact match",
args: args{
match: "private",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, false); got != tt.want {
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
}
})

View file

@ -16,7 +16,6 @@ ${uname
$value
$x:expr
+3=err
a;
a][appendix_a
abbreviated
abcabcabc