Improve false-positive (fp) ignore logic (#2351)

* forgot field change

* use Aho-Corasick for filter

* reduce wordlist sensitivity
This commit is contained in:
Dustin Decker 2024-01-29 11:28:46 -08:00 committed by GitHub
parent 303e191f38
commit 7befefd369
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 61 additions and 52 deletions

2
go.mod
View file

@ -39,6 +39,7 @@ require (
github.com/go-sql-driver/mysql v1.7.1 github.com/go-sql-driver/mysql v1.7.1
github.com/gobwas/glob v0.2.3 github.com/gobwas/glob v0.2.3
github.com/golang-jwt/jwt v3.2.2+incompatible github.com/golang-jwt/jwt v3.2.2+incompatible
github.com/golang-jwt/jwt/v4 v4.5.0
github.com/google/go-cmp v0.6.0 github.com/google/go-cmp v0.6.0
github.com/google/go-containerregistry v0.17.0 github.com/google/go-containerregistry v0.17.0
github.com/google/go-github/v42 v42.0.0 github.com/google/go-github/v42 v42.0.0
@ -166,7 +167,6 @@ require (
github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-ole/go-ole v1.2.6 // indirect
github.com/goccy/go-json v0.10.0 // indirect github.com/goccy/go-json v0.10.0 // indirect
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect
github.com/golang-sql/sqlexp v0.1.0 // indirect github.com/golang-sql/sqlexp v0.1.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect

View file

@ -52,7 +52,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1 := detectors.Result{ s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_AzureSearchQueryKey, DetectorType: detectorspb.DetectorType_AzureSearchQueryKey,
Raw: []byte(resMatch + resUrlMatch), Raw: []byte(resMatch),
RawV2: []byte(resMatch + resUrlMatch),
} }
if verify { if verify {
client := s.client client := s.client

View file

@ -4,12 +4,7 @@ array
uint uint
boolean boolean
config config
/>
</
\n
\r
parse parse
()
func func
param param
cancel cancel
@ -27,7 +22,6 @@ space
ident ident
block block
type type
\"
index index
case case
safe safe
@ -87,8 +81,6 @@ keyword
trace trace
truncate truncate
group group
a-z
0-9
href href
scale scale
model model
@ -106,26 +98,18 @@ close
defer defer
start start
;var ;var
":
storage storage
blob blob
cred cred
${
math math
.xml .xml
conflict conflict
];
$(
-{{
hack hack
-v1
-v2
package package
contract contract
schema schema
vec< vec<
ed25519 ed25519
(&
prefix prefix
suffix suffix
compress compress
@ -177,7 +161,6 @@ error
revoke revoke
encrypt encrypt
binary binary
md5
2018- 2018-
2019- 2019-
2020- 2020-
@ -188,14 +171,12 @@ root
readon readon
test test
2048 2048
1<<
match match
private private
key_ key_
aes256 aes256
aes128 aes128
state state
...
alloc alloc
proto proto
term term
@ -281,8 +262,6 @@ k8s.
role role
application application
explic explic
[[
]]
random random
DES3 DES3
3DES 3DES
@ -295,7 +274,6 @@ tag:
extend extend
split split
option option
t=0
fontsize fontsize
&quot; &quot;
keyboard keyboard
@ -307,4 +285,3 @@ develop
master master
slave slave
secondary secondary
---

View file

@ -6,6 +6,8 @@ import (
"strings" "strings"
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
ahocorasick "github.com/BobuSumisu/aho-corasick"
) )
var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"} var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
@ -21,16 +23,21 @@ var wordList []byte
//go:embed "programmingbooks.txt" //go:embed "programmingbooks.txt"
var programmingBookWords []byte var programmingBookWords []byte
type Wordlists struct { var filter *ahocorasick.Trie
wordList map[string]struct{}
badList map[string]struct{}
programmingBookWords map[string]struct{}
}
var FalsePositiveWordlists = Wordlists{ func init() {
wordList: bytesToCleanWordList(wordList), builder := ahocorasick.NewTrieBuilder()
badList: bytesToCleanWordList(badList),
programmingBookWords: bytesToCleanWordList(programmingBookWords), wordList := bytesToCleanWordList(wordList)
builder.AddStrings(wordList)
badList := bytesToCleanWordList(badList)
builder.AddStrings(badList)
programmingBookWords := bytesToCleanWordList(programmingBookWords)
builder.AddStrings(programmingBookWords)
filter = builder.Build()
} }
// IsKnownFalsePositive reports whether match is a known false positive, i.e. whether any of the disqualifying conditions are met // IsKnownFalsePositive reports whether match is a known false positive, i.e. whether any of the disqualifying conditions are met
@ -48,21 +55,11 @@ func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordChec
} }
if wordCheck { if wordCheck {
// check against common substring badlist if filter.MatchFirstString(lower) != nil {
if _, ok := FalsePositiveWordlists.badList[lower]; ok {
return true return true
} }
}
// check for dictionary word substrings
if _, ok := FalsePositiveWordlists.wordList[lower]; ok {
return true
}
// check for programming book token substrings
if _, ok := FalsePositiveWordlists.programmingBookWords[lower]; ok {
return true
}
}
return false return false
} }
@ -76,14 +73,19 @@ func HasDigit(key string) bool {
return false return false
} }
func bytesToCleanWordList(data []byte) map[string]struct{} { func bytesToCleanWordList(data []byte) []string {
words := make(map[string]struct{}) words := make(map[string]struct{})
for _, word := range strings.Split(string(data), "\n") { for _, word := range strings.Split(string(data), "\n") {
if strings.TrimSpace(word) != "" { if strings.TrimSpace(word) != "" {
words[strings.TrimSpace(strings.ToLower(word))] = struct{}{} words[strings.TrimSpace(strings.ToLower(word))] = struct{}{}
} }
} }
return words
wordList := make([]string, 0, len(words))
for word := range words {
wordList = append(wordList, word)
}
return wordList
} }
func StringShannonEntropy(input string) float64 { func StringShannonEntropy(input string) float64 {

View file

@ -12,6 +12,7 @@ func TestIsFalsePositive(t *testing.T) {
type args struct { type args struct {
match string match string
falsePositives []FalsePositive falsePositives []FalsePositive
useWordlist bool
} }
tests := []struct { tests := []struct {
name string name string
@ -23,21 +24,50 @@ func TestIsFalsePositive(t *testing.T) {
args: args{ args: args{
match: "example", match: "example",
falsePositives: DefaultFalsePositives, falsePositives: DefaultFalsePositives,
useWordlist: false,
}, },
want: true, want: true,
}, },
{
name: "fp - in wordlist",
args: args{
match: "sdfdsfprivatesfsdfd",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: true,
},
{
name: "fp - not in wordlist",
args: args{
match: "sdfdsfsfsdfd",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: false,
},
{ {
name: "not fp", name: "not fp",
args: args{ args: args{
match: "notafp123", match: "notafp123",
falsePositives: DefaultFalsePositives, falsePositives: DefaultFalsePositives,
useWordlist: false,
}, },
want: false, want: false,
}, },
{
name: "fp - in wordlist exact match",
args: args{
match: "private",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: true,
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, false); got != tt.want { if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want) t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
} }
}) })

View file

@ -16,7 +16,6 @@ ${uname
$value $value
$x:expr $x:expr
+3=err +3=err
a;
a][appendix_a a][appendix_a
abbreviated abbreviated
abcabcabc abcabcabc