mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Improve fp ignore logic (#2351)
* forgot field change
* use aho corasick for filter
* reduce wordlist sensitivity
This commit is contained in:
parent
303e191f38
commit
7befefd369
6 changed files with 61 additions and 52 deletions
2
go.mod
2
go.mod
|
@ -39,6 +39,7 @@ require (
|
||||||
github.com/go-sql-driver/mysql v1.7.1
|
github.com/go-sql-driver/mysql v1.7.1
|
||||||
github.com/gobwas/glob v0.2.3
|
github.com/gobwas/glob v0.2.3
|
||||||
github.com/golang-jwt/jwt v3.2.2+incompatible
|
github.com/golang-jwt/jwt v3.2.2+incompatible
|
||||||
|
github.com/golang-jwt/jwt/v4 v4.5.0
|
||||||
github.com/google/go-cmp v0.6.0
|
github.com/google/go-cmp v0.6.0
|
||||||
github.com/google/go-containerregistry v0.17.0
|
github.com/google/go-containerregistry v0.17.0
|
||||||
github.com/google/go-github/v42 v42.0.0
|
github.com/google/go-github/v42 v42.0.0
|
||||||
|
@ -166,7 +167,6 @@ require (
|
||||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||||
github.com/goccy/go-json v0.10.0 // indirect
|
github.com/goccy/go-json v0.10.0 // indirect
|
||||||
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
|
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
|
||||||
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
|
|
||||||
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect
|
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect
|
||||||
github.com/golang-sql/sqlexp v0.1.0 // indirect
|
github.com/golang-sql/sqlexp v0.1.0 // indirect
|
||||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||||
|
|
|
@ -52,7 +52,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
|
||||||
|
|
||||||
s1 := detectors.Result{
|
s1 := detectors.Result{
|
||||||
DetectorType: detectorspb.DetectorType_AzureSearchQueryKey,
|
DetectorType: detectorspb.DetectorType_AzureSearchQueryKey,
|
||||||
Raw: []byte(resMatch + resUrlMatch),
|
Raw: []byte(resMatch),
|
||||||
|
RawV2: []byte(resMatch + resUrlMatch),
|
||||||
}
|
}
|
||||||
if verify {
|
if verify {
|
||||||
client := s.client
|
client := s.client
|
||||||
|
|
|
@ -4,12 +4,7 @@ array
|
||||||
uint
|
uint
|
||||||
boolean
|
boolean
|
||||||
config
|
config
|
||||||
/>
|
|
||||||
</
|
|
||||||
\n
|
|
||||||
\r
|
|
||||||
parse
|
parse
|
||||||
()
|
|
||||||
func
|
func
|
||||||
param
|
param
|
||||||
cancel
|
cancel
|
||||||
|
@ -27,7 +22,6 @@ space
|
||||||
ident
|
ident
|
||||||
block
|
block
|
||||||
type
|
type
|
||||||
\"
|
|
||||||
index
|
index
|
||||||
case
|
case
|
||||||
safe
|
safe
|
||||||
|
@ -87,8 +81,6 @@ keyword
|
||||||
trace
|
trace
|
||||||
truncate
|
truncate
|
||||||
group
|
group
|
||||||
a-z
|
|
||||||
0-9
|
|
||||||
href
|
href
|
||||||
scale
|
scale
|
||||||
model
|
model
|
||||||
|
@ -106,26 +98,18 @@ close
|
||||||
defer
|
defer
|
||||||
start
|
start
|
||||||
;var
|
;var
|
||||||
":
|
|
||||||
storage
|
storage
|
||||||
blob
|
blob
|
||||||
cred
|
cred
|
||||||
${
|
|
||||||
math
|
math
|
||||||
.xml
|
.xml
|
||||||
conflict
|
conflict
|
||||||
];
|
|
||||||
$(
|
|
||||||
-{{
|
|
||||||
hack
|
hack
|
||||||
-v1
|
|
||||||
-v2
|
|
||||||
package
|
package
|
||||||
contract
|
contract
|
||||||
schema
|
schema
|
||||||
vec<
|
vec<
|
||||||
ed25519
|
ed25519
|
||||||
(&
|
|
||||||
prefix
|
prefix
|
||||||
suffix
|
suffix
|
||||||
compress
|
compress
|
||||||
|
@ -177,7 +161,6 @@ error
|
||||||
revoke
|
revoke
|
||||||
encrypt
|
encrypt
|
||||||
binary
|
binary
|
||||||
md5
|
|
||||||
2018-
|
2018-
|
||||||
2019-
|
2019-
|
||||||
2020-
|
2020-
|
||||||
|
@ -188,14 +171,12 @@ root
|
||||||
readon
|
readon
|
||||||
test
|
test
|
||||||
2048
|
2048
|
||||||
1<<
|
|
||||||
match
|
match
|
||||||
private
|
private
|
||||||
key_
|
key_
|
||||||
aes256
|
aes256
|
||||||
aes128
|
aes128
|
||||||
state
|
state
|
||||||
...
|
|
||||||
alloc
|
alloc
|
||||||
proto
|
proto
|
||||||
term
|
term
|
||||||
|
@ -281,8 +262,6 @@ k8s.
|
||||||
role
|
role
|
||||||
application
|
application
|
||||||
explic
|
explic
|
||||||
[[
|
|
||||||
]]
|
|
||||||
random
|
random
|
||||||
DES3
|
DES3
|
||||||
3DES
|
3DES
|
||||||
|
@ -295,7 +274,6 @@ tag:
|
||||||
extend
|
extend
|
||||||
split
|
split
|
||||||
option
|
option
|
||||||
t=0
|
|
||||||
fontsize
|
fontsize
|
||||||
"
|
"
|
||||||
keyboard
|
keyboard
|
||||||
|
@ -306,5 +284,4 @@ iphone
|
||||||
develop
|
develop
|
||||||
master
|
master
|
||||||
slave
|
slave
|
||||||
secondary
|
secondary
|
||||||
---
|
|
|
@ -6,6 +6,8 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
|
ahocorasick "github.com/BobuSumisu/aho-corasick"
|
||||||
)
|
)
|
||||||
|
|
||||||
var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
|
var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
|
||||||
|
@ -21,16 +23,21 @@ var wordList []byte
|
||||||
//go:embed "programmingbooks.txt"
|
//go:embed "programmingbooks.txt"
|
||||||
var programmingBookWords []byte
|
var programmingBookWords []byte
|
||||||
|
|
||||||
type Wordlists struct {
|
var filter *ahocorasick.Trie
|
||||||
wordList map[string]struct{}
|
|
||||||
badList map[string]struct{}
|
|
||||||
programmingBookWords map[string]struct{}
|
|
||||||
}
|
|
||||||
|
|
||||||
var FalsePositiveWordlists = Wordlists{
|
func init() {
|
||||||
wordList: bytesToCleanWordList(wordList),
|
builder := ahocorasick.NewTrieBuilder()
|
||||||
badList: bytesToCleanWordList(badList),
|
|
||||||
programmingBookWords: bytesToCleanWordList(programmingBookWords),
|
wordList := bytesToCleanWordList(wordList)
|
||||||
|
builder.AddStrings(wordList)
|
||||||
|
|
||||||
|
badList := bytesToCleanWordList(badList)
|
||||||
|
builder.AddStrings(badList)
|
||||||
|
|
||||||
|
programmingBookWords := bytesToCleanWordList(programmingBookWords)
|
||||||
|
builder.AddStrings(programmingBookWords)
|
||||||
|
|
||||||
|
filter = builder.Build()
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsKnownFalsePositive will not return a valid secret finding if any of the disqualifying conditions are met
|
// IsKnownFalsePositive will not return a valid secret finding if any of the disqualifying conditions are met
|
||||||
|
@ -48,21 +55,11 @@ func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordChec
|
||||||
}
|
}
|
||||||
|
|
||||||
if wordCheck {
|
if wordCheck {
|
||||||
// check against common substring badlist
|
if filter.MatchFirstString(lower) != nil {
|
||||||
if _, ok := FalsePositiveWordlists.badList[lower]; ok {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for dictionary word substrings
|
|
||||||
if _, ok := FalsePositiveWordlists.wordList[lower]; ok {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for programming book token substrings
|
|
||||||
if _, ok := FalsePositiveWordlists.programmingBookWords[lower]; ok {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,14 +73,19 @@ func HasDigit(key string) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func bytesToCleanWordList(data []byte) map[string]struct{} {
|
func bytesToCleanWordList(data []byte) []string {
|
||||||
words := make(map[string]struct{})
|
words := make(map[string]struct{})
|
||||||
for _, word := range strings.Split(string(data), "\n") {
|
for _, word := range strings.Split(string(data), "\n") {
|
||||||
if strings.TrimSpace(word) != "" {
|
if strings.TrimSpace(word) != "" {
|
||||||
words[strings.TrimSpace(strings.ToLower(word))] = struct{}{}
|
words[strings.TrimSpace(strings.ToLower(word))] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return words
|
|
||||||
|
wordList := make([]string, 0, len(words))
|
||||||
|
for word := range words {
|
||||||
|
wordList = append(wordList, word)
|
||||||
|
}
|
||||||
|
return wordList
|
||||||
}
|
}
|
||||||
|
|
||||||
func StringShannonEntropy(input string) float64 {
|
func StringShannonEntropy(input string) float64 {
|
||||||
|
|
|
@ -12,6 +12,7 @@ func TestIsFalsePositive(t *testing.T) {
|
||||||
type args struct {
|
type args struct {
|
||||||
match string
|
match string
|
||||||
falsePositives []FalsePositive
|
falsePositives []FalsePositive
|
||||||
|
useWordlist bool
|
||||||
}
|
}
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
@ -23,21 +24,50 @@ func TestIsFalsePositive(t *testing.T) {
|
||||||
args: args{
|
args: args{
|
||||||
match: "example",
|
match: "example",
|
||||||
falsePositives: DefaultFalsePositives,
|
falsePositives: DefaultFalsePositives,
|
||||||
|
useWordlist: false,
|
||||||
},
|
},
|
||||||
want: true,
|
want: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "fp - in wordlist",
|
||||||
|
args: args{
|
||||||
|
match: "sdfdsfprivatesfsdfd",
|
||||||
|
falsePositives: DefaultFalsePositives,
|
||||||
|
useWordlist: true,
|
||||||
|
},
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "fp - not in wordlist",
|
||||||
|
args: args{
|
||||||
|
match: "sdfdsfsfsdfd",
|
||||||
|
falsePositives: DefaultFalsePositives,
|
||||||
|
useWordlist: true,
|
||||||
|
},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "not fp",
|
name: "not fp",
|
||||||
args: args{
|
args: args{
|
||||||
match: "notafp123",
|
match: "notafp123",
|
||||||
falsePositives: DefaultFalsePositives,
|
falsePositives: DefaultFalsePositives,
|
||||||
|
useWordlist: false,
|
||||||
},
|
},
|
||||||
want: false,
|
want: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "fp - in wordlist exact match",
|
||||||
|
args: args{
|
||||||
|
match: "private",
|
||||||
|
falsePositives: DefaultFalsePositives,
|
||||||
|
useWordlist: true,
|
||||||
|
},
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, false); got != tt.want {
|
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
|
||||||
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
|
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
@ -15,8 +15,7 @@ ${ts.map(kv
|
||||||
${uname
|
${uname
|
||||||
$value
|
$value
|
||||||
$x:expr
|
$x:expr
|
||||||
+3=err
|
+3=err
|
||||||
a;
|
|
||||||
a][appendix_a
|
a][appendix_a
|
||||||
abbreviated
|
abbreviated
|
||||||
abcabcabc
|
abcabcabc
|
||||||
|
|
Loading…
Reference in a new issue