mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Improve fp ignore logic (#2351)
* forgot field change
* use aho corasick for filter
* reduce wordlist sensitivity
This commit is contained in:
parent
303e191f38
commit
7befefd369
6 changed files with 61 additions and 52 deletions
2
go.mod
2
go.mod
|
@ -39,6 +39,7 @@ require (
|
|||
github.com/go-sql-driver/mysql v1.7.1
|
||||
github.com/gobwas/glob v0.2.3
|
||||
github.com/golang-jwt/jwt v3.2.2+incompatible
|
||||
github.com/golang-jwt/jwt/v4 v4.5.0
|
||||
github.com/google/go-cmp v0.6.0
|
||||
github.com/google/go-containerregistry v0.17.0
|
||||
github.com/google/go-github/v42 v42.0.0
|
||||
|
@ -166,7 +167,6 @@ require (
|
|||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/goccy/go-json v0.10.0 // indirect
|
||||
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
|
||||
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
|
||||
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect
|
||||
github.com/golang-sql/sqlexp v0.1.0 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||
|
|
|
@ -52,7 +52,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
|
|||
|
||||
s1 := detectors.Result{
|
||||
DetectorType: detectorspb.DetectorType_AzureSearchQueryKey,
|
||||
Raw: []byte(resMatch + resUrlMatch),
|
||||
Raw: []byte(resMatch),
|
||||
RawV2: []byte(resMatch + resUrlMatch),
|
||||
}
|
||||
if verify {
|
||||
client := s.client
|
||||
|
|
|
@ -4,12 +4,7 @@ array
|
|||
uint
|
||||
boolean
|
||||
config
|
||||
/>
|
||||
</
|
||||
\n
|
||||
\r
|
||||
parse
|
||||
()
|
||||
func
|
||||
param
|
||||
cancel
|
||||
|
@ -27,7 +22,6 @@ space
|
|||
ident
|
||||
block
|
||||
type
|
||||
\"
|
||||
index
|
||||
case
|
||||
safe
|
||||
|
@ -87,8 +81,6 @@ keyword
|
|||
trace
|
||||
truncate
|
||||
group
|
||||
a-z
|
||||
0-9
|
||||
href
|
||||
scale
|
||||
model
|
||||
|
@ -106,26 +98,18 @@ close
|
|||
defer
|
||||
start
|
||||
;var
|
||||
":
|
||||
storage
|
||||
blob
|
||||
cred
|
||||
${
|
||||
math
|
||||
.xml
|
||||
conflict
|
||||
];
|
||||
$(
|
||||
-{{
|
||||
hack
|
||||
-v1
|
||||
-v2
|
||||
package
|
||||
contract
|
||||
schema
|
||||
vec<
|
||||
ed25519
|
||||
(&
|
||||
prefix
|
||||
suffix
|
||||
compress
|
||||
|
@ -177,7 +161,6 @@ error
|
|||
revoke
|
||||
encrypt
|
||||
binary
|
||||
md5
|
||||
2018-
|
||||
2019-
|
||||
2020-
|
||||
|
@ -188,14 +171,12 @@ root
|
|||
readon
|
||||
test
|
||||
2048
|
||||
1<<
|
||||
match
|
||||
private
|
||||
key_
|
||||
aes256
|
||||
aes128
|
||||
state
|
||||
...
|
||||
alloc
|
||||
proto
|
||||
term
|
||||
|
@ -281,8 +262,6 @@ k8s.
|
|||
role
|
||||
application
|
||||
explic
|
||||
[[
|
||||
]]
|
||||
random
|
||||
DES3
|
||||
3DES
|
||||
|
@ -295,7 +274,6 @@ tag:
|
|||
extend
|
||||
split
|
||||
option
|
||||
t=0
|
||||
fontsize
|
||||
"
|
||||
keyboard
|
||||
|
@ -306,5 +284,4 @@ iphone
|
|||
develop
|
||||
master
|
||||
slave
|
||||
secondary
|
||||
---
|
||||
secondary
|
|
@ -6,6 +6,8 @@ import (
|
|||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
ahocorasick "github.com/BobuSumisu/aho-corasick"
|
||||
)
|
||||
|
||||
var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
|
||||
|
@ -21,16 +23,21 @@ var wordList []byte
|
|||
//go:embed "programmingbooks.txt"
|
||||
var programmingBookWords []byte
|
||||
|
||||
type Wordlists struct {
|
||||
wordList map[string]struct{}
|
||||
badList map[string]struct{}
|
||||
programmingBookWords map[string]struct{}
|
||||
}
|
||||
var filter *ahocorasick.Trie
|
||||
|
||||
var FalsePositiveWordlists = Wordlists{
|
||||
wordList: bytesToCleanWordList(wordList),
|
||||
badList: bytesToCleanWordList(badList),
|
||||
programmingBookWords: bytesToCleanWordList(programmingBookWords),
|
||||
// init builds the package-level Aho-Corasick trie `filter` from three word
// lists: wordList, badList and programmingBookWords. Each list is cleaned by
// bytesToCleanWordList and then added to a single trie builder, so one trie
// covers all three lists.
func init() {
|
||||
builder := ahocorasick.NewTrieBuilder()
|
||||
|
||||
// Each local declared below shadows the outer variable of the same name;
// the right-hand side of each := still reads the outer value.
wordList := bytesToCleanWordList(wordList)
|
||||
builder.AddStrings(wordList)
|
||||
|
||||
badList := bytesToCleanWordList(badList)
|
||||
builder.AddStrings(badList)
|
||||
|
||||
programmingBookWords := bytesToCleanWordList(programmingBookWords)
|
||||
builder.AddStrings(programmingBookWords)
|
||||
|
||||
// Assign the finished trie to the package-level filter variable.
filter = builder.Build()
|
||||
}
|
||||
|
||||
// IsKnownFalsePositive returns true if any of the disqualifying conditions are met, meaning the match should not be reported as a valid secret finding.
|
||||
|
@ -48,21 +55,11 @@ func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordChec
|
|||
}
|
||||
|
||||
if wordCheck {
|
||||
// check against common substring badlist
|
||||
if _, ok := FalsePositiveWordlists.badList[lower]; ok {
|
||||
return true
|
||||
}
|
||||
|
||||
// check for dictionary word substrings
|
||||
if _, ok := FalsePositiveWordlists.wordList[lower]; ok {
|
||||
return true
|
||||
}
|
||||
|
||||
// check for programming book token substrings
|
||||
if _, ok := FalsePositiveWordlists.programmingBookWords[lower]; ok {
|
||||
if filter.MatchFirstString(lower) != nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -76,14 +73,19 @@ func HasDigit(key string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// bytesToCleanWordList splits data on "\n", skips lines that are blank after
// trimming, and stores each remaining line (lowercased and trimmed) as a key
// in a set, so duplicate entries collapse to one.
//
// NOTE(review): this span comes from a rendered diff and shows two signatures
// and two return statements. The map-returning pair is presumably the
// previous version and the []string pair the new one — confirm against the
// actual file.
func bytesToCleanWordList(data []byte) map[string]struct{} {
|
||||
func bytesToCleanWordList(data []byte) []string {
|
||||
words := make(map[string]struct{})
|
||||
for _, word := range strings.Split(string(data), "\n") {
|
||||
if strings.TrimSpace(word) != "" {
|
||||
words[strings.TrimSpace(strings.ToLower(word))] = struct{}{}
|
||||
}
|
||||
}
|
||||
return words
|
||||
|
||||
// Copy the set's keys into a slice pre-sized to the number of unique words.
// The slice is filled by ranging over a map, so its order is unspecified.
wordList := make([]string, 0, len(words))
|
||||
for word := range words {
|
||||
wordList = append(wordList, word)
|
||||
}
|
||||
return wordList
|
||||
}
|
||||
|
||||
func StringShannonEntropy(input string) float64 {
|
||||
|
|
|
@ -12,6 +12,7 @@ func TestIsFalsePositive(t *testing.T) {
|
|||
type args struct {
|
||||
match string
|
||||
falsePositives []FalsePositive
|
||||
useWordlist bool
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
|
@ -23,21 +24,50 @@ func TestIsFalsePositive(t *testing.T) {
|
|||
args: args{
|
||||
match: "example",
|
||||
falsePositives: DefaultFalsePositives,
|
||||
useWordlist: false,
|
||||
},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "fp - in wordlist",
|
||||
args: args{
|
||||
match: "sdfdsfprivatesfsdfd",
|
||||
falsePositives: DefaultFalsePositives,
|
||||
useWordlist: true,
|
||||
},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "fp - not in wordlist",
|
||||
args: args{
|
||||
match: "sdfdsfsfsdfd",
|
||||
falsePositives: DefaultFalsePositives,
|
||||
useWordlist: true,
|
||||
},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "not fp",
|
||||
args: args{
|
||||
match: "notafp123",
|
||||
falsePositives: DefaultFalsePositives,
|
||||
useWordlist: false,
|
||||
},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "fp - in wordlist exact match",
|
||||
args: args{
|
||||
match: "private",
|
||||
falsePositives: DefaultFalsePositives,
|
||||
useWordlist: true,
|
||||
},
|
||||
want: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, false); got != tt.want {
|
||||
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
|
||||
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -15,8 +15,7 @@ ${ts.map(kv
|
|||
${uname
|
||||
$value
|
||||
$x:expr
|
||||
+3=err
|
||||
a;
|
||||
+3=err
|
||||
a][appendix_a
|
||||
abbreviated
|
||||
abcabcabc
|
||||
|
|
Loading…
Reference in a new issue