2022-01-19 06:24:42 +00:00
|
|
|
package detectors
|
|
|
|
|
|
|
|
import (
|
|
|
|
_ "embed"
|
2023-10-09 02:52:28 +00:00
|
|
|
"math"
|
2022-01-19 06:24:42 +00:00
|
|
|
"strings"
|
|
|
|
"unicode"
|
2023-11-03 15:45:00 +00:00
|
|
|
"unicode/utf8"
|
2024-01-29 19:28:46 +00:00
|
|
|
|
|
|
|
ahocorasick "github.com/BobuSumisu/aho-corasick"
|
2022-01-19 06:24:42 +00:00
|
|
|
)
|
|
|
|
|
2023-06-25 15:55:11 +00:00
|
|
|
// FalsePositive is a substring that marks a candidate secret as a false
// positive when the lower-cased candidate contains it.
type FalsePositive string

// DefaultFalsePositives is the stock list of placeholder-like substrings
// that can be passed to IsKnownFalsePositive.
var DefaultFalsePositives = []FalsePositive{
	"example",
	"xxxxxx",
	"aaaaaa",
	"abcde",
	"00000",
	"sample",
	"www",
}
|
|
|
|
|
|
|
|
//go:embed "badlist.txt"
|
|
|
|
var badList []byte
|
|
|
|
|
|
|
|
//go:embed "words.txt"
|
|
|
|
var wordList []byte
|
|
|
|
|
|
|
|
//go:embed "programmingbooks.txt"
|
|
|
|
var programmingBookWords []byte
|
|
|
|
|
2024-01-29 19:28:46 +00:00
|
|
|
// filter is the Aho-Corasick trie consulted by IsKnownFalsePositive when its
// wordCheck argument is true. It is assigned once, in init, from the embedded
// word lists.
var filter *ahocorasick.Trie
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
builder := ahocorasick.NewTrieBuilder()
|
|
|
|
|
|
|
|
wordList := bytesToCleanWordList(wordList)
|
|
|
|
builder.AddStrings(wordList)
|
2022-01-19 06:24:42 +00:00
|
|
|
|
2024-01-29 19:28:46 +00:00
|
|
|
badList := bytesToCleanWordList(badList)
|
|
|
|
builder.AddStrings(badList)
|
|
|
|
|
|
|
|
programmingBookWords := bytesToCleanWordList(programmingBookWords)
|
|
|
|
builder.AddStrings(programmingBookWords)
|
|
|
|
|
|
|
|
filter = builder.Build()
|
2022-01-19 06:24:42 +00:00
|
|
|
}
|
|
|
|
|
2023-06-25 15:55:11 +00:00
|
|
|
// IsKnownFalsePositives will not return a valid secret finding if any of the disqualifying conditions are met
|
|
|
|
// Currently that includes: No number, english word in key, or matches common example pattens.
|
|
|
|
// Only the secret key material should be passed into this function
|
2022-01-19 06:24:42 +00:00
|
|
|
func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordCheck bool) bool {
|
2023-11-03 15:45:00 +00:00
|
|
|
if !utf8.ValidString(match) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
lower := strings.ToLower(match)
|
2022-01-19 06:24:42 +00:00
|
|
|
for _, fp := range falsePositives {
|
2023-11-03 15:45:00 +00:00
|
|
|
if strings.Contains(lower, string(fp)) {
|
2022-01-19 06:24:42 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if wordCheck {
|
2024-01-29 19:28:46 +00:00
|
|
|
if filter.MatchFirstString(lower) != nil {
|
2022-01-19 06:24:42 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
2024-01-29 19:28:46 +00:00
|
|
|
|
2022-01-19 06:24:42 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// HasDigit reports whether key contains at least one rune for which
// unicode.IsDigit is true. It returns false for the empty string.
func HasDigit(key string) bool {
	for _, r := range key {
		if !unicode.IsDigit(r) {
			continue
		}
		return true
	}
	return false
}
|
|
|
|
|
2024-01-29 19:28:46 +00:00
|
|
|
// bytesToCleanWordList splits data on newlines and returns the distinct
// entries, lower-cased and with surrounding whitespace removed. Lines that
// are empty or whitespace-only are skipped. The order of the returned slice
// is unspecified because the entries are collected through a map.
func bytesToCleanWordList(data []byte) []string {
	unique := make(map[string]struct{})

	lines := strings.Split(string(data), "\n")
	for i := range lines {
		if strings.TrimSpace(lines[i]) == "" {
			continue
		}
		cleaned := strings.TrimSpace(strings.ToLower(lines[i]))
		unique[cleaned] = struct{}{}
	}

	result := make([]string, 0, len(unique))
	for entry := range unique {
		result = append(result, entry)
	}
	return result
}
|
2023-10-09 02:52:28 +00:00
|
|
|
|
|
|
|
// StringShannonEntropy returns the Shannon entropy of input in bits per
// symbol, where a symbol is one rune as produced by ranging over the string.
//
// Symbol probabilities are computed against the number of runes, not against
// len(input), which is a byte count. Dividing by the byte count would make
// the probabilities of any string containing multi-byte characters sum to
// less than 1 and skew the result. For pure-ASCII input both counts agree.
//
// The empty string has an entropy of 0.
func StringShannonEntropy(input string) float64 {
	if input == "" {
		return 0
	}

	counts := make(map[rune]float64)
	total := 0.0
	for _, r := range input {
		counts[r]++
		total++
	}

	inverseTotal := 1 / total // precompute the inverse

	entropy := 0.0
	for _, count := range counts {
		probability := count * inverseTotal
		// Subtracting keeps the accumulator non-negative and avoids
		// returning negative zero for single-symbol inputs.
		entropy -= probability * math.Log2(probability)
	}

	return entropy
}
|
|
|
|
|
|
|
|
// FilterResultsWithEntropy filters out determinately unverified results that have a shannon entropy below the given value.
|
|
|
|
func FilterResultsWithEntropy(results []Result, entropy float64) []Result {
|
2023-12-01 19:03:44 +00:00
|
|
|
var filteredResults []Result
|
2023-10-09 02:52:28 +00:00
|
|
|
for _, result := range results {
|
2024-01-29 17:55:46 +00:00
|
|
|
if !result.Verified {
|
2023-10-09 02:52:28 +00:00
|
|
|
if result.RawV2 != nil {
|
|
|
|
if StringShannonEntropy(string(result.RawV2)) >= entropy {
|
|
|
|
filteredResults = append(filteredResults, result)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if StringShannonEntropy(string(result.Raw)) >= entropy {
|
|
|
|
filteredResults = append(filteredResults, result)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return filteredResults
|
|
|
|
}
|