mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-14 00:47:21 +00:00
114f4b6989
* Add Type() to detector interface The goal here is to allow the detector type information to be used without the need for reflection. This could possibly allow us to more easily inject information into detectors or filter them out if necessary. Co-authored-by: ahmed <ahmed.zahran@trufflesec.com> * remove test detector --------- Co-authored-by: ahmed <ahmed.zahran@trufflesec.com>
143 lines
4 KiB
Go
143 lines
4 KiB
Go
package generic
|
|
|
|
// cat scanner/pkg/secrets/generic/top-1000.txt | awk 'length($0)>5' > scanner/pkg/secrets/generic/words.txt
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
|
)
|
|
|
|
func New() Scanner {
|
|
excludePatterns := []string{
|
|
`[0-9A-Fa-f]{8}(?:-[0-9A-Fa-f]{4}){3}-[0-9A-Fa-f]{12}`, // UUID
|
|
`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-4[a-fA-F0-9]{3}-[8|9|aA|bB][a-fA-F0-9]{3}-[a-fA-F0-9]{12}`, // UUIDv4
|
|
`[A-Z]{2,6}\-[0-9]{2,6}`, // issue tracker
|
|
`#[a-fA-F0-9]{6}\b`, // hex color code
|
|
`\b[A-Fa-f0-9]{64}\b`, // hex encoded hash
|
|
`\b[A-Fa-f0-9]{32}\b`, // hex encoded hash
|
|
`https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)`, // http
|
|
`\b([/]{0,1}([\w]+[/])+[\w\.]*)\b`, // filepath
|
|
`([0-9A-F]{2}[:-]){5}([0-9A-F]{2})`, // MAC addr
|
|
`\d{4}[-/]{1}([0]\d|1[0-2])[-/]{1}([0-2]\d|3[01])`, // date
|
|
`[v|\-]\d\.\d`, //version
|
|
`\d\.\d\.\d-`, //version
|
|
`[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}`, // IPs and OIDs
|
|
`\b[A-Fa-f0-9x]{6,99}\b`, // hex encoding
|
|
`[A-Fa-f0-9x]{2}:[A-Fa-f0-9x]{2}:[A-Fa-f0-9x]{2}`, // hex encoding
|
|
`[\w]+\([\w, ]+\)`, // function
|
|
}
|
|
|
|
excludeMatchers := []*regexp.Regexp{}
|
|
for _, pat := range excludePatterns {
|
|
excludeMatchers = append(excludeMatchers, regexp.MustCompile(pat))
|
|
}
|
|
|
|
return Scanner{
|
|
excludeMatchers: excludeMatchers,
|
|
}
|
|
}
|
|
|
|
// Scanner is the generic secret detector. Values returned by New carry the
// compiled exclusion patterns in excludeMatchers.
type Scanner struct {
	// String lists that New does not populate and no active code in this
	// file reads. NOTE(review): presumably left over from the commented-out
	// dictionary helpers further down; confirm before relying on them.
	wordList, badList, programmingBookWords []string

	// excludeMatchers are the compiled patterns FromData checks candidate
	// tokens against; a token matching any of them is discarded.
	excludeMatchers []*regexp.Regexp
}
|
|
|
|
// Ensure the Scanner satisfies the interface at compile time.
|
|
var _ detectors.Detector = (*Scanner)(nil)
|
|
|
|
var keywords = []string{"pass", "token", "cred", "secret", "key"}
|
|
|
|
var (
|
|
// \x21-\x7e == ASCII 33 (0x21) and 126 (0x7e)
|
|
keyPat = regexp.MustCompile(detectors.PrefixRegex(keywords) + `(\b[\x21-\x7e]{16,64}\b)`)
|
|
)
|
|
|
|
// Keywords are used for efficiently pre-filtering chunks.
|
|
// Use identifiers in the secret preferably, or the provider name.
|
|
func (s Scanner) Keywords() []string {
|
|
return keywords
|
|
}
|
|
|
|
// FromData will find and optionally verify Generic secrets in a given set of bytes.
|
|
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
|
dataStr := string(data)
|
|
|
|
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
|
|
|
|
for _, match := range matches {
|
|
|
|
token := match[1]
|
|
|
|
// Least expensive-> most expensive filters.
|
|
// Substrings, then patterns.
|
|
|
|
if detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) {
|
|
continue
|
|
}
|
|
|
|
// toss any that match regexes
|
|
if hasReMatch(s.excludeMatchers, token) {
|
|
continue
|
|
}
|
|
|
|
// clean up containment chars
|
|
token = strings.Trim(token, fmt.Sprintf(`%s" '.,)(][}{`, "`"))
|
|
|
|
// toss any that b64 decode
|
|
// TODO: run them through again?
|
|
_, err := base64.StdEncoding.DecodeString(token)
|
|
if err == nil {
|
|
continue
|
|
}
|
|
|
|
s := detectors.Result{
|
|
DetectorType: detectorspb.DetectorType_Generic,
|
|
Raw: []byte(token),
|
|
}
|
|
|
|
results = append(results, s)
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// hasReMatch reports whether token is matched by at least one of matchers.
// It stops at the first match and returns false for an empty or nil slice.
func hasReMatch(matchers []*regexp.Regexp, token string) bool {
	for i := range matchers {
		if !matchers[i].MatchString(token) {
			continue
		}
		return true
	}
	return false
}
|
|
|
|
// func hasDictWord(wordList []string, token string) bool {
|
|
// lower := strings.ToLower(token)
|
|
// for _, word := range wordList {
|
|
// if strings.Contains(lower, word) {
|
|
// return true
|
|
// }
|
|
// }
|
|
// return false
|
|
// }
|
|
|
|
// func bytesToCleanWordList(data []byte) []string {
|
|
// words := []string{}
|
|
// for _, word := range strings.Split(string(data), "\n") {
|
|
// if strings.TrimSpace(word) != "" {
|
|
// words = append(words, strings.TrimSpace(strings.ToLower(word)))
|
|
// }
|
|
// }
|
|
// return words
|
|
// }
|
|
|
|
func (s Scanner) Type() detectorspb.DetectorType {
|
|
return detectorspb.DetectorType_Generic
|
|
}
|