mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 15:14:38 +00:00
7a156330b5
#1711 inadvertently removed the ability to match multiple custom detectors, or multiple detectors of the same type but different version, to a given keyword. (#2060 re-added support for multiple versions of detectors globally, and #2064 re-added support for multiple custom detectors globally, but neither fixed trufflehog's inability to support multiple such detectors for a given keyword match.) This PR re-adds the removed functionality (and narrows the AhoCorasickCore interface in the process.)
89 lines
3.8 KiB
Go
89 lines
3.8 KiB
Go
package engine
|
|
|
|
import (
|
|
"strings"
|
|
|
|
ahocorasick "github.com/BobuSumisu/aho-corasick"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/custom_detectors"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
|
)
|
|
|
|
// DetectorKey is used to identify a detector in the keywordsToDetectors map.
|
|
// Multiple detectors can have the same detector type but different versions.
|
|
// This allows us to identify a detector by its type and version. An
|
|
// additional (optional) field is provided to disambiguate multiple custom
|
|
// detectors. This type is exported even though none of its fields are so
|
|
// that the AhoCorasickCore can populate passed-in maps keyed on this type
|
|
// without exposing any of its internals to consumers.
|
|
type DetectorKey struct {
|
|
detectorType detectorspb.DetectorType
|
|
version int
|
|
customDetectorName string
|
|
}
|
|
|
|
// AhoCorasickCore encapsulates the operations and data structures used for keyword matching via the
|
|
// Aho-Corasick algorithm. It is responsible for constructing and managing the trie for efficient
|
|
// substring searches, as well as mapping keywords to their associated detectors for rapid lookups.
|
|
type AhoCorasickCore struct {
|
|
// prefilter is a ahocorasick struct used for doing efficient string
|
|
// matching given a set of words. (keywords from the rules in the config)
|
|
prefilter ahocorasick.Trie
|
|
// Maps for efficient lookups during detection.
|
|
// (This implementation maps in two layers: from keywords to detector
|
|
// type and then again from detector type to detector. We could
|
|
// go straight from keywords to detectors but doing it this way makes
|
|
// some consuming code a little cleaner.)
|
|
keywordsToDetectors map[string][]DetectorKey
|
|
detectorsByKey map[DetectorKey]detectors.Detector
|
|
}
|
|
|
|
// NewAhoCorasickCore allocates and initializes a new instance of AhoCorasickCore. It uses the
|
|
// provided detector slice to create a map from keywords to detectors and build the Aho-Corasick
|
|
// prefilter trie.
|
|
func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
|
|
keywordsToDetectors := make(map[string][]DetectorKey)
|
|
detectorsByKey := make(map[DetectorKey]detectors.Detector, len(allDetectors))
|
|
var keywords []string
|
|
for _, d := range allDetectors {
|
|
key := createDetectorKey(d)
|
|
detectorsByKey[key] = d
|
|
for _, kw := range d.Keywords() {
|
|
kwLower := strings.ToLower(kw)
|
|
keywords = append(keywords, kwLower)
|
|
keywordsToDetectors[kwLower] = append(keywordsToDetectors[kwLower], key)
|
|
}
|
|
}
|
|
|
|
return &AhoCorasickCore{
|
|
keywordsToDetectors: keywordsToDetectors,
|
|
detectorsByKey: detectorsByKey,
|
|
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
|
|
}
|
|
}
|
|
|
|
// PopulateMatchingDetectors populates the given detector slice with all the detectors matching the
|
|
// provided input. This method populates an existing map rather than allocating a new one because
|
|
// it will be called once per chunk and that many allocations has a noticeable performance cost.
|
|
func (ac *AhoCorasickCore) PopulateMatchingDetectors(chunkData string, detectors map[DetectorKey]detectors.Detector) {
|
|
for _, m := range ac.prefilter.MatchString(strings.ToLower(chunkData)) {
|
|
for _, k := range ac.keywordsToDetectors[m.MatchString()] {
|
|
detectors[k] = ac.detectorsByKey[k]
|
|
}
|
|
}
|
|
}
|
|
|
|
// createDetectorKey creates a unique key for each detector from its type, version, and, for
|
|
// custom regex detectors, its name.
|
|
func createDetectorKey(d detectors.Detector) DetectorKey {
|
|
detectorType := d.Type()
|
|
var version int
|
|
if v, ok := d.(detectors.Versioner); ok {
|
|
version = v.Version()
|
|
}
|
|
var customDetectorName string
|
|
if r, ok := d.(*custom_detectors.CustomRegexWebhook); ok {
|
|
customDetectorName = r.GetName()
|
|
}
|
|
return DetectorKey{detectorType: detectorType, version: version, customDetectorName: customDetectorName}
|
|
}
|