mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 15:14:38 +00:00
Re-add detector version (#2060)
#2010 mistakenly removed detector version tracking from the Aho-Corasick wrapper. This PR re-adds it.
This commit is contained in:
parent
3c2270ae65
commit
45059864f8
2 changed files with 120 additions and 12 deletions
|
@ -9,6 +9,14 @@ import (
|
|||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
||||
)
|
||||
|
||||
// detectorKey is used to identify a detector in the keywordsToDetectors map.
|
||||
// Multiple detectors can have the same detector type but different versions.
|
||||
// This allows us to identify a detector by its type and version.
|
||||
type detectorKey struct {
|
||||
detectorType detectorspb.DetectorType
|
||||
version int
|
||||
}
|
||||
|
||||
// AhoCorasickCore encapsulates the operations and data structures used for keyword matching via the
|
||||
// Aho-Corasick algorithm. It is responsible for constructing and managing the trie for efficient
|
||||
// substring searches, as well as mapping keywords to their associated detectors for rapid lookups.
|
||||
|
@ -21,30 +29,31 @@ type AhoCorasickCore struct {
|
|||
// type and then again from detector type to detector. We could
|
||||
// go straight from keywords to detectors but doing it this way makes
|
||||
// some consuming code a little cleaner.)
|
||||
keywordsToDetectorTypes map[string][]detectorspb.DetectorType
|
||||
detectorsByType map[detectorspb.DetectorType]detectors.Detector
|
||||
keywordsToDetectors map[string][]detectorKey
|
||||
detectorsByKey map[detectorKey]detectors.Detector
|
||||
}
|
||||
|
||||
// NewAhoCorasickCore allocates and initializes a new instance of AhoCorasickCore. It uses the
|
||||
// provided detector slice to create a map from keywords to detectors and build the Aho-Corasick
|
||||
// prefilter trie.
|
||||
func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
|
||||
keywordsToDetectorTypes := make(map[string][]detectorspb.DetectorType)
|
||||
detectorsByType := make(map[detectorspb.DetectorType]detectors.Detector, len(allDetectors))
|
||||
keywordsToDetectors := make(map[string][]detectorKey)
|
||||
detectorsByKey := make(map[detectorKey]detectors.Detector, len(allDetectors))
|
||||
var keywords []string
|
||||
for _, d := range allDetectors {
|
||||
detectorsByType[d.Type()] = d
|
||||
key := createDetectorKey(d)
|
||||
detectorsByKey[key] = d
|
||||
for _, kw := range d.Keywords() {
|
||||
kwLower := strings.ToLower(kw)
|
||||
keywords = append(keywords, kwLower)
|
||||
keywordsToDetectorTypes[kwLower] = append(keywordsToDetectorTypes[kwLower], d.Type())
|
||||
keywordsToDetectors[kwLower] = append(keywordsToDetectors[kwLower], key)
|
||||
}
|
||||
}
|
||||
|
||||
return &AhoCorasickCore{
|
||||
keywordsToDetectorTypes: keywordsToDetectorTypes,
|
||||
detectorsByType: detectorsByType,
|
||||
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
|
||||
keywordsToDetectors: keywordsToDetectors,
|
||||
detectorsByKey: detectorsByKey,
|
||||
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,12 +67,23 @@ func (ac *AhoCorasickCore) MatchString(input string) []*ahocorasick.Match {
|
|||
// This method is designed to reuse the same map for performance optimization,
|
||||
// reducing the need for repeated allocations within each detector worker in the engine.
|
||||
func (ac *AhoCorasickCore) PopulateDetectorsByMatch(match *ahocorasick.Match, detectors map[detectorspb.DetectorType]detectors.Detector) bool {
|
||||
matchedDetectorTypes, ok := ac.keywordsToDetectorTypes[match.MatchString()]
|
||||
matchedDetectorKeys, ok := ac.keywordsToDetectors[match.MatchString()]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
for _, t := range matchedDetectorTypes {
|
||||
detectors[t] = ac.detectorsByType[t]
|
||||
for _, key := range matchedDetectorKeys {
|
||||
detectors[key.detectorType] = ac.detectorsByKey[key]
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// createDetectorKey creates a unique key for each detector. This key based on type and version,
|
||||
// it ensures faster lookups and reduces redundancy in our main detector store.
|
||||
func createDetectorKey(d detectors.Detector) detectorKey {
|
||||
detectorType := d.Type()
|
||||
var version int
|
||||
if v, ok := d.(detectors.Versioner); ok {
|
||||
version = v.Version()
|
||||
}
|
||||
return detectorKey{detectorType: detectorType, version: version}
|
||||
}
|
||||
|
|
88
pkg/engine/ahocorasickcore_test.go
Normal file
88
pkg/engine/ahocorasickcore_test.go
Normal file
|
@ -0,0 +1,88 @@
|
|||
package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
||||
)
|
||||
|
||||
// TestDetectorType is the detector type reported by both stub detectors in this
// file (testDetectorV1 and testDetectorV2), which differ only in version and keyword.
// NOTE(review): -1 is presumably chosen so it cannot collide with a real
// detectorspb.DetectorType value — confirm against the generated enum.
const TestDetectorType = -1
|
||||
|
||||
type testDetectorV1 struct {
|
||||
}
|
||||
|
||||
func (d testDetectorV1) FromData(ctx context.Context, verify bool, data []byte) ([]detectors.Result, error) {
|
||||
return make([]detectors.Result, 0), nil
|
||||
}
|
||||
|
||||
func (d testDetectorV1) Keywords() []string {
|
||||
return []string{"a"}
|
||||
}
|
||||
|
||||
func (d testDetectorV1) Type() detectorspb.DetectorType {
|
||||
return TestDetectorType
|
||||
}
|
||||
|
||||
func (d testDetectorV1) Version() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
type testDetectorV2 struct {
|
||||
}
|
||||
|
||||
func (d testDetectorV2) FromData(ctx context.Context, verify bool, data []byte) ([]detectors.Result, error) {
|
||||
return make([]detectors.Result, 0), nil
|
||||
}
|
||||
|
||||
func (d testDetectorV2) Keywords() []string {
|
||||
return []string{"b"}
|
||||
}
|
||||
|
||||
func (d testDetectorV2) Type() detectorspb.DetectorType {
|
||||
return TestDetectorType
|
||||
}
|
||||
|
||||
func (d testDetectorV2) Version() int {
|
||||
return 2
|
||||
}
|
||||
|
||||
var _ detectors.Detector = (*testDetectorV1)(nil)
|
||||
var _ detectors.Detector = (*testDetectorV2)(nil)
|
||||
var _ detectors.Versioner = (*testDetectorV1)(nil)
|
||||
var _ detectors.Versioner = (*testDetectorV2)(nil)
|
||||
|
||||
func TestAhoCorasickCore_MultipleDetectorVersionsMatchable(t *testing.T) {
|
||||
testCases := []struct {
|
||||
matchString string
|
||||
detector detectors.Detector
|
||||
}{
|
||||
{
|
||||
matchString: "a",
|
||||
detector: testDetectorV1{},
|
||||
},
|
||||
{
|
||||
matchString: "b",
|
||||
detector: testDetectorV2{},
|
||||
},
|
||||
}
|
||||
|
||||
var allDetectors []detectors.Detector
|
||||
for _, tt := range testCases {
|
||||
allDetectors = append(allDetectors, tt.detector)
|
||||
}
|
||||
|
||||
ac := NewAhoCorasickCore(allDetectors)
|
||||
|
||||
for _, tt := range testCases {
|
||||
matches := ac.MatchString(tt.matchString)
|
||||
assert.Equal(t, 1, len(matches))
|
||||
|
||||
matchingDetectors := make(map[detectorspb.DetectorType]detectors.Detector)
|
||||
ac.PopulateDetectorsByMatch(matches[0], matchingDetectors)
|
||||
assert.Equal(t, 1, len(matchingDetectors))
|
||||
assert.Equal(t, tt.detector, matchingDetectors[TestDetectorType])
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue