[bug] - use DetectorKey as the key in the detectorKeysWithResults map (#2366)

* use DetectorKey as the key in the map

* nil check

* update comment
This commit is contained in:
ahrav 2024-02-02 13:43:56 -08:00 committed by GitHub
parent f6546ffaf5
commit 382990a6bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 22 additions and 8 deletions

View file

@ -63,6 +63,12 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
}
}
// GetDetectorByKey looks up the detector stored under key in the core's
// detectorsByKey map and hands it back to the caller. When the map holds no
// entry for key, the lookup produces the map's zero value, so callers should
// check the result for nil before using it.
func (ac *AhoCorasickCore) GetDetectorByKey(key DetectorKey) detectors.Detector {
	// A plain index expression is used (rather than the comma-ok form) because
	// a missing key and a stored zero value are treated identically here.
	detector := ac.detectorsByKey[key]
	return detector
}
// DetectorInfo represents a detected pattern's metadata in a data chunk.
// It encapsulates the key identifying a specific detector and the detector instance itself.
type DetectorInfo struct {

View file

@ -644,7 +644,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
// Reuse the same map and slice to avoid allocations.
const avgSecretsPerDetector = 8
detectorsWithResult := make(map[ahocorasick.DetectorInfo]struct{}, avgSecretsPerDetector)
detectorKeysWithResults := make(map[ahocorasick.DetectorKey]struct{}, avgSecretsPerDetector)
chunkSecrets := make(map[chunkSecretKey]struct{}, avgSecretsPerDetector)
for chunk := range e.verificationOverlapChunksChan {
@ -658,8 +658,8 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
if len(results) == 0 {
continue
}
if _, ok := detectorsWithResult[detector]; !ok {
detectorsWithResult[detector] = struct{}{}
if _, ok := detectorKeysWithResults[detector.Key]; !ok {
detectorKeysWithResults[detector.Key] = struct{}{}
}
for _, res := range results {
@ -693,14 +693,22 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
wgDoneFn: wgDetect.Done,
}, res)
// Remove the detector from the list of detectors with results.
delete(detectorsWithResult, detector)
// Remove the detector key from the list of detector keys with results.
// This is to ensure that the chunk is not reprocessed with verification enabled
// for this detector.
delete(detectorKeysWithResults, detector.Key)
}
chunkSecrets[key] = struct{}{}
}
}
for detector := range detectorsWithResult {
for key := range detectorKeysWithResults {
detector := e.ahoCorasickCore.GetDetectorByKey(key)
if detector == nil {
ctx.Logger().Info("detector not found", "key", key)
continue
}
wgDetect.Add(1)
chunk.chunk.Verify = e.verify
e.detectableChunksChan <- detectableChunk{
@ -715,8 +723,8 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
for k := range chunkSecrets {
delete(chunkSecrets, k)
}
for k := range detectorsWithResult {
delete(detectorsWithResult, k)
for k := range detectorKeysWithResults {
delete(detectorKeysWithResults, k)
}
chunk.verificationOverlapWgDoneFn()