Strip leading +/- from github target diffs (#3244)

The GitHub source generates chunks for targeted scans differently than it does for "normal" scans. One difference was the presence of leading + and - characters, which can interfere with detection in some cases.
This commit is contained in:
Cody Rose 2024-08-23 15:21:58 -04:00 committed by GitHub
parent 8f299ff8cd
commit 3b0b2909ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1408,7 +1408,7 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
SourceID: s.SourceID(),
JobID: s.JobID(),
SecretID: target.SecretID,
Data: []byte(res),
Data: []byte(stripLeadingPlusMinus(res)),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Github{Github: meta},
},
@@ -1417,3 +1417,10 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
return common.CancellableWrite(ctx, chunksChan, chunk)
}
// leadingPlusMinusReplacer rewrites "\n+" and "\n-" to "\n" in a single left-to-right pass. Matches never overlap and
// replaced text is not rescanned, so at most one marker character is removed per line.
var leadingPlusMinusReplacer = strings.NewReplacer("\n+", "\n", "\n-", "\n")

// stripLeadingPlusMinus removes the leading + or - marker from lines in a diff string. These characters exist in the
// diffs returned when performing a targeted scan and need to be removed so that detectors are operating on the correct
// text.
//
// Only the single marker character directly after a newline is removed, so a line whose own content begins with + or -
// (for example the added YAML list item "+- key: value") keeps that content intact. A marker at the very start of the
// string is not preceded by a newline and is left untouched.
func stripLeadingPlusMinus(diff string) string {
	return leadingPlusMinusReplacer.Replace(diff)
}