Refactor FragmentLineOffset to match multiline secrets (#1612)

* Refactor FragmentLineOffset to match multiline secrets

* Add tests and benchmarks

* Use bytes.Count and fix an ignore tag edge case
This commit is contained in:
Miccah 2023-08-14 12:51:41 -05:00 committed by GitHub
parent 09795c3591
commit eae66ccf7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 10 deletions

View file

@ -595,17 +595,20 @@ func SupportsLineNumbers(sourceType sourcespb.SourceType) bool {
// FragmentLineOffset sets the line number for a provided source chunk with a given detector result.
func FragmentLineOffset(chunk *sources.Chunk, result *detectors.Result) (int64, bool) {
lines := bytes.Split(chunk.Data, []byte("\n"))
for i, line := range lines {
if bytes.Contains(line, result.Raw) {
// if the line contains the ignore tag, we should ignore the result
if bytes.Contains(line, []byte(ignoreTag)) {
return int64(i), true
}
return int64(i), false
}
before, after, found := bytes.Cut(chunk.Data, result.Raw)
if !found {
return 0, false
}
return 0, false
lineNumber := int64(bytes.Count(before, []byte("\n")))
// If the line contains the ignore tag, we should ignore the result.
endLine := bytes.Index(after, []byte("\n"))
if endLine == -1 {
endLine = len(after)
}
if bytes.Contains(after[:endLine], []byte(ignoreTag)) {
return lineNumber, true
}
return lineNumber, false
}
// FragmentFirstLine returns the first line number of a fragment along with a pointer to the value to update in the

View file

@ -1,6 +1,7 @@
package engine
import (
"fmt"
"testing"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
@ -50,6 +51,39 @@ func TestFragmentLineOffset(t *testing.T) {
expectedLine: 4,
ignore: false,
},
{
name: "match on consecutive lines",
chunk: &sources.Chunk{
Data: []byte("line1\nline2\ntrufflehog:ignore\nline4\nsecret\nhere\nline6"),
},
result: &detectors.Result{
Raw: []byte("secret\nhere"),
},
expectedLine: 4,
ignore: false,
},
{
name: "ignore on last consecutive lines",
chunk: &sources.Chunk{
Data: []byte("line1\nline2\nline3\nsecret\nhere // trufflehog:ignore\nline5"),
},
result: &detectors.Result{
Raw: []byte("secret\nhere"),
},
expectedLine: 3,
ignore: true,
},
{
name: "ignore on last line",
chunk: &sources.Chunk{
Data: []byte("line1\nline2\nline3\nsecret here // trufflehog:ignore"),
},
result: &detectors.Result{
Raw: []byte("secret here"),
},
expectedLine: 3,
ignore: true,
},
}
for _, tt := range tests {
@ -65,6 +99,43 @@ func TestFragmentLineOffset(t *testing.T) {
}
}
func setupFragmentLineOffsetBench(totalLines, needleLine int) (*sources.Chunk, *detectors.Result) {
data := make([]byte, 0, 4096)
needle := []byte("needle")
for i := 0; i < totalLines; i++ {
if i != needleLine {
data = append(data, []byte(fmt.Sprintf("line%d\n", i))...)
continue
}
data = append(data, needle...)
data = append(data, '\n')
}
chunk := &sources.Chunk{Data: data}
result := &detectors.Result{Raw: needle}
return chunk, result
}
func BenchmarkFragmentLineOffsetStart(b *testing.B) {
chunk, result := setupFragmentLineOffsetBench(512, 2)
for i := 0; i < b.N; i++ {
_, _ = FragmentLineOffset(chunk, result)
}
}
func BenchmarkFragmentLineOffsetMiddle(b *testing.B) {
chunk, result := setupFragmentLineOffsetBench(512, 256)
for i := 0; i < b.N; i++ {
_, _ = FragmentLineOffset(chunk, result)
}
}
func BenchmarkFragmentLineOffsetEnd(b *testing.B) {
chunk, result := setupFragmentLineOffsetBench(512, 510)
for i := 0; i < b.N; i++ {
_, _ = FragmentLineOffset(chunk, result)
}
}
// Test to make sure that DefaultDecoders always returns the UTF8 decoder first.
// Technically a decoder test but we want this to run and fail in CI
func TestDefaultDecoders(t *testing.T) {