mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
[feat] - Update span calculation logic to use offset magnitude (#2957)
* Add a default start offset * update * use keywordIdx
This commit is contained in:
parent
68bea576db
commit
bf77251543
5 changed files with 227 additions and 41 deletions
|
@ -38,6 +38,12 @@ type MaxSecretSizeProvider interface {
|
|||
MaxSecretSize() int64
|
||||
}
|
||||
|
||||
// StartOffsetProvider is an optional interface that a detector can implement to
|
||||
// control how far before the matched keyword its match span begins.
|
||||
type StartOffsetProvider interface {
|
||||
// StartOffset returns the distance that the span calculator subtracts from the
// keyword's index in the chunk to obtain the start of the match span. The
// calculator clamps the resulting start index to 0.
StartOffset() int64
|
||||
}
|
||||
|
||||
// MultiPartCredentialProvider is an optional interface that a detector can implement
|
||||
// to indicate its compatibility with multi-part credentials and provide the maximum
|
||||
// secret size for the credential it finds.
|
||||
|
|
|
@ -20,6 +20,7 @@ type Scanner struct{}
|
|||
var _ detectors.Detector = (*Scanner)(nil)
|
||||
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
|
||||
var _ detectors.MaxSecretSizeProvider = (*Scanner)(nil)
|
||||
var _ detectors.StartOffsetProvider = (*Scanner)(nil)
|
||||
|
||||
var (
|
||||
keyPat = regexp.MustCompile(`\{[^{]+auth_provider_x509_cert_url[^}]+\}`)
|
||||
|
@ -50,10 +51,15 @@ func (s Scanner) Keywords() []string {
|
|||
return []string{"provider_x509"}
|
||||
}
|
||||
|
||||
const maxGCPKeySize = 4096
|
||||
const maxGCPKeySize = 2048
|
||||
|
||||
// ProvideMaxSecretSize returns the maximum size of a secret that this detector can find.
|
||||
func (s Scanner) MaxSecretSize() int64 { return maxGCPKeySize }
|
||||
// MaxSecretSize returns the maximum size of a secret that this detector can find.
|
||||
func (Scanner) MaxSecretSize() int64 { return maxGCPKeySize }
|
||||
|
||||
const startOffset = 4096
|
||||
|
||||
// StartOffset returns the start offset for the secret this detector finds.
|
||||
func (Scanner) StartOffset() int64 { return startOffset }
|
||||
|
||||
// FromData will find and optionally verify GCP secrets in a given set of bytes.
|
||||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
||||
|
|
|
@ -26,6 +26,7 @@ type Scanner struct {
|
|||
// Ensure the Scanner satisfies the interface at compile time.
|
||||
var _ detectors.Detector = (*Scanner)(nil)
|
||||
var _ detectors.MaxSecretSizeProvider = (*Scanner)(nil)
|
||||
var _ detectors.StartOffsetProvider = (*Scanner)(nil)
|
||||
|
||||
var (
|
||||
defaultClient = common.SaneHttpClient()
|
||||
|
@ -48,8 +49,13 @@ func (s Scanner) Keywords() []string {
|
|||
|
||||
const maxGCPADCKeySize = 1024
|
||||
|
||||
// ProvideMaxSecretSize returns the maximum size of a secret that this detector can find.
|
||||
func (s Scanner) MaxSecretSize() int64 { return maxGCPADCKeySize }
|
||||
// MaxSecretSize returns the maximum size of a secret that this detector can find.
|
||||
func (Scanner) MaxSecretSize() int64 { return maxGCPADCKeySize }
|
||||
|
||||
const startOffset = maxGCPADCKeySize
|
||||
|
||||
// StartOffset returns the start offset for the secret this detector finds.
|
||||
func (Scanner) StartOffset() int64 { return startOffset }
|
||||
|
||||
// FromData will find and optionally verify Gcpapplicationdefaultcredentials secrets in a given set of bytes.
|
||||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
||||
|
|
|
@ -34,11 +34,11 @@ type spanCalculator interface {
|
|||
}
|
||||
|
||||
// spanCalculationParams provides the necessary context for calculating match spans,
|
||||
// including the starting index in the chunk, the chunk data itself, and the detector being used.
|
||||
// including the keyword index in the chunk, the chunk data itself, and the detector being used.
|
||||
type spanCalculationParams struct {
|
||||
startIdx int64
|
||||
chunkData []byte
|
||||
detector detectors.Detector
|
||||
keywordIdx int64 // Index of the keyword in the chunk data
|
||||
chunkData []byte
|
||||
detector detectors.Detector
|
||||
}
|
||||
|
||||
// EntireChunkSpanCalculator is a strategy that calculates the match span to use the entire chunk data.
|
||||
|
@ -51,34 +51,44 @@ func (e *EntireChunkSpanCalculator) calculateSpan(params spanCalculationParams)
|
|||
return matchSpan{startOffset: 0, endOffset: int64(len(params.chunkData))}
|
||||
}
|
||||
|
||||
// maxMatchLengthSpanCalculator is a strategy that calculates match spans based on a default max
|
||||
// match length or values provided by detectors. This allows for more granular control over the match span.
|
||||
type maxMatchLengthSpanCalculator struct{ maxMatchLength int64 }
|
||||
// adjustableSpanCalculator is a strategy that calculates match spans. It uses a default offset magnitude
|
||||
// or values provided by specific detectors to adjust the start and end indices of the span, allowing
|
||||
// for more granular control over the match.
|
||||
type adjustableSpanCalculator struct{ offsetMagnitude int64 }
|
||||
|
||||
// newMaxMatchLengthSpanCalculator creates a new instance of maxMatchLengthSpanCalculator with the
|
||||
// specified max match length.
|
||||
func newMaxMatchLengthSpanCalculator(maxMatchLength int64) *maxMatchLengthSpanCalculator {
|
||||
return &maxMatchLengthSpanCalculator{maxMatchLength: maxMatchLength}
|
||||
// newAdjustableSpanCalculator creates a new instance of adjustableSpanCalculator with the
|
||||
// specified offset magnitude.
|
||||
func newAdjustableSpanCalculator(offsetRadius int64) *adjustableSpanCalculator {
|
||||
return &adjustableSpanCalculator{offsetMagnitude: offsetRadius}
|
||||
}
|
||||
|
||||
// calculateSpans computes the match spans based on the start index and the max match length.
|
||||
// If the detector provides an override value, it uses that instead of the default max match length.
|
||||
func (m *maxMatchLengthSpanCalculator) calculateSpan(params spanCalculationParams) matchSpan {
|
||||
maxSize := m.maxMatchLength
|
||||
// calculateSpan computes the match span based on the keyword index and the offset magnitude.
|
||||
// If the detector provides an override value, it uses that instead of the default offset magnitude to
|
||||
// calculate the maximum size of the span.
|
||||
// The start index of the span is also adjusted if the detector provides a start offset.
|
||||
func (m *adjustableSpanCalculator) calculateSpan(params spanCalculationParams) matchSpan {
|
||||
keywordIdx := params.keywordIdx
|
||||
|
||||
switch d := params.detector.(type) {
|
||||
case detectors.MultiPartCredentialProvider:
|
||||
maxSize = d.MaxCredentialSpan()
|
||||
case detectors.MaxSecretSizeProvider:
|
||||
maxSize = d.MaxSecretSize()
|
||||
default: // Use the default max match length
|
||||
maxSize := keywordIdx + m.offsetMagnitude
|
||||
startOffset := keywordIdx - m.offsetMagnitude
|
||||
|
||||
// Check if the detector implements each interface and update values accordingly.
|
||||
// This can't be a type switch: a type switch runs only the first matching case, but a detector can implement several of these interfaces at once.
|
||||
if provider, ok := params.detector.(detectors.MultiPartCredentialProvider); ok {
|
||||
maxSize = provider.MaxCredentialSpan() + keywordIdx
|
||||
startOffset = keywordIdx - provider.MaxCredentialSpan()
|
||||
}
|
||||
endIdx := params.startIdx + maxSize
|
||||
if endIdx > int64(len(params.chunkData)) {
|
||||
endIdx = int64(len(params.chunkData))
|
||||
if provider, ok := params.detector.(detectors.MaxSecretSizeProvider); ok {
|
||||
maxSize = provider.MaxSecretSize() + keywordIdx
|
||||
}
|
||||
if provider, ok := params.detector.(detectors.StartOffsetProvider); ok {
|
||||
startOffset = keywordIdx - provider.StartOffset()
|
||||
}
|
||||
|
||||
return matchSpan{startOffset: params.startIdx, endOffset: endIdx}
|
||||
startIdx := max(startOffset, 0)
|
||||
endIdx := min(maxSize, int64(len(params.chunkData)))
|
||||
|
||||
return matchSpan{startOffset: startIdx, endOffset: endIdx}
|
||||
}
|
||||
|
||||
// CoreOption is a functional option type for configuring an AhoCorasickCore instance.
|
||||
|
@ -123,19 +133,19 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector, opts ...CoreOption) *
|
|||
}
|
||||
}
|
||||
|
||||
const maxMatchLength int64 = 512
|
||||
ac := &Core{
|
||||
const defaultOffsetRadius int64 = 512
|
||||
core := &Core{
|
||||
keywordsToDetectors: keywordsToDetectors,
|
||||
detectorsByKey: detectorsByKey,
|
||||
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
|
||||
spanCalculator: newMaxMatchLengthSpanCalculator(maxMatchLength), // Default span calculator
|
||||
spanCalculator: newAdjustableSpanCalculator(defaultOffsetRadius), // Default span calculator
|
||||
}
|
||||
|
||||
for _, opt := range opts {
|
||||
opt(ac)
|
||||
opt(core)
|
||||
}
|
||||
|
||||
return ac
|
||||
return core
|
||||
}
|
||||
|
||||
// DetectorMatch represents a detected pattern's metadata in a data chunk.
|
||||
|
@ -234,9 +244,9 @@ func (ac *Core) FindDetectorMatches(chunkData []byte) []*DetectorMatch {
|
|||
startIdx := m.Pos()
|
||||
span := ac.spanCalculator.calculateSpan(
|
||||
spanCalculationParams{
|
||||
startIdx: startIdx,
|
||||
chunkData: chunkData,
|
||||
detector: detectorMatch.Detector,
|
||||
keywordIdx: startIdx,
|
||||
chunkData: chunkData,
|
||||
detector: detectorMatch.Detector,
|
||||
},
|
||||
)
|
||||
detectorMatch.addMatchSpan(span)
|
||||
|
|
|
@ -63,6 +63,64 @@ func (testDetectorV3) Type() detectorspb.DetectorType {
|
|||
|
||||
func (testDetectorV3) Version() int { return 1 }
|
||||
|
||||
var _ detectors.Detector = (*testDetectorV4)(nil)
|
||||
var _ detectors.MultiPartCredentialProvider = (*testDetectorV4)(nil)
|
||||
var _ detectors.StartOffsetProvider = (*testDetectorV4)(nil)
|
||||
|
||||
type testDetectorV4 struct{}
|
||||
|
||||
func (testDetectorV4) FromData(context.Context, bool, []byte) ([]detectors.Result, error) {
|
||||
return make([]detectors.Result, 0), nil
|
||||
}
|
||||
|
||||
func (testDetectorV4) Keywords() []string { return []string{"password"} }
|
||||
|
||||
func (testDetectorV4) Type() detectorspb.DetectorType { return TestDetectorType }
|
||||
|
||||
func (testDetectorV4) Version() int { return 1 }
|
||||
|
||||
func (testDetectorV4) MaxCredentialSpan() int64 { return 15 }
|
||||
|
||||
func (testDetectorV4) StartOffset() int64 { return 5 }
|
||||
|
||||
var _ detectors.Detector = (*testDetectorV5)(nil)
|
||||
var _ detectors.MaxSecretSizeProvider = (*testDetectorV5)(nil)
|
||||
var _ detectors.StartOffsetProvider = (*testDetectorV5)(nil)
|
||||
|
||||
type testDetectorV5 struct{}
|
||||
|
||||
func (testDetectorV5) FromData(context.Context, bool, []byte) ([]detectors.Result, error) {
|
||||
return make([]detectors.Result, 0), nil
|
||||
}
|
||||
|
||||
func (testDetectorV5) Keywords() []string { return []string{"password"} }
|
||||
|
||||
func (testDetectorV5) Type() detectorspb.DetectorType { return TestDetectorType }
|
||||
|
||||
func (testDetectorV5) Version() int { return 1 }
|
||||
|
||||
func (testDetectorV5) MaxSecretSize() int64 { return 10 }
|
||||
|
||||
func (testDetectorV5) StartOffset() int64 { return 3 }
|
||||
|
||||
var _ detectors.Detector = (*testDetectorV6)(nil)
|
||||
var _ detectors.Detector = (*testDetectorV6)(nil)
|
||||
var _ detectors.StartOffsetProvider = (*testDetectorV6)(nil)
|
||||
|
||||
type testDetectorV6 struct{}
|
||||
|
||||
func (testDetectorV6) FromData(context.Context, bool, []byte) ([]detectors.Result, error) {
|
||||
return make([]detectors.Result, 0), nil
|
||||
}
|
||||
|
||||
func (testDetectorV6) Keywords() []string { return []string{"password"} }
|
||||
|
||||
func (testDetectorV6) Type() detectorspb.DetectorType { return TestDetectorType }
|
||||
|
||||
func (testDetectorV6) Version() int { return 1 }
|
||||
|
||||
func (testDetectorV6) StartOffset() int64 { return 1 }
|
||||
|
||||
var _ detectors.Detector = (*testDetectorV1)(nil)
|
||||
var _ detectors.Detector = (*testDetectorV2)(nil)
|
||||
var _ detectors.Versioner = (*testDetectorV1)(nil)
|
||||
|
@ -141,7 +199,7 @@ func TestFindDetectorMatches(t *testing.T) {
|
|||
},
|
||||
sampleData: "This is a sample data containing keyword truffle",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV3{}): {{41, 48}},
|
||||
CreateDetectorKey(testDetectorV3{}): {{0, 48}},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -151,7 +209,7 @@ func TestFindDetectorMatches(t *testing.T) {
|
|||
},
|
||||
sampleData: "This is a sample data containing keyword a",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV1{}): {{8, 42}},
|
||||
CreateDetectorKey(testDetectorV1{}): {{0, 42}},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -172,7 +230,7 @@ func TestFindDetectorMatches(t *testing.T) {
|
|||
eget ultricies ugue ugue id ugue. Meens liquet libero
|
||||
c libero molestie, nec mlesud ugue ugue eget. This is the second occurrence of the letter a.`,
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV2{}): {{43, 555}, {854, 856}},
|
||||
CreateDetectorKey(testDetectorV2{}): {{0, 856}},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -219,6 +277,106 @@ func TestFindDetectorMatches(t *testing.T) {
|
|||
CreateDetectorKey(testDetectorV2{}): {{0, 856}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword in the middle of the credential; MultiPartCredentialProvider, StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV4{},
|
||||
},
|
||||
sampleData: "This is a password in the middle of some data",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV4{}): {{5, 25}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword at the end of the credential; MultiPartCredentialProvider, StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV4{},
|
||||
},
|
||||
sampleData: "This data ends with a password",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV4{}): {{17, 30}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword near the start of the data; MultiPartCredentialProvider, StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV4{},
|
||||
},
|
||||
sampleData: "a password at the start",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV4{}): {{0, 17}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword in the middle of the credential; MaxSecretSizeProvider, StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV5{},
|
||||
},
|
||||
sampleData: "This is a password in the middle of some data",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV5{}): {{7, 20}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword at the end of the credential; MaxSecretSizeProvider, StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV5{},
|
||||
},
|
||||
sampleData: "This data ends with a password",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV5{}): {{19, 30}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword near the start of the data; MaxSecretSizeProvider, StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV5{},
|
||||
},
|
||||
sampleData: "a password at the start",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV5{}): {{0, 12}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword in the middle of the credential; StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV6{},
|
||||
},
|
||||
sampleData: "This is a password in the middle of some data",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV6{}): {{9, 45}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword at the end of the credential; StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV6{},
|
||||
},
|
||||
sampleData: "This data ends with a password",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV6{}): {{21, 30}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyword near the start of the data; StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV6{},
|
||||
},
|
||||
sampleData: "a password at the start",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV6{}): {{1, 23}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple keyword in the middle of the credential; StartOffsetProvider",
|
||||
detectors: []detectors.Detector{
|
||||
testDetectorV6{},
|
||||
},
|
||||
sampleData: "This is a password in the middle of some data, and another password at the end!",
|
||||
expectedResult: map[DetectorKey][][]int64{
|
||||
CreateDetectorKey(testDetectorV6{}): {{9, 79}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "No matches",
|
||||
detectors: []detectors.Detector{
|
||||
|
|
Loading…
Reference in a new issue