trufflehog/pkg/decoders/decoders.go
ahrav 68f28a0e34
Filter unique detectors by keywords in chunk (#1711)
* pre filter detectors that include the keywords in the chunk.

* Optimize the engine to prevent iterating over all detectors.

* use sync.Map for concurrent access.

* lint.

* use correct verify.

* allow versioned detectors.

* Break apart Start.

* cleanup.

* Update benchmark.

* add comment.

* remove Engine prefix.

* update comments.

* use regular map.

* delete the pool.

* remove old code.

* refactor ahocorasickcore into own file.

* update comments

* move structs to ahocorasickcore

* update comments

* fix

* address comments

* exported some methods and constructor since they will need to be used by the enterprise pipeline as well

* remove extra log
2023-10-23 08:02:01 -07:00

47 lines
1.1 KiB
Go

package decoders
import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
// DefaultDecoders returns a freshly allocated slice holding the built-in
// decoders, in the order they are meant to be applied.
func DefaultDecoders() []Decoder {
	const builtinCount = 3

	decoders := make([]Decoder, 0, builtinCount)
	// UTF8 has to stay at index 0: duplicate detection relies on it running
	// before the other decoders.
	decoders = append(decoders, &UTF8{}, &Base64{}, &UTF16{})
	return decoders
}
// DecodableChunk is a chunk that includes the type of decoder used.
// This allows us to avoid a type assertion on each decoder.
type DecodableChunk struct {
// Chunk is embedded by pointer, so its fields and methods are promoted onto
// DecodableChunk.
*sources.Chunk
// DecoderType records which decoder was used for this chunk.
DecoderType detectorspb.DecoderType
}
// Decoder is the interface satisfied by every entry returned from
// DefaultDecoders.
type Decoder interface {
// FromChunk takes a source chunk and returns it wrapped as a DecodableChunk.
// Fuzz in this file treats a nil result as "nothing was decoded".
// NOTE(review): confirm the nil convention against the implementations.
FromChunk(chunk *sources.Chunk) *DecodableChunk
}
// Fuzz is the go-fuzz (AFL-style fuzzer) entrypoint for this package. It feeds
// the raw input to the decoders in order to surface panics during decoding.
//
// It returns 1 (prioritize the input) when at least one non-first decoder
// yields a non-nil chunk, and -1 (keep the input out of the corpus) otherwise.
func Fuzz(data []byte) int {
	hits := 0
	for idx, dec := range DefaultDecoders() {
		// Index 0 is the plain decoder; it always decodes, so counting it
		// would make every input look interesting. Every later decoder is
		// still invoked, even after a hit, so each one gets exercised.
		if idx > 0 && dec.FromChunk(&sources.Chunk{Data: data}) != nil {
			hits++
		}
	}

	if hits == 0 {
		return -1 // Don't add input to the corpus.
	}
	return 1 // prioritize the input
}