mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-14 00:47:21 +00:00
68f28a0e34
* pre filter detectors that include the keywords in the chunk. * Optimize the engine to prevent iterating over all detectors. * use sync.Map for concurrent access. * lint. * use correct verify. * allow versioned detectors. * Break apart Start. * cleanup. * Update benchmark. * add comment. * remove Engine prefix. * update comments. * use regular map. * delete the pool. * remove old code. * refactor ahocorasickcore into own file. * update comments * move structs to ahocorasickcore * update comments * fix * address comments * exported some methods and constructor since it will need to be used by the enterprise pipeline as well * remove extra log
129 lines
3 KiB
Go
129 lines
3 KiB
Go
package decoders
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
|
)
|
|
|
|
// Base64 is a stateless decoder whose FromChunk method looks for
// base64-encoded substrings inside a chunk and replaces them with their
// decoded bytes.
type Base64 struct{}
|
|
|
|
// b64Charset holds every byte that may appear in a base64 candidate: the
// standard alphabet ('+', '/'), the URL-safe alphabet ('-', '_') and the
// padding character '='.
var b64Charset = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/-_=")

// b64EndChars is the cutset trimmed from the edges of a candidate run before
// it is handed to the decoder.
var b64EndChars = "+/-_="

// b64CharsetMapping is a lookup table indexed by byte value. Every charset
// member is ASCII, so a fixed 128-entry array is enough and avoids a map.
// It is filled in by init.
var b64CharsetMapping [128]bool
|
|
|
|
func init() {
|
|
// Build an array of all the characters in the base64 charset.
|
|
for _, char := range b64Charset {
|
|
b64CharsetMapping[char] = true
|
|
}
|
|
}
|
|
|
|
func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
|
|
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: detectorspb.DecoderType_BASE64}
|
|
encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20, b64CharsetMapping, b64EndChars)
|
|
decodedSubstrings := make(map[string][]byte)
|
|
|
|
for _, str := range encodedSubstrings {
|
|
dec, err := base64.StdEncoding.DecodeString(str)
|
|
if err == nil {
|
|
if len(dec) > 0 {
|
|
decodedSubstrings[str] = dec
|
|
}
|
|
continue
|
|
}
|
|
|
|
dec, err = base64.RawURLEncoding.DecodeString(str)
|
|
if err == nil && len(dec) > 0 {
|
|
decodedSubstrings[str] = dec
|
|
}
|
|
}
|
|
|
|
if len(decodedSubstrings) > 0 {
|
|
var result bytes.Buffer
|
|
result.Grow(len(chunk.Data))
|
|
|
|
start := 0
|
|
for _, encoded := range encodedSubstrings {
|
|
if decoded, ok := decodedSubstrings[encoded]; ok {
|
|
end := bytes.Index(chunk.Data[start:], []byte(encoded))
|
|
if end != -1 {
|
|
result.Write(chunk.Data[start : start+end])
|
|
result.Write(decoded)
|
|
start += end + len(encoded)
|
|
}
|
|
}
|
|
}
|
|
result.Write(chunk.Data[start:])
|
|
chunk.Data = result.Bytes()
|
|
return decodableChunk
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func getSubstringsOfCharacterSet(data []byte, threshold int, charsetMapping [128]bool, endChars string) []string {
|
|
if len(data) == 0 {
|
|
return nil
|
|
}
|
|
|
|
count := 0
|
|
substringsCount := 0
|
|
|
|
// Determine the number of substrings that will be returned.
|
|
// Pre-allocate the slice to avoid reallocations.
|
|
for _, char := range data {
|
|
if char < 128 && charsetMapping[char] {
|
|
count++
|
|
} else {
|
|
if count > threshold {
|
|
substringsCount++
|
|
}
|
|
count = 0
|
|
}
|
|
}
|
|
if count > threshold {
|
|
substringsCount++
|
|
}
|
|
|
|
count = 0
|
|
start := 0
|
|
substrings := make([]string, 0, substringsCount)
|
|
|
|
for i, char := range data {
|
|
if char < 128 && charsetMapping[char] {
|
|
if count == 0 {
|
|
start = i
|
|
}
|
|
count++
|
|
} else {
|
|
if count > threshold {
|
|
substrings = appendB64Substring(data, start, count, substrings, endChars)
|
|
}
|
|
count = 0
|
|
}
|
|
}
|
|
|
|
if count > threshold {
|
|
substrings = appendB64Substring(data, start, count, substrings, endChars)
|
|
}
|
|
|
|
return substrings
|
|
}
|
|
|
|
// appendB64Substring takes the run data[start:start+count], strips any leading
// bytes found in endChars, and appends the resulting candidate to substrings.
//
// If an '=' remains in the interior of the candidate (that is, one not removed
// by also trimming endChars from the right), everything up to and including
// the first such '=' is dropped, so "key=value" contributes only "value".
// Trailing endChars bytes are kept in the appended string.
//
// The extended slice is returned.
func appendB64Substring(data []byte, start, count int, substrings []string, endChars string) []string {
	candidate := bytes.TrimLeft(data[start:start+count], endChars)

	// Look for '=' only in the right-trimmed view so trailing padding is not
	// mistaken for a key/value separator.
	core := bytes.TrimRight(candidate, endChars)
	if eq := bytes.IndexByte(core, '='); eq >= 0 {
		candidate = candidate[eq+1:]
	}

	return append(substrings, string(candidate))
}
|