2022-01-13 20:02:24 +00:00
|
|
|
package detectors
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-08-23 21:34:10 +00:00
|
|
|
"crypto/rand"
|
|
|
|
"math/big"
|
2023-06-09 18:06:54 +00:00
|
|
|
"net/url"
|
2022-01-19 06:24:42 +00:00
|
|
|
"strings"
|
|
|
|
"unicode"
|
2022-01-13 20:02:24 +00:00
|
|
|
|
2022-02-10 18:54:33 +00:00
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
2022-01-13 20:02:24 +00:00
|
|
|
)
|
|
|
|
|
2022-04-01 23:47:27 +00:00
|
|
|
// Detector defines an interface for scanning for and verifying secrets.
|
2022-01-13 20:02:24 +00:00
|
|
|
type Detector interface {
|
|
|
|
// FromData will scan bytes for results, and optionally verify them.
|
|
|
|
FromData(ctx context.Context, verify bool, data []byte) ([]Result, error)
|
|
|
|
// Keywords are used for efficiently pre-filtering chunks using substring operations.
|
|
|
|
// Use unique identifiers that are part of the secret if you can, or the provider name.
|
|
|
|
Keywords() []string
|
2023-02-09 22:46:03 +00:00
|
|
|
// Type returns the DetectorType number from detectors.proto for the given detector.
|
|
|
|
Type() detectorspb.DetectorType
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
|
2023-03-02 22:33:56 +00:00
|
|
|
// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
	// Version returns the number that distinguishes this detector from other
	// detectors of the same type.
	Version() int
}
|
|
|
|
|
2023-04-27 17:23:50 +00:00
|
|
|
// EndpointCustomizer is an optional interface that a detector can implement to
// support verifying against user-supplied endpoints.
type EndpointCustomizer interface {
	// SetEndpoints supplies the endpoints to verify against. It returns an
	// error if the detector rejects them.
	// NOTE(review): presumably these replace rather than extend any earlier
	// set — confirm against the implementations.
	SetEndpoints(...string) error
	// DefaultEndpoint returns the detector's built-in endpoint.
	// NOTE(review): presumably the one used when none is supplied — confirm.
	DefaultEndpoint() string
}
|
|
|
|
|
2022-01-13 20:02:24 +00:00
|
|
|
type Result struct {
|
|
|
|
// DetectorType is the type of Detector.
|
|
|
|
DetectorType detectorspb.DetectorType
|
2023-03-30 16:40:05 +00:00
|
|
|
// DetectorName is the name of the Detector. Used for custom detectors.
|
|
|
|
DetectorName string
|
2022-10-06 18:55:07 +00:00
|
|
|
// DecoderType is the type of Decoder.
|
|
|
|
DecoderType detectorspb.DecoderType
|
|
|
|
Verified bool
|
2022-01-13 20:02:24 +00:00
|
|
|
// Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing.
|
|
|
|
Raw []byte
|
2022-08-12 21:53:37 +00:00
|
|
|
// RawV2 contains the raw secret identifier that is a combination of both the ID and the secret.
|
|
|
|
// This is used for secrets that are multi part and could have the same ID. Ex: AWS credentials
|
|
|
|
RawV2 []byte
|
2022-01-13 20:02:24 +00:00
|
|
|
// Redacted contains the redacted version of the raw secret identification data for display purposes.
|
|
|
|
// A secret ID should be used if available.
|
|
|
|
Redacted string
|
|
|
|
ExtraData map[string]string
|
|
|
|
StructuredData *detectorspb.StructuredData
|
2023-07-10 15:15:40 +00:00
|
|
|
|
|
|
|
// This field should only be populated if the verification process itself failed in a way that provides no
|
|
|
|
// information about the verification status of the candidate secret, such as if the verification request timed out.
|
|
|
|
VerificationError error
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type ResultWithMetadata struct {
|
2022-04-01 23:47:27 +00:00
|
|
|
// SourceMetadata contains source-specific contextual information.
|
2022-01-13 20:02:24 +00:00
|
|
|
SourceMetadata *source_metadatapb.MetaData
|
|
|
|
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
|
2023-09-14 18:28:24 +00:00
|
|
|
SourceID sources.SourceID
|
2022-01-13 20:02:24 +00:00
|
|
|
// SourceType is the type of Source.
|
|
|
|
SourceType sourcespb.SourceType
|
|
|
|
// SourceName is the name of the Source.
|
|
|
|
SourceName string
|
|
|
|
Result
|
2023-05-24 16:21:41 +00:00
|
|
|
// Data from the sources.Chunk which this result was emitted for
|
|
|
|
Data []byte
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
|
2022-04-01 23:47:27 +00:00
|
|
|
// CopyMetadata returns a detector result with included metadata from the source chunk.
|
2022-01-13 20:02:24 +00:00
|
|
|
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
|
|
|
|
return ResultWithMetadata{
|
|
|
|
SourceMetadata: chunk.SourceMetadata,
|
|
|
|
SourceID: chunk.SourceID,
|
|
|
|
SourceType: chunk.SourceType,
|
|
|
|
SourceName: chunk.SourceName,
|
|
|
|
Result: result,
|
2023-05-24 16:21:41 +00:00
|
|
|
Data: chunk.Data,
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// CleanResults returns all verified secrets, and if there are no verified secrets,
|
|
|
|
// just one unverified secret if there are any.
|
|
|
|
func CleanResults(results []Result) []Result {
|
|
|
|
if len(results) == 0 {
|
|
|
|
return results
|
|
|
|
}
|
|
|
|
|
2022-01-19 06:24:42 +00:00
|
|
|
var cleaned = make(map[string]Result, 0)
|
2022-01-13 20:02:24 +00:00
|
|
|
|
|
|
|
for _, s := range results {
|
|
|
|
if s.Verified {
|
2022-01-19 06:24:42 +00:00
|
|
|
cleaned[s.Redacted] = s
|
2022-01-13 20:02:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(cleaned) == 0 {
|
|
|
|
return results[:1]
|
|
|
|
}
|
|
|
|
|
2022-01-19 06:24:42 +00:00
|
|
|
results = results[:0]
|
|
|
|
for _, r := range cleaned {
|
|
|
|
results = append(results, r)
|
|
|
|
}
|
|
|
|
|
|
|
|
return results
|
|
|
|
}
|
|
|
|
|
2022-04-01 23:47:27 +00:00
|
|
|
// PrefixRegex ensures that at least one of the given keywords is within
// 40 characters of the capturing group that follows.
// This can help prevent false positives.
//
// The returned pattern fragment is case-insensitive and is meant to be
// prepended to the expression that captures the secret. Keywords are joined
// with "|" exactly as given and are not escaped, so any regular-expression
// metacharacters they contain are interpreted as such.
func PrefixRegex(keywords []string) string {
	pre := `(?i)(?:`
	middle := strings.Join(keywords, "|")
	// Up to 40 arbitrary characters, newlines included, may separate the
	// keyword from whatever follows.
	post := `)(?:.|[\n\r]){0,40}`
	return pre + middle + post
}
|
|
|
|
|
2022-08-12 21:53:37 +00:00
|
|
|
// KeyIsRandom is a low-cost check to make sure that 'keys' include a number to reduce FPs.
// It reports whether key contains at least one Unicode decimal digit.
// Golang doesn't support regex lookaheads, so must be done in separate calls.
// TODO improve checks. Shannon entropy did not work well.
func KeyIsRandom(key string) bool {
	return strings.IndexFunc(key, unicode.IsDigit) >= 0
}
|
|
|
|
|
|
|
|
// MustGetBenchmarkData returns freshly generated random payloads of several
// fixed sizes, keyed by a size label from "xsmall" (10 bytes) to "xxlarge"
// (1MB). It panics if the random source cannot be read.
func MustGetBenchmarkData() map[string][]byte {
	sizes := map[string]int{
		"xsmall":  10,          // 10 bytes
		"small":   100,         // 100 bytes
		"medium":  1024,        // 1KB
		"large":   10 * 1024,   // 10KB
		"xlarge":  100 * 1024,  // 100KB
		"xxlarge": 1024 * 1024, // 1MB
	}
	data := make(map[string][]byte, len(sizes))

	for key, size := range sizes {
		// Draw a single uniform integer in [0, 256^size) and write it out as
		// exactly size big-endian bytes. Every byte is uniformly random, as
		// with drawing each byte separately, but it takes one read of the
		// random source per payload instead of one per byte.
		limit := new(big.Int).Lsh(big.NewInt(1), uint(size)*8)
		n, err := rand.Int(rand.Reader, limit)
		if err != nil {
			panic(err)
		}
		data[key] = n.FillBytes(make([]byte, size))
	}

	return data
}
|
2023-06-09 18:06:54 +00:00
|
|
|
|
|
|
|
// RedactURL returns the string form of u with the password in its user
// information replaced by the fixed mask "********". The username is kept.
//
// u is passed by value and only the copy's User field is reassigned, so the
// caller's URL is not modified.
func RedactURL(u url.URL) string {
	u.User = url.UserPassword(u.User.Username(), "********")
	// String percent-encodes each '*' of the mask as "%2A"; turn those back
	// into '*' so the mask is readable. The replacement runs over the whole
	// URL string, not just the user information.
	return strings.TrimSpace(strings.ReplaceAll(u.String(), "%2A", "*"))
}
|