mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-14 00:47:21 +00:00
cb072603dc
* POC: Modularize scanning engine. * fix typo * update interface name * fix tests * update test * fix moar tests * fix bug * fixes. * fix merge * add detector verification overrides * handle --no-verification flag * support fp * add test * update name * filter * update test * explicit use of detector * updates
238 lines
7.7 KiB
Go
238 lines
7.7 KiB
Go
package detectors
|
|
|
|
import (
|
|
"context"
|
|
"crypto/rand"
|
|
"errors"
|
|
"math/big"
|
|
"net/url"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
|
)
|
|
|
|
// Detector defines an interface for scanning for and verifying secrets.
|
|
type Detector interface {
|
|
// FromData will scan bytes for results, and optionally verify them.
|
|
FromData(ctx context.Context, verify bool, data []byte) ([]Result, error)
|
|
// Keywords are used for efficiently pre-filtering chunks using substring operations.
|
|
// Use unique identifiers that are part of the secret if you can, or the provider name.
|
|
Keywords() []string
|
|
// Type returns the DetectorType number from detectors.proto for the given detector.
|
|
Type() detectorspb.DetectorType
|
|
}
|
|
|
|
// Versioner is an optional interface that a detector can implement to
|
|
// differentiate instances of the same detector type.
|
|
type Versioner interface {
|
|
Version() int
|
|
}
|
|
|
|
// MaxSecretSizeProvider is an optional interface that a detector can implement to
|
|
// provide a custom max size for the secret it finds.
|
|
type MaxSecretSizeProvider interface {
|
|
MaxSecretSize() int64
|
|
}
|
|
|
|
// StartOffsetProvider is an optional interface that a detector can implement to
|
|
// provide a custom start offset for the secret it finds.
|
|
type StartOffsetProvider interface {
|
|
StartOffset() int64
|
|
}
|
|
|
|
// MultiPartCredentialProvider is an optional interface that a detector can implement
|
|
// to indicate its compatibility with multi-part credentials and provide the maximum
|
|
// secret size for the credential it finds.
|
|
type MultiPartCredentialProvider interface {
|
|
// MaxCredentialSpan returns the maximum span or range of characters that the
|
|
// detector should consider when searching for a multi-part credential.
|
|
MaxCredentialSpan() int64
|
|
}
|
|
|
|
// EndpointCustomizer is an optional interface that a detector can implement to
|
|
// support verifying against user-supplied endpoints.
|
|
type EndpointCustomizer interface {
|
|
SetEndpoints(...string) error
|
|
DefaultEndpoint() string
|
|
}
|
|
|
|
type Result struct {
|
|
// DetectorType is the type of Detector.
|
|
DetectorType detectorspb.DetectorType
|
|
// DetectorName is the name of the Detector. Used for custom detectors.
|
|
DetectorName string
|
|
// DecoderType is the type of Decoder.
|
|
DecoderType detectorspb.DecoderType
|
|
Verified bool
|
|
// Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing.
|
|
Raw []byte
|
|
// RawV2 contains the raw secret identifier that is a combination of both the ID and the secret.
|
|
// This is used for secrets that are multi part and could have the same ID. Ex: AWS credentials
|
|
RawV2 []byte
|
|
// Redacted contains the redacted version of the raw secret identification data for display purposes.
|
|
// A secret ID should be used if available.
|
|
Redacted string
|
|
ExtraData map[string]string
|
|
StructuredData *detectorspb.StructuredData
|
|
|
|
// This field should only be populated if the verification process itself failed in a way that provides no
|
|
// information about the verification status of the candidate secret, such as if the verification request timed out.
|
|
verificationError error
|
|
}
|
|
|
|
// SetVerificationError is the only way to set a verification error. Any sensitive values should be passed-in as secrets to be redacted.
|
|
func (r *Result) SetVerificationError(err error, secrets ...string) {
|
|
if err != nil {
|
|
r.verificationError = redactSecrets(err, secrets...)
|
|
}
|
|
}
|
|
|
|
// Public accessors for the fields could also be provided if needed.
|
|
func (r *Result) VerificationError() error {
|
|
return r.verificationError
|
|
}
|
|
|
|
// redactSecrets replaces all instances of the given secrets with [REDACTED] in the error message.
|
|
func redactSecrets(err error, secrets ...string) error {
|
|
lastErr := unwrapToLast(err)
|
|
errStr := lastErr.Error()
|
|
for _, secret := range secrets {
|
|
errStr = strings.Replace(errStr, secret, "[REDACTED]", -1)
|
|
}
|
|
return errors.New(errStr)
|
|
}
|
|
|
|
// unwrapToLast returns the last error in the chain of errors.
|
|
// This is added to exclude non-essential details (like URLs) for brevity and security.
|
|
// Also helps us optimize performance in redaction and enhance log clarity.
|
|
func unwrapToLast(err error) error {
|
|
for {
|
|
unwrapped := errors.Unwrap(err)
|
|
if unwrapped == nil {
|
|
// We've reached the last error in the chain
|
|
return err
|
|
}
|
|
err = unwrapped
|
|
}
|
|
}
|
|
|
|
type ResultWithMetadata struct {
|
|
// IsWordlistFalsePositive indicates whether this secret was flagged as a false positive based on a wordlist check
|
|
IsWordlistFalsePositive bool
|
|
// SourceMetadata contains source-specific contextual information.
|
|
SourceMetadata *source_metadatapb.MetaData
|
|
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
|
|
SourceID sources.SourceID
|
|
// JobID is the ID of the job that the API uses to map secrets to specific jobs.
|
|
JobID sources.JobID
|
|
// SecretID is the ID of the secret, if it exists.
|
|
// Only secrets that are being reverified will have a SecretID.
|
|
SecretID int64
|
|
// SourceType is the type of Source.
|
|
SourceType sourcespb.SourceType
|
|
// SourceName is the name of the Source.
|
|
SourceName string
|
|
Result
|
|
// Data from the sources.Chunk which this result was emitted for
|
|
Data []byte
|
|
}
|
|
|
|
// CopyMetadata returns a detector result with included metadata from the source chunk.
|
|
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
|
|
return ResultWithMetadata{
|
|
SourceMetadata: chunk.SourceMetadata,
|
|
SourceID: chunk.SourceID,
|
|
JobID: chunk.JobID,
|
|
SecretID: chunk.SecretID,
|
|
SourceType: chunk.SourceType,
|
|
SourceName: chunk.SourceName,
|
|
Result: result,
|
|
Data: chunk.Data,
|
|
}
|
|
}
|
|
|
|
// CleanResults returns all verified secrets, and if there are no verified secrets,
|
|
// just one unverified secret if there are any.
|
|
func CleanResults(results []Result) []Result {
|
|
if len(results) == 0 {
|
|
return results
|
|
}
|
|
|
|
var cleaned = make(map[string]Result, 0)
|
|
|
|
for _, s := range results {
|
|
if s.Verified {
|
|
cleaned[s.Redacted] = s
|
|
}
|
|
}
|
|
|
|
if len(cleaned) == 0 {
|
|
return results[:1]
|
|
}
|
|
|
|
results = results[:0]
|
|
for _, r := range cleaned {
|
|
results = append(results, r)
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// PrefixRegex ensures that at least one of the given keywords is within
|
|
// 40 characters of the capturing group that follows.
|
|
// This can help prevent false positives.
|
|
func PrefixRegex(keywords []string) string {
|
|
pre := `(?i:`
|
|
middle := strings.Join(keywords, "|")
|
|
post := `)(?:.|[\n\r]){0,40}?`
|
|
return pre + middle + post
|
|
}
|
|
|
|
// KeyIsRandom is a Low cost check to make sure that 'keys' include a number to reduce FPs.
|
|
// Golang doesn't support regex lookaheads, so must be done in separate calls.
|
|
// TODO improve checks. Shannon entropy did not work well.
|
|
func KeyIsRandom(key string) bool {
|
|
for _, ch := range key {
|
|
if unicode.IsDigit(ch) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func MustGetBenchmarkData() map[string][]byte {
|
|
sizes := map[string]int{
|
|
"xsmall": 10, // 10 bytes
|
|
"small": 100, // 100 bytes
|
|
"medium": 1024, // 1KB
|
|
"large": 10 * 1024, // 10KB
|
|
"xlarge": 100 * 1024, // 100KB
|
|
"xxlarge": 1024 * 1024, // 1MB
|
|
}
|
|
data := make(map[string][]byte)
|
|
|
|
for key, size := range sizes {
|
|
// Generating a byte slice of a specific size with random data.
|
|
content := make([]byte, size)
|
|
for i := 0; i < size; i++ {
|
|
randomByte, err := rand.Int(rand.Reader, big.NewInt(256))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
content[i] = byte(randomByte.Int64())
|
|
}
|
|
data[key] = content
|
|
}
|
|
|
|
return data
|
|
}
|
|
|
|
func RedactURL(u url.URL) string {
|
|
u.User = url.UserPassword(u.User.Username(), "********")
|
|
return strings.TrimSpace(strings.Replace(u.String(), "%2A", "*", -1))
|
|
}
|