2022-12-02 17:26:22 +00:00
|
|
|
package custom_detectors
|
|
|
|
|
|
|
|
import (
	"bytes"
	"context"
	"encoding/json"
	"net/http"
	"regexp"
	"sort"
	"strings"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// maxTotalMatches is the maximum number of match permutations produced from
// one chunk. It bounds the work done when permuting the matches of each
// regex, protecting the scanner from poorly defined regexps.
const maxTotalMatches = 100
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// customRegexWebhook is a CustomRegex with webhook validation that is
|
|
|
|
// guaranteed to be valid (assuming the data is not changed after
|
|
|
|
// initialization).
|
|
|
|
type customRegexWebhook struct {
|
|
|
|
*custom_detectorspb.CustomRegex
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Ensure the Scanner satisfies the interface at compile time.
|
|
|
|
var _ detectors.Detector = (*customRegexWebhook)(nil)
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// NewWebhookCustomRegex initializes and validates a customRegexWebhook. An
|
|
|
|
// unexported type is intentionally returned here to ensure the values have
|
|
|
|
// been validated.
|
|
|
|
func NewWebhookCustomRegex(pb *custom_detectorspb.CustomRegex) (*customRegexWebhook, error) {
|
|
|
|
// TODO: Return all validation errors.
|
|
|
|
if err := ValidateKeywords(pb.Keywords); err != nil {
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
if err := ValidateRegex(pb.Regex); err != nil {
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
for _, verify := range pb.Verify {
|
|
|
|
if err := ValidateVerifyEndpoint(verify.Endpoint, verify.Unsafe); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if err := ValidateVerifyHeaders(verify.Headers); err != nil {
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// TODO: Copy only necessary data out of pb.
|
|
|
|
return &customRegexWebhook{pb}, nil
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// httpClient is the package-level HTTP client, obtained from
// common.SaneHttpClient, that FromData uses to send webhook verification
// requests.
var httpClient = common.SaneHttpClient()
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
func (c *customRegexWebhook) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
|
|
|
dataStr := string(data)
|
|
|
|
regexMatches := make(map[string][][]string, len(c.GetRegex()))
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Find all submatches for each regex.
|
|
|
|
for name, regex := range c.GetRegex() {
|
|
|
|
regex, err := regexp.Compile(regex)
|
|
|
|
if err != nil {
|
2023-01-09 17:45:30 +00:00
|
|
|
// This will only happen if the regex is invalid.
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
regexMatches[name] = regex.FindAllStringSubmatch(dataStr, -1)
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Permutate each individual match.
|
|
|
|
// {
|
|
|
|
// "foo": [["match1"]]
|
|
|
|
// "bar": [["match2"], ["match3"]]
|
|
|
|
// }
|
|
|
|
// becomes
|
|
|
|
// [
|
|
|
|
// {"foo": ["match1"], "bar": ["match2"]},
|
|
|
|
// {"foo": ["match1"], "bar": ["match3"]},
|
|
|
|
// ]
|
|
|
|
matches := permutateMatches(regexMatches)
|
|
|
|
|
|
|
|
// Create result object and test for verification.
|
|
|
|
for _, match := range matches {
|
|
|
|
if common.IsDone(ctx) {
|
|
|
|
// TODO: Log we're possibly leaving out results.
|
|
|
|
return results, nil
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
var raw string
|
|
|
|
for _, values := range match {
|
|
|
|
// values[0] contains the entire regex match.
|
|
|
|
raw += values[0]
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
result := detectors.Result{
|
|
|
|
DetectorType: detectorspb.DetectorType_CustomRegex,
|
|
|
|
Raw: []byte(raw),
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
if isKnownFalsePositive(match) {
|
|
|
|
continue
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
if !verify {
|
|
|
|
results = append(results, result)
|
|
|
|
continue
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
// Verify via webhook.
|
|
|
|
jsonBody, err := json.Marshal(map[string]map[string][]string{
|
|
|
|
c.GetName(): match,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Try each config until we successfully verify.
|
|
|
|
for _, verifyConfig := range c.GetVerify() {
|
|
|
|
if common.IsDone(ctx) {
|
|
|
|
// TODO: Log we're possibly leaving out results.
|
|
|
|
return results, nil
|
|
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", verifyConfig.GetEndpoint(), bytes.NewReader(jsonBody))
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for _, header := range verifyConfig.GetHeaders() {
|
|
|
|
key, value, found := strings.Cut(header, ":")
|
|
|
|
if !found {
|
|
|
|
// Should be unreachable due to validation.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
req.Header.Add(key, strings.TrimLeft(value, "\t\n\v\f\r "))
|
|
|
|
}
|
|
|
|
res, err := httpClient.Do(req)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// TODO: Read response body.
|
|
|
|
res.Body.Close()
|
|
|
|
if res.StatusCode == http.StatusOK {
|
|
|
|
result.Verified = true
|
|
|
|
break
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
results = append(results, result)
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
return results, nil
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
func (c *customRegexWebhook) Keywords() []string {
|
|
|
|
return c.GetKeywords()
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// productIndices produces a permutation of indices for each length. Example:
|
|
|
|
// productIndices(3, 2) -> [[0 0] [1 0] [2 0] [0 1] [1 1] [2 1]]. It returns
|
|
|
|
// a slice of length no larger than maxTotalMatches.
|
|
|
|
func productIndices(lengths ...int) [][]int {
|
|
|
|
count := 1
|
|
|
|
for _, l := range lengths {
|
|
|
|
count *= l
|
|
|
|
}
|
|
|
|
if count == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if count > maxTotalMatches {
|
|
|
|
count = maxTotalMatches
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
results := make([][]int, count)
|
|
|
|
for i := 0; i < count; i++ {
|
|
|
|
j := 1
|
|
|
|
result := make([]int, 0, len(lengths))
|
|
|
|
for _, l := range lengths {
|
|
|
|
result = append(result, (i/j)%l)
|
|
|
|
j *= l
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
results[i] = result
|
|
|
|
}
|
|
|
|
return results
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// permutateMatches converts the list of all regex matches into all possible
|
|
|
|
// permutations selecting one from each named entry in the map. For example:
|
|
|
|
// {"foo": [matchA, matchB], "bar": [matchC]} becomes
|
2023-01-09 17:45:30 +00:00
|
|
|
//
|
|
|
|
// [{"foo": matchA, "bar": matchC}, {"foo": matchB, "bar": matchC}]
|
2022-12-14 16:26:53 +00:00
|
|
|
func permutateMatches(regexMatches map[string][][]string) []map[string][]string {
|
|
|
|
// Get a consistent order for names and their matching lengths.
|
|
|
|
// The lengths are used in calculating the permutation so order matters.
|
|
|
|
names := make([]string, 0, len(regexMatches))
|
|
|
|
lengths := make([]int, 0, len(regexMatches))
|
|
|
|
for key, value := range regexMatches {
|
|
|
|
names = append(names, key)
|
|
|
|
lengths = append(lengths, len(value))
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Permutate all the indices for each match. For example, if "foo" has
|
|
|
|
// [matchA, matchB] and "bar" has [matchC], we will get indices [0 0] [1 0].
|
|
|
|
permutationIndices := productIndices(lengths...)
|
|
|
|
|
|
|
|
// Build {"foo": matchA, "bar": matchC} and {"foo": matchB, "bar": matchC}
|
|
|
|
// from the indices.
|
|
|
|
var matches []map[string][]string
|
|
|
|
for _, permutation := range permutationIndices {
|
|
|
|
candidate := make(map[string][]string, len(permutationIndices))
|
|
|
|
for i, name := range names {
|
|
|
|
candidate[name] = regexMatches[name][permutation[i]]
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
matches = append(matches, candidate)
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
return matches
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// This function will check false positives for common test words, but also it
|
|
|
|
// will make sure the key appears 'random' enough to be a real key.
|
|
|
|
func isKnownFalsePositive(match map[string][]string) bool {
|
|
|
|
for _, values := range match {
|
|
|
|
for _, value := range values {
|
|
|
|
if detectors.IsKnownFalsePositive(value, detectors.DefaultFalsePositives, true) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
return false
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|