2022-12-02 17:26:22 +00:00
|
|
|
package custom_detectors
|
|
|
|
|
|
|
|
import (
	"bytes"
	"context"
	"encoding/json"
	"net/http"
	"regexp"
	"sort"
	"strings"

	"golang.org/x/sync/errgroup"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// maxTotalMatches is the maximum number of match permutations produced from
// one chunk. It caps the work done when permuting the matches of every regex,
// protecting the scanner from doing too much work for poorly defined regexps.
// It also sizes the buffered results channel in FromData.
const maxTotalMatches = 100
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2023-10-30 22:17:17 +00:00
|
|
|
// CustomRegexWebhook is a CustomRegex with webhook validation that is
// guaranteed to be valid (assuming the data is not changed after
// initialization). Values should be obtained from NewWebhookCustomRegex, which
// performs that validation.
type CustomRegexWebhook struct {
	// The embedded message supplies the configuration getters (GetName,
	// GetRegex, GetKeywords, GetVerify) used by the methods of this type.
	*custom_detectorspb.CustomRegex
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Ensure the Scanner satisfies the interface at compile time.
|
2023-10-30 22:17:17 +00:00
|
|
|
var _ detectors.Detector = (*CustomRegexWebhook)(nil)
|
2024-04-30 20:10:26 +00:00
|
|
|
var _ detectors.CustomFalsePositiveChecker = (*CustomRegexWebhook)(nil)
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2023-10-30 22:17:17 +00:00
|
|
|
// NewWebhookCustomRegex initializes and validates a CustomRegexWebhook. An
|
2022-12-14 16:26:53 +00:00
|
|
|
// unexported type is intentionally returned here to ensure the values have
|
|
|
|
// been validated.
|
2023-10-30 22:17:17 +00:00
|
|
|
func NewWebhookCustomRegex(pb *custom_detectorspb.CustomRegex) (*CustomRegexWebhook, error) {
|
2022-12-14 16:26:53 +00:00
|
|
|
// TODO: Return all validation errors.
|
|
|
|
if err := ValidateKeywords(pb.Keywords); err != nil {
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
if err := ValidateRegex(pb.Regex); err != nil {
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
for _, verify := range pb.Verify {
|
|
|
|
if err := ValidateVerifyEndpoint(verify.Endpoint, verify.Unsafe); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if err := ValidateVerifyHeaders(verify.Headers); err != nil {
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// TODO: Copy only necessary data out of pb.
|
2023-10-30 22:17:17 +00:00
|
|
|
return &CustomRegexWebhook{pb}, nil
|
2022-12-14 16:26:53 +00:00
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// httpClient is the package-level HTTP client used to send webhook
// verification requests. It is obtained once from common.SaneHttpClient.
var httpClient = common.SaneHttpClient()
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2023-10-30 22:17:17 +00:00
|
|
|
func (c *CustomRegexWebhook) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
2022-12-14 16:26:53 +00:00
|
|
|
dataStr := string(data)
|
|
|
|
regexMatches := make(map[string][][]string, len(c.GetRegex()))
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Find all submatches for each regex.
|
|
|
|
for name, regex := range c.GetRegex() {
|
|
|
|
regex, err := regexp.Compile(regex)
|
|
|
|
if err != nil {
|
2023-01-09 17:45:30 +00:00
|
|
|
// This will only happen if the regex is invalid.
|
|
|
|
return nil, err
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
regexMatches[name] = regex.FindAllStringSubmatch(dataStr, -1)
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Permutate each individual match.
|
|
|
|
// {
|
|
|
|
// "foo": [["match1"]]
|
|
|
|
// "bar": [["match2"], ["match3"]]
|
|
|
|
// }
|
|
|
|
// becomes
|
|
|
|
// [
|
|
|
|
// {"foo": ["match1"], "bar": ["match2"]},
|
|
|
|
// {"foo": ["match1"], "bar": ["match3"]},
|
|
|
|
// ]
|
|
|
|
matches := permutateMatches(regexMatches)
|
|
|
|
|
2023-02-28 16:12:24 +00:00
|
|
|
g := new(errgroup.Group)
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Create result object and test for verification.
|
2023-02-28 16:12:24 +00:00
|
|
|
resultsCh := make(chan detectors.Result, maxTotalMatches)
|
2022-12-14 16:26:53 +00:00
|
|
|
for _, match := range matches {
|
2023-02-28 16:12:24 +00:00
|
|
|
match := match
|
|
|
|
g.Go(func() error {
|
|
|
|
return c.createResults(ctx, match, verify, resultsCh)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-03-15 22:26:03 +00:00
|
|
|
// Ignore any errors and collect as many of the results as we can.
|
|
|
|
_ = g.Wait()
|
2023-02-28 16:12:24 +00:00
|
|
|
close(resultsCh)
|
|
|
|
|
|
|
|
for result := range resultsCh {
|
2023-06-20 18:55:31 +00:00
|
|
|
// NOTE: I don't believe this is being set anywhere else, hence the map assignment.
|
|
|
|
result.ExtraData = map[string]string{
|
|
|
|
"name": c.GetName(),
|
|
|
|
}
|
2023-02-28 16:12:24 +00:00
|
|
|
results = append(results, result)
|
|
|
|
}
|
|
|
|
|
|
|
|
return results, nil
|
|
|
|
}
|
|
|
|
|
2024-06-14 15:26:05 +00:00
|
|
|
// IsFalsePositive is the method that lets *CustomRegexWebhook satisfy
// detectors.CustomFalsePositiveChecker. It ignores its argument and always
// returns false together with an empty string.
func (c *CustomRegexWebhook) IsFalsePositive(_ detectors.Result) (bool, string) {
	return false, ""
}
|
|
|
|
|
2023-10-30 22:17:17 +00:00
|
|
|
func (c *CustomRegexWebhook) createResults(ctx context.Context, match map[string][]string, verify bool, results chan<- detectors.Result) error {
|
2023-02-28 16:12:24 +00:00
|
|
|
if common.IsDone(ctx) {
|
|
|
|
// TODO: Log we're possibly leaving out results.
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
|
|
|
var raw string
|
|
|
|
for _, values := range match {
|
|
|
|
// values[0] contains the entire regex match.
|
|
|
|
raw += values[0]
|
|
|
|
}
|
|
|
|
result := detectors.Result{
|
|
|
|
DetectorType: detectorspb.DetectorType_CustomRegex,
|
2023-03-30 16:40:05 +00:00
|
|
|
DetectorName: c.GetName(),
|
2023-02-28 16:12:24 +00:00
|
|
|
Raw: []byte(raw),
|
|
|
|
}
|
|
|
|
|
|
|
|
if !verify {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case results <- result:
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Verify via webhook.
|
|
|
|
jsonBody, err := json.Marshal(map[string]map[string][]string{
|
|
|
|
c.GetName(): match,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
// This should never happen, but if it does, return nil to not
|
|
|
|
// disrupt other verification.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// Try each config until we successfully verify.
|
|
|
|
for _, verifyConfig := range c.GetVerify() {
|
2022-12-14 16:26:53 +00:00
|
|
|
if common.IsDone(ctx) {
|
|
|
|
// TODO: Log we're possibly leaving out results.
|
2023-02-28 16:12:24 +00:00
|
|
|
return ctx.Err()
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2023-02-28 16:12:24 +00:00
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", verifyConfig.GetEndpoint(), bytes.NewReader(jsonBody))
|
2022-12-14 16:26:53 +00:00
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
2023-02-28 16:12:24 +00:00
|
|
|
for _, header := range verifyConfig.GetHeaders() {
|
|
|
|
key, value, found := strings.Cut(header, ":")
|
|
|
|
if !found {
|
|
|
|
// Should be unreachable due to validation.
|
2022-12-14 16:26:53 +00:00
|
|
|
continue
|
|
|
|
}
|
2023-02-28 16:12:24 +00:00
|
|
|
req.Header.Add(key, strings.TrimLeft(value, "\t\n\v\f\r "))
|
|
|
|
}
|
|
|
|
res, err := httpClient.Do(req)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// TODO: Read response body.
|
|
|
|
res.Body.Close()
|
|
|
|
if res.StatusCode == http.StatusOK {
|
|
|
|
result.Verified = true
|
|
|
|
break
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-02-28 16:12:24 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case results <- result:
|
|
|
|
return nil
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2023-10-30 22:17:17 +00:00
|
|
|
func (c *CustomRegexWebhook) Keywords() []string {
|
2022-12-14 16:26:53 +00:00
|
|
|
return c.GetKeywords()
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// productIndices produces a permutation of indices for each length. Example:
|
|
|
|
// productIndices(3, 2) -> [[0 0] [1 0] [2 0] [0 1] [1 1] [2 1]]. It returns
|
|
|
|
// a slice of length no larger than maxTotalMatches.
|
|
|
|
func productIndices(lengths ...int) [][]int {
|
|
|
|
count := 1
|
|
|
|
for _, l := range lengths {
|
|
|
|
count *= l
|
|
|
|
}
|
|
|
|
if count == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if count > maxTotalMatches {
|
|
|
|
count = maxTotalMatches
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
results := make([][]int, count)
|
|
|
|
for i := 0; i < count; i++ {
|
|
|
|
j := 1
|
|
|
|
result := make([]int, 0, len(lengths))
|
|
|
|
for _, l := range lengths {
|
|
|
|
result = append(result, (i/j)%l)
|
|
|
|
j *= l
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
results[i] = result
|
|
|
|
}
|
|
|
|
return results
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// permutateMatches converts the list of all regex matches into all possible
|
|
|
|
// permutations selecting one from each named entry in the map. For example:
|
|
|
|
// {"foo": [matchA, matchB], "bar": [matchC]} becomes
|
2023-01-09 17:45:30 +00:00
|
|
|
//
|
|
|
|
// [{"foo": matchA, "bar": matchC}, {"foo": matchB, "bar": matchC}]
|
2022-12-14 16:26:53 +00:00
|
|
|
func permutateMatches(regexMatches map[string][][]string) []map[string][]string {
|
|
|
|
// Get a consistent order for names and their matching lengths.
|
|
|
|
// The lengths are used in calculating the permutation so order matters.
|
|
|
|
names := make([]string, 0, len(regexMatches))
|
|
|
|
lengths := make([]int, 0, len(regexMatches))
|
|
|
|
for key, value := range regexMatches {
|
|
|
|
names = append(names, key)
|
|
|
|
lengths = append(lengths, len(value))
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
// Permutate all the indices for each match. For example, if "foo" has
|
|
|
|
// [matchA, matchB] and "bar" has [matchC], we will get indices [0 0] [1 0].
|
|
|
|
permutationIndices := productIndices(lengths...)
|
|
|
|
|
|
|
|
// Build {"foo": matchA, "bar": matchC} and {"foo": matchB, "bar": matchC}
|
|
|
|
// from the indices.
|
|
|
|
var matches []map[string][]string
|
|
|
|
for _, permutation := range permutationIndices {
|
|
|
|
candidate := make(map[string][]string, len(permutationIndices))
|
|
|
|
for i, name := range names {
|
|
|
|
candidate[name] = regexMatches[name][permutation[i]]
|
2022-12-02 17:26:22 +00:00
|
|
|
}
|
2022-12-14 16:26:53 +00:00
|
|
|
matches = append(matches, candidate)
|
|
|
|
}
|
2022-12-02 17:26:22 +00:00
|
|
|
|
2022-12-14 16:26:53 +00:00
|
|
|
return matches
|
|
|
|
}
|
2023-02-09 22:46:03 +00:00
|
|
|
|
2023-10-30 22:17:17 +00:00
|
|
|
// Type returns detectorspb.DetectorType_CustomRegex, the same detector type
// that createResults sets on every result it builds.
func (c *CustomRegexWebhook) Type() detectorspb.DetectorType {
	return detectorspb.DetectorType_CustomRegex
}
|