Support filtering detectors by version (#1150)

* Adjust types to use DetectorID struct

* Parse versions with detector include and exclude input

* Update detectors filter to use version

Co-authored-by: steeeve <steve@trufflesec.com>

* Implement Versioner for github, gitlab, and npm detectors

Co-authored-by: steeeve <steve@trufflesec.com>

---------

Co-authored-by: steeeve <steve@trufflesec.com>
This commit is contained in:
Miccah 2023-03-02 16:33:56 -06:00 committed by GitHub
parent 4500ac3b10
commit e6846ede54
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 153 additions and 57 deletions

50
main.go
View file

@ -216,30 +216,52 @@ func run(state overseer.State) {
}
// Build include and exclude detector filter sets.
var includeDetectorTypes, excludeDetectorTypes map[detectorspb.DetectorType]struct{}
var includeDetectorTypes, excludeDetectorTypes map[detectorspb.DetectorType]config.DetectorID
{
includeList, err := config.ParseDetectors(*includeDetectors)
if err != nil {
// Exit if there was an error to inform the user of the misconfiguration.
logger.Error(err, "invalid include list detector configuration")
os.Exit(1)
logFatal(err, "invalid include list detector configuration")
}
excludeList, err := config.ParseDetectors(*excludeDetectors)
if err != nil {
// Exit if there was an error to inform the user of the misconfiguration.
logger.Error(err, "invalid exclude list detector configuration")
os.Exit(1)
logFatal(err, "invalid exclude list detector configuration")
}
includeDetectorTypes = detectorTypeToSet(includeList)
excludeDetectorTypes = detectorTypeToSet(excludeList)
includeDetectorTypes = detectorTypeToMap(includeList)
excludeDetectorTypes = detectorTypeToMap(excludeList)
}
includeFilter := func(d detectors.Detector) bool {
_, ok := includeDetectorTypes[d.Type()]
return ok
id, ok := includeDetectorTypes[d.Type()]
if id.Version == 0 {
return ok
}
versionD, ok := d.(detectors.Versioner)
if !ok {
// Error: version provided but not a detectors.Versioner
logFatal(
fmt.Errorf("version provided but detector does not have a version"),
"invalid include list detector configuration",
"detector", id,
)
}
return versionD.Version() == id.Version
}
excludeFilter := func(d detectors.Detector) bool {
_, ok := excludeDetectorTypes[d.Type()]
return !ok
id, ok := excludeDetectorTypes[d.Type()]
if id.Version == 0 {
return !ok
}
versionD, ok := d.(detectors.Versioner)
if !ok {
// Error: version provided but not a detectors.Versioner
logFatal(
fmt.Errorf("version provided but detector does not have a version"),
"invalid exclude list detector configuration",
"detector", id,
)
}
return versionD.Version() != id.Version
}
e := engine.Start(ctx,
@ -431,10 +453,10 @@ func logFatalFunc(logger logr.Logger) func(error, string, ...any) {
}
}
func detectorTypeToSet(detectors []detectorspb.DetectorType) map[detectorspb.DetectorType]struct{} {
output := make(map[detectorspb.DetectorType]struct{}, len(detectors))
func detectorTypeToMap(detectors []config.DetectorID) map[detectorspb.DetectorType]config.DetectorID {
output := make(map[detectorspb.DetectorType]config.DetectorID, len(detectors))
for _, d := range detectors {
output[d] = struct{}{}
output[d.ID] = d
}
return output
}

View file

@ -2,15 +2,15 @@ package config
import (
"fmt"
"sort"
"strconv"
"strings"
dpb "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"golang.org/x/exp/slices"
)
var (
specialGroups = map[string][]dpb.DetectorType{
specialGroups = map[string][]DetectorID{
"all": allDetectors(),
}
@ -31,14 +31,19 @@ func init() {
}
}
// DetectorID identifies a detector by its protobuf detector type together
// with an optional version.
type DetectorID struct {
// ID is the protobuf detector type.
ID dpb.DetectorType
// Version is the requested detector version. The zero value means no
// version was specified.
Version int
}
// ParseDetectors parses a user-supplied string into a list of detector IDs.
// "all" will return the list of all available detectors. The input is comma
// separated and may use the case-insensitive detector name defined in the
// protobuf, or the protobuf enum number. A single detector may carry a version
// suffix of the form ".v<N>" (for example "github.v2"). A range may be used as
// well in the form "start-end"; versions are not supported within ranges.
// Order is preserved and duplicates are ignored.
func ParseDetectors(input string) ([]dpb.DetectorType, error) {
var output []dpb.DetectorType
seenDetector := map[dpb.DetectorType]struct{}{}
func ParseDetectors(input string) ([]DetectorID, error) {
var output []DetectorID
seenDetector := map[DetectorID]struct{}{}
for _, item := range strings.Split(input, ",") {
item = strings.TrimSpace(item)
if item == "" {
@ -63,24 +68,35 @@ func ParseDetectors(input string) ([]dpb.DetectorType, error) {
return output, nil
}
// allDetectors returns an ordered slice of all detector types.
func allDetectors() []dpb.DetectorType {
all := make([]dpb.DetectorType, 0, len(dpb.DetectorType_name))
for id := range dpb.DetectorType_name {
all = append(all, dpb.DetectorType(id))
func (id DetectorID) String() string {
name := dpb.DetectorType_name[int32(id.ID)]
if name == "" {
name = "<invalid ID>"
}
slices.Sort(all)
if id.Version == 0 {
return name
}
return fmt.Sprintf("%s.v%d", name, id.Version)
}
// allDetectors returns an ordered slice of all detector types.
func allDetectors() []DetectorID {
all := make([]DetectorID, 0, len(dpb.DetectorType_name))
for id := range dpb.DetectorType_name {
all = append(all, DetectorID{ID: dpb.DetectorType(id)})
}
sort.Slice(all, func(i, j int) bool { return all[i].ID < all[j].ID })
return all
}
// asRange converts a single input into a slice of detector types. If the input
// is not in range format, a slice of length 1 is returned. Unbounded ranges
// are allowed.
func asRange(input string) ([]dpb.DetectorType, error) {
func asRange(input string) ([]DetectorID, error) {
// Check if it's a single detector type.
dt, err := asDetectorType(input)
dt, err := asDetectorID(input)
if err == nil {
return []dpb.DetectorType{dt}, nil
return []DetectorID{dt}, nil
}
// Check if it's a range; if not return the error from above.
@ -91,50 +107,77 @@ func asRange(input string) ([]dpb.DetectorType, error) {
start, end = strings.TrimSpace(start), strings.TrimSpace(end)
// Convert the range start and end to a DetectorID.
dtStart, err := asDetectorType(start)
dtStart, err := asDetectorID(start)
if err != nil {
return nil, err
}
dtEnd, err := asDetectorType(end)
dtEnd, err := asDetectorID(end)
// If end is empty it's an unbounded range.
if err != nil && end != "" {
return nil, err
}
if end == "" {
dtEnd = maxDetectorType
dtEnd.ID = maxDetectorType
}
// Ensure these ranges don't have versions.
if dtEnd.Version != 0 || dtStart.Version != 0 {
return nil, fmt.Errorf("versions within ranges are not supported: %s", input)
}
step := dpb.DetectorType(1)
if dtStart > dtEnd {
if dtStart.ID > dtEnd.ID {
step = -1
}
var output []dpb.DetectorType
for dt := dtStart; dt != dtEnd; dt += step {
var output []DetectorID
for dt := dtStart.ID; dt != dtEnd.ID; dt += step {
if _, ok := validDetectors[dt]; !ok {
continue
}
output = append(output, dt)
output = append(output, DetectorID{ID: dt})
}
return append(output, dtEnd), nil
}
// asDetectorType converts the case-insensitive input into a detector type.
// asDetectorID converts the case-insensitive input into a DetectorID.
// Name or ID may be used.
func asDetectorType(input string) (dpb.DetectorType, error) {
func asDetectorID(input string) (DetectorID, error) {
if input == "" {
return 0, fmt.Errorf("empty detector")
return DetectorID{}, fmt.Errorf("empty detector")
}
var detectorID DetectorID
// Separate the version if there is one.
if detector, version, hasVersion := strings.Cut(input, "."); hasVersion {
parsedVersion, err := parseVersion(version)
if err != nil {
return DetectorID{}, fmt.Errorf("invalid version for input: %q error: %w", input, err)
}
detectorID.Version = parsedVersion
// Because there was a version, the detector type input is the part before the '.'
input = detector
}
// Check if it's a named detector.
if dt, ok := detectorTypeValue[strings.ToLower(input)]; ok {
return dt, nil
detectorID.ID = dt
return detectorID, nil
}
// Check if it's a detector ID.
if i, err := strconv.ParseInt(input, 10, 32); err == nil {
dt := dpb.DetectorType(i)
if _, ok := validDetectors[dt]; !ok {
return 0, fmt.Errorf("invalid detector ID: %s", input)
return DetectorID{}, fmt.Errorf("invalid detector ID: %s", input)
}
return dt, nil
detectorID.ID = dt
return detectorID, nil
}
return 0, fmt.Errorf("unrecognized detector type: %s", input)
return DetectorID{}, fmt.Errorf("unrecognized detector type: %s", input)
}
// parseVersion parses a version suffix of the form "v<N>" and returns N.
// The leading 'v' is matched case-insensitively. An error is returned when
// the prefix is missing, when the remainder is not a base-10 integer, or when
// the parsed value is negative.
func parseVersion(v string) (int, error) {
	if v == "" || (v[0] != 'v' && v[0] != 'V') {
		return 0, fmt.Errorf("version must start with 'v'")
	}
	// Strip exactly one prefix character. strings.TrimLeft treats its second
	// argument as a cutset and would remove every leading 'v'/'V', silently
	// accepting malformed input such as "vv2".
	version, err := strconv.Atoi(v[1:])
	if err != nil {
		return 0, err
	}
	// A negative version can never match a detector; reject it up front so
	// the user is told about the misconfiguration.
	if version < 0 {
		return 0, fmt.Errorf("version must not be negative: %d", version)
	}
	return version, nil
}

View file

@ -10,21 +10,28 @@ import (
func TestDetectorParsing(t *testing.T) {
tests := map[string]struct {
input string
expected []dpb.DetectorType
expected []DetectorID
}{
"all": {"AlL", allDetectors()},
"trailing range": {"0-", allDetectors()},
"all after 1": {"1-", allDetectors()[1:]},
"named and valid range": {"aWs,8-9", []dpb.DetectorType{dpb.DetectorType_AWS, dpb.DetectorType_Github, dpb.DetectorType_Gitlab}},
"duplicate order preserved": {"9, 8, 9", []dpb.DetectorType{9, 8}},
"named range": {"github - gitlab", []dpb.DetectorType{dpb.DetectorType_Github, dpb.DetectorType_Gitlab}},
"range preserved": {"8-9, 7-10", []dpb.DetectorType{8, 9, 7, 10}},
"reverse range": {"9-8", []dpb.DetectorType{9, 8}},
"named and valid range": {"aWs,8-9", []DetectorID{{ID: dpb.DetectorType_AWS}, {ID: dpb.DetectorType_Github}, {ID: dpb.DetectorType_Gitlab}}},
"duplicate order preserved": {"9, 8, 9", []DetectorID{{ID: 9}, {ID: 8}}},
"named range": {"github - gitlab", []DetectorID{{ID: dpb.DetectorType_Github}, {ID: dpb.DetectorType_Gitlab}}},
"range preserved": {"8-9, 7-10", []DetectorID{{ID: 8}, {ID: 9}, {ID: 7}, {ID: 10}}},
"reverse range": {"9-8", []DetectorID{{ID: 9}, {ID: 8}}},
"range preserved with all": {"10-,all", append(allDetectors()[10:], allDetectors()[:10]...)},
"empty list item": {"8, ,9", []dpb.DetectorType{8, 9}},
"empty list item": {"8, ,9", []DetectorID{{ID: 8}, {ID: 9}}},
"invalid end range": {"0-1337", nil},
"invalid name": {"foo", nil},
"negative": {"-1", nil},
"github.v1": {"github.v1", []DetectorID{{ID: dpb.DetectorType_Github, Version: 1}}},
"gitlab.v100": {"gitlab.v100", []DetectorID{{ID: dpb.DetectorType_Gitlab, Version: 100}}},
"range with versions": {"github.v2 - gitlab.v1", nil},
"invalid version no v": {"gitlab.2", nil},
"invalid version no number": {"gitlab.github", nil},
"capital V is fine": {"GiTlAb.V2", []DetectorID{{ID: dpb.DetectorType_Gitlab, Version: 2}}},
"id number with version": {"8.v2", []DetectorID{{ID: 8, Version: 2}}},
}
for name, tt := range tests {

View file

@ -25,6 +25,12 @@ type Detector interface {
Type() detectorspb.DetectorType
}
// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
// Version returns the version number of this detector implementation.
Version() int
}
type Result struct {
// DetectorType is the type of Detector.
DetectorType detectorspb.DetectorType

View file

@ -15,8 +15,11 @@ import (
type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
// Ensure the Scanner satisfies the interfaces at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)
func (s Scanner) Version() int { return 2 }
var (
// Oauth token

View file

@ -14,8 +14,11 @@ import (
type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
// Ensure the Scanner satisfies the interfaces at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)
func (s Scanner) Version() int { return 1 }
var (
// Oauth token
@ -38,7 +41,7 @@ type userRes struct {
// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
func (s Scanner) Keywords() []string {
return []string{"github","gh","pat"}
return []string{"github", "gh", "pat"}
}
// FromData will find and optionally verify GitHub secrets in a given set of bytes.

View file

@ -14,8 +14,11 @@ import (
type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
// Ensure the Scanner satisfies the interfaces at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)
func (s Scanner) Version() int { return 1 }
var (
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gitlab"}) + `\b((?:glpat|)[a-zA-Z0-9\-=_]{20,22})\b`)

View file

@ -13,8 +13,11 @@ import (
type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
// Ensure the Scanner satisfies the interfaces at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)
func (*Scanner) Version() int { return 2 }
var (
keyPat = regexp.MustCompile(`\b(glpat-[a-zA-Z0-9\-=_]{20,22})\b`)

View file

@ -14,8 +14,11 @@ import (
type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
// Ensure the Scanner satisfies the interfaces at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)
func (s Scanner) Version() int { return 1 }
var (
client = common.SaneHttpClient()
@ -34,7 +37,7 @@ func (s Scanner) Keywords() []string {
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
for _, match := range matches {
if len(match) != 2 {
continue

View file

@ -13,8 +13,11 @@ import (
type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
// Ensure the Scanner satisfies the interfaces at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)
func (s Scanner) Version() int { return 2 }
var (
client = common.SaneHttpClient()