From 2d3ddad0766cce2f7d5bef768243bd0e78538561 Mon Sep 17 00:00:00 2001 From: Dustin Decker Date: Tue, 12 Jul 2022 08:34:02 -0700 Subject: [PATCH] Pr/589 (#654) * added common regex patterns for detectors * For HexPattern * enhancements * used parseInt * enhancement * enhanced regex for email and subdomain * enhancement for email pattern * update pattern and detector Co-authored-by: Roxanne Tampus --- pkg/common/patterns.go | 41 ++++++++++++++++++++++ pkg/detectors/clicksendsms/clicksendsms.go | 11 +++--- pkg/detectors/cloudconvert/cloudconvert.go | 3 +- pkg/detectors/codemagic/codemagic.go | 2 +- pkg/detectors/databox/databox.go | 4 +-- pkg/detectors/onesignal/onesignal.go | 4 +-- pkg/detectors/speechtextai/speechtextai.go | 2 +- pkg/detectors/zulipchat/zulipchat.go | 13 +++---- pkg/detectors/zulipchat/zulipchat_test.go | 6 ++-- 9 files changed, 60 insertions(+), 26 deletions(-) create mode 100644 pkg/common/patterns.go diff --git a/pkg/common/patterns.go b/pkg/common/patterns.go new file mode 100644 index 000000000..a6366acc8 --- /dev/null +++ b/pkg/common/patterns.go @@ -0,0 +1,41 @@ +package common + +import ( + "fmt" + "strconv" + "strings" + + log "github.com/sirupsen/logrus" +) + +const EmailPattern = `\b(?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])\b` +const SubDomainPattern = `\b([A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)\b` +const UUIDPattern = `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b` +const UUIDPatternUpperCase = `\b([0-9A-Z]{8}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{12})\b` + +const RegexPattern = "0-9a-z" +const 
AlphaNumPattern = "0-9a-zA-Z"
+const HexPattern = "0-9a-f"
+
+// Custom regex builders.
+
+// BuildRegex returns a word-bounded pattern with a single capture group that
+// matches exactly length characters taken from the character-class body
+// pattern (e.g. AlphaNumPattern) plus any extra characters in specialChar.
+// Both strings are inserted verbatim inside [...], so callers are responsible
+// for any escaping the character class needs.
+func BuildRegex(pattern string, specialChar string, length int) string {
+	return fmt.Sprintf(`\b([%s%s]{%d})\b`, pattern, specialChar, length)
+}
+
+// BuildRegexJWT returns a word-bounded pattern with a single capture group for
+// a three-segment, dot-separated token whose first two segments start with
+// "ey". Each argument is a "min,max" repetition count (for example "30,34")
+// that is inserted verbatim into a {min,max} quantifier.
+//
+// A range rejected by RangeValidation is only logged; the pattern is still
+// built and returned.
+func BuildRegexJWT(firstRange, secondRange, thirdRange string) string {
+	if RangeValidation(firstRange) || RangeValidation(secondRange) || RangeValidation(thirdRange) {
+		log.Error("Min value should not be greater than or equal to max")
+	}
+	// The separators are escaped: a bare "." would match any character, not
+	// just the literal dot between the token segments.
+	return fmt.Sprintf(`\b(ey[%s]{%s}\.ey[%s-\/_]{%s}\.[%s-\/_]{%s})\b`, AlphaNumPattern, firstRange, AlphaNumPattern, secondRange, AlphaNumPattern, thirdRange)
+}
+
+// RangeValidation reports whether rangeInput is NOT a usable "min,max" range.
+// It returns true when min >= max, and also when the input does not contain
+// two comma-separated integers (missing comma or non-numeric bound). Only the
+// first two comma-separated fields are considered; surrounding spaces are
+// ignored.
+func RangeValidation(rangeInput string) bool {
+	bounds := strings.Split(rangeInput, ",")
+	if len(bounds) < 2 {
+		// Without a comma there is no bounds[1]; report the range as invalid
+		// instead of panicking on the index.
+		return true
+	}
+	lo, err := strconv.Atoi(strings.TrimSpace(bounds[0]))
+	if err != nil {
+		return true
+	}
+	hi, err := strconv.Atoi(strings.TrimSpace(bounds[1]))
+	if err != nil {
+		return true
+	}
+	return lo >= hi
+}
+
+// ToUpperCase returns input with all letters mapped to upper case; it is a
+// thin wrapper around strings.ToUpper.
+func ToUpperCase(input string) string {
+	return strings.ToUpper(input)
+}
diff --git a/pkg/detectors/clicksendsms/clicksendsms.go b/pkg/detectors/clicksendsms/clicksendsms.go index bf6c15f75..4447bf3f7 100644 --- a/pkg/detectors/clicksendsms/clicksendsms.go +++ b/pkg/detectors/clicksendsms/clicksendsms.go @@ -22,8 +22,8 @@ var ( client = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(`\b([A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12})\b`) - idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sms"}) + `\b([a-zA-Z0-9]{3,20}@[a-zA-Z0-9]{2,12}.[a-zA-Z0-9]{2,5})\b`) + keyPat = regexp.MustCompile(common.UUIDPatternUpperCase) + idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sms"}) + common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. 
@@ -44,11 +44,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result continue } resMatch := strings.TrimSpace(match[1]) - for _, idmatch := range idMatches { - if len(idmatch) != 2 { - continue - } - resIdMatch := strings.TrimSpace(idmatch[1]) + for _, idMatch := range idMatches { + resIdMatch := strings.TrimSpace(idMatch[0][strings.LastIndex(idMatch[0], " ")+1:]) s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_ClickSendsms, diff --git a/pkg/detectors/cloudconvert/cloudconvert.go b/pkg/detectors/cloudconvert/cloudconvert.go index edba1bb25..16f0f8330 100644 --- a/pkg/detectors/cloudconvert/cloudconvert.go +++ b/pkg/detectors/cloudconvert/cloudconvert.go @@ -21,7 +21,7 @@ var ( client = common.SaneHttpClient() //Make sure that your group is surrounded in boundry characters such as below to reduce false positives - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudconvert"}) + `\b(ey[0-9a-zA-Z]{34}.ey[0-9a-zA-Z-_]{200,500}.[0-9a-zA-Z-_]{600,700})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudconvert"}) + common.BuildRegexJWT("30,34", "200,500", "600,700")) ) // Keywords are used for efficiently pre-filtering chunks. 
@@ -35,7 +35,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) matches := keyPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { if len(match) != 2 { continue diff --git a/pkg/detectors/codemagic/codemagic.go b/pkg/detectors/codemagic/codemagic.go index 44ae453d1..ae7d945f2 100644 --- a/pkg/detectors/codemagic/codemagic.go +++ b/pkg/detectors/codemagic/codemagic.go @@ -20,7 +20,7 @@ var ( client = common.SaneHttpClient() //Make sure that your group is surrounded in boundry characters such as below to reduce false positives - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"codemagic"}) + `\b([a-zA-Z0-9_]{43})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"codemagic"}) + common.BuildRegex(common.AlphaNumPattern, "_", 43)) ) // Keywords are used for efficiently pre-filtering chunks. diff --git a/pkg/detectors/databox/databox.go b/pkg/detectors/databox/databox.go index 19e70fc7b..a6d0eb426 100644 --- a/pkg/detectors/databox/databox.go +++ b/pkg/detectors/databox/databox.go @@ -2,11 +2,11 @@ package databox import ( "context" + b64 "encoding/base64" "fmt" "net/http" "regexp" "strings" - b64 "encoding/base64" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -22,7 +22,7 @@ var ( client = common.SaneHttpClient() //Make sure that your group is surrounded in boundry characters such as below to reduce false positives - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"databox"}) + `\b([a-z0-9]{21})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"databox"}) + common.BuildRegex(common.RegexPattern, "", 21)) ) // Keywords are used for efficiently pre-filtering chunks. 
diff --git a/pkg/detectors/onesignal/onesignal.go b/pkg/detectors/onesignal/onesignal.go index b524c7fda..2fa984610 100644 --- a/pkg/detectors/onesignal/onesignal.go +++ b/pkg/detectors/onesignal/onesignal.go @@ -2,11 +2,11 @@ package onesignal import ( "context" + b64 "encoding/base64" "fmt" "net/http" "regexp" "strings" - b64 "encoding/base64" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -22,7 +22,7 @@ var ( client = common.SaneHttpClient() //Make sure that your group is surrounded in boundry characters such as below to reduce false positives - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"onesignal"}) + `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"onesignal"}) + common.UUIDPattern) ) // Keywords are used for efficiently pre-filtering chunks. diff --git a/pkg/detectors/speechtextai/speechtextai.go b/pkg/detectors/speechtextai/speechtextai.go index 0143b0907..4a750a68c 100644 --- a/pkg/detectors/speechtextai/speechtextai.go +++ b/pkg/detectors/speechtextai/speechtextai.go @@ -21,7 +21,7 @@ var ( client = common.SaneHttpClient() //Make sure that your group is surrounded in boundry characters such as below to reduce false positives - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"speechtext"}) + `\b([0-9a-f]{32})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"speechtext"}) + common.BuildRegex(common.HexPattern, "", 32)) ) // Keywords are used for efficiently pre-filtering chunks. diff --git a/pkg/detectors/zulipchat/zulipchat.go b/pkg/detectors/zulipchat/zulipchat.go index 08ad43853..412c96c62 100644 --- a/pkg/detectors/zulipchat/zulipchat.go +++ b/pkg/detectors/zulipchat/zulipchat.go @@ -21,9 +21,9 @@ var ( client = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. 
- keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + `\b([0-9a-zA-Z]{32})\b`) - idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + `\b([a-z0-9]{4,25}@[a-zA-Z0-9]{2,12}.[a-zA-Z0-9]{2,6})\b`) - domainPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat", "domain"}) + `\b([0-9a-z]{2,20})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + common.BuildRegex(common.AlphaNumPattern, "", 32)) + idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + common.EmailPattern) + domainPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat", "domain"}) + common.SubDomainPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -47,11 +47,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result resMatch := strings.TrimSpace(match[1]) for _, idMatch := range idMatches { - if len(idMatch) != 2 { - continue - } - - resIdMatch := strings.TrimSpace(idMatch[1]) + //getting the last word of the string + resIdMatch := strings.TrimSpace(idMatch[0][strings.LastIndex(idMatch[0], " ")+1:]) for _, domainMatch := range domainMatches { if len(domainMatch) != 2 { diff --git a/pkg/detectors/zulipchat/zulipchat_test.go b/pkg/detectors/zulipchat/zulipchat_test.go index 432abdbdf..33bf3e427 100644 --- a/pkg/detectors/zulipchat/zulipchat_test.go +++ b/pkg/detectors/zulipchat/zulipchat_test.go @@ -25,7 +25,7 @@ func TestZulipChat_FromChunk(t *testing.T) { } secret := testSecrets.MustGetField("ZULIPCHAT") id := testSecrets.MustGetField("ZULIPCHAT_ID") - domain := testSecrets.MustGetField("ZULIPCHAT_DOMAIN") + domain := testSecrets.MustGetField("ZULIPCHAT_DOMAINV2") inactiveSecret := testSecrets.MustGetField("ZULIPCHAT_INACTIVE") type args struct { @@ -45,7 +45,7 @@ func TestZulipChat_FromChunk(t *testing.T) { s: Scanner{}, args: args{ ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s 
and zulipchat domain %s", secret, id, domain)), + data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat %s", secret, id, domain)), verify: true, }, want: []detectors.Result{ @@ -61,7 +61,7 @@ func TestZulipChat_FromChunk(t *testing.T) { s: Scanner{}, args: args{ ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat domain %s but not valid", inactiveSecret, id, domain)), // the secret would satisfy the regex but not pass validation + data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat %s but not valid", inactiveSecret, id, domain)), // the secret would satisfy the regex but not pass validation verify: true, }, want: []detectors.Result{