* added common regex patterns for detectors

* For HexPattern

* enhancements

* used parseInt

* enhancement

* enhanced regex for email and subdomain

* enhancement for email pattern

* update pattern and detector

Co-authored-by: Roxanne Tampus <roxannetampus02@gmail.com>
Dustin Decker 2022-07-12 08:34:02 -07:00 committed by GitHub
parent 3053169a0d
commit 2d3ddad076
9 changed files with 60 additions and 26 deletions

pkg/common/patterns.go (new file, 41 additions)
View file

@@ -0,0 +1,41 @@
package common

import (
	"fmt"
	"strconv"
	"strings"

	log "github.com/sirupsen/logrus"
)

const EmailPattern = `\b(?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])\b`
const SubDomainPattern = `\b([A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)\b`
const UUIDPattern = `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`
const UUIDPatternUpperCase = `\b([0-9A-Z]{8}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{12})\b`

const RegexPattern = "0-9a-z"
const AlphaNumPattern = "0-9a-zA-Z"
const HexPattern = "0-9a-f"

// Custom Regex functions
func BuildRegex(pattern string, specialChar string, length int) string {
	return fmt.Sprintf(`\b([%s%s]{%s})\b`, pattern, specialChar, strconv.Itoa(length))
}

func BuildRegexJWT(firstRange, secondRange, thirdRange string) string {
	if RangeValidation(firstRange) || RangeValidation(secondRange) || RangeValidation(thirdRange) {
		log.Error("Min value should not be greater than or equal to max")
	}
	return fmt.Sprintf(`\b(ey[%s]{%s}.ey[%s-\/_]{%s}.[%s-\/_]{%s})\b`, AlphaNumPattern, firstRange, AlphaNumPattern, secondRange, AlphaNumPattern, thirdRange)
}

func RangeValidation(rangeInput string) bool {
	range_split := strings.Split(rangeInput, ",")
	range_min, _ := strconv.ParseInt(strings.TrimSpace(range_split[0]), 10, 0)
	range_max, _ := strconv.ParseInt(strings.TrimSpace(range_split[1]), 10, 0)
	return range_min >= range_max
}

func ToUpperCase(input string) string {
	return strings.ToUpper(input)
}
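
For a sense of what these helpers produce, here is a small, illustrative sketch; the expansions follow directly from the fmt.Sprintf formats above, and the standalone main is only for demonstration:

package main

import (
	"fmt"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
)

func main() {
	// BuildRegex joins a character range, optional special characters, and a
	// fixed length into a word-bounded capture group.
	fmt.Println(common.BuildRegex(common.HexPattern, "", 32))       // \b([0-9a-f]{32})\b
	fmt.Println(common.BuildRegex(common.AlphaNumPattern, "_", 43)) // \b([0-9a-zA-Z_]{43})\b

	// BuildRegexJWT takes "min,max" length ranges for the three JWT segments.
	fmt.Println(common.BuildRegexJWT("30,34", "200,500", "600,700"))
	// \b(ey[0-9a-zA-Z]{30,34}.ey[0-9a-zA-Z-\/_]{200,500}.[0-9a-zA-Z-\/_]{600,700})\b
}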

View file

@@ -22,8 +22,8 @@ var (
	client = common.SaneHttpClient()
	// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
-	keyPat = regexp.MustCompile(`\b([A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12})\b`)
+	keyPat = regexp.MustCompile(common.UUIDPatternUpperCase)
-	idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sms"}) + `\b([a-zA-Z0-9]{3,20}@[a-zA-Z0-9]{2,12}.[a-zA-Z0-9]{2,5})\b`)
+	idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"sms"}) + common.EmailPattern)
)

// Keywords are used for efficiently pre-filtering chunks.
@@ -44,11 +44,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
			continue
		}
		resMatch := strings.TrimSpace(match[1])
-		for _, idmatch := range idMatches {
-			if len(idmatch) != 2 {
-				continue
-			}
-			resIdMatch := strings.TrimSpace(idmatch[1])
+		for _, idMatch := range idMatches {
+			resIdMatch := strings.TrimSpace(idMatch[0][strings.LastIndex(idMatch[0], " ")+1:])

			s1 := detectors.Result{
				DetectorType: detectorspb.DetectorType_ClickSendsms,
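
The new resIdMatch expression works on the full match, idMatch[0], which also contains the text matched by detectors.PrefixRegex, and keeps only the last whitespace-separated word (the e-mail address itself). A minimal sketch of that extraction, with a made-up helper name and sample input for illustration:

package main

import (
	"fmt"
	"strings"
)

// lastWord mirrors the expression used in the detectors above: keep whatever
// follows the final space of the full regex match, assuming the value of
// interest is the last whitespace-separated token.
func lastWord(fullMatch string) string {
	return strings.TrimSpace(fullMatch[strings.LastIndex(fullMatch, " ")+1:])
}

func main() {
	// A match produced by PrefixRegex + EmailPattern might look like this.
	fmt.Println(lastWord("sms account id user@example.com")) // user@example.com
}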

View file

@@ -21,7 +21,7 @@ var (
	client = common.SaneHttpClient()
	//Make sure that your group is surrounded in boundry characters such as below to reduce false positives
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudconvert"}) + `\b(ey[0-9a-zA-Z]{34}.ey[0-9a-zA-Z-_]{200,500}.[0-9a-zA-Z-_]{600,700})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudconvert"}) + common.BuildRegexJWT("30,34", "200,500", "600,700"))
)

// Keywords are used for efficiently pre-filtering chunks.
@@ -35,7 +35,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
	dataStr := string(data)
	matches := keyPat.FindAllStringSubmatch(dataStr, -1)
	for _, match := range matches {
		if len(match) != 2 {
			continue

View file

@@ -20,7 +20,7 @@ var (
	client = common.SaneHttpClient()
	//Make sure that your group is surrounded in boundry characters such as below to reduce false positives
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"codemagic"}) + `\b([a-zA-Z0-9_]{43})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"codemagic"}) + common.BuildRegex(common.AlphaNumPattern, "_", 43))
)

// Keywords are used for efficiently pre-filtering chunks.

View file

@@ -2,11 +2,11 @@ package databox
import (
	"context"
+	b64 "encoding/base64"
	"fmt"
	"net/http"
	"regexp"
	"strings"
-	b64 "encoding/base64"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
@@ -22,7 +22,7 @@ var (
	client = common.SaneHttpClient()
	//Make sure that your group is surrounded in boundry characters such as below to reduce false positives
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"databox"}) + `\b([a-z0-9]{21})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"databox"}) + common.BuildRegex(common.RegexPattern, "", 21))
)

// Keywords are used for efficiently pre-filtering chunks.

View file

@@ -2,11 +2,11 @@ package onesignal
import (
	"context"
+	b64 "encoding/base64"
	"fmt"
	"net/http"
	"regexp"
	"strings"
-	b64 "encoding/base64"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
@@ -22,7 +22,7 @@ var (
	client = common.SaneHttpClient()
	//Make sure that your group is surrounded in boundry characters such as below to reduce false positives
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"onesignal"}) + `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"onesignal"}) + common.UUIDPattern)
)

// Keywords are used for efficiently pre-filtering chunks.

View file

@@ -21,7 +21,7 @@ var (
	client = common.SaneHttpClient()
	//Make sure that your group is surrounded in boundry characters such as below to reduce false positives
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"speechtext"}) + `\b([0-9a-f]{32})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"speechtext"}) + common.BuildRegex(common.HexPattern, "", 32))
)

// Keywords are used for efficiently pre-filtering chunks.

View file

@@ -21,9 +21,9 @@ var (
	client = common.SaneHttpClient()
	// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + `\b([0-9a-zA-Z]{32})\b`)
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + common.BuildRegex(common.AlphaNumPattern, "", 32))
-	idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + `\b([a-z0-9]{4,25}@[a-zA-Z0-9]{2,12}.[a-zA-Z0-9]{2,6})\b`)
+	idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat"}) + common.EmailPattern)
-	domainPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat", "domain"}) + `\b([0-9a-z]{2,20})\b`)
+	domainPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zulipchat", "domain"}) + common.SubDomainPattern)
)

// Keywords are used for efficiently pre-filtering chunks.
@@ -47,11 +47,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
		resMatch := strings.TrimSpace(match[1])
		for _, idMatch := range idMatches {
-			if len(idMatch) != 2 {
-				continue
-			}
-			resIdMatch := strings.TrimSpace(idMatch[1])
+			//getting the last word of the string
+			resIdMatch := strings.TrimSpace(idMatch[0][strings.LastIndex(idMatch[0], " ")+1:])

			for _, domainMatch := range domainMatches {
				if len(domainMatch) != 2 {

View file

@@ -25,7 +25,7 @@ func TestZulipChat_FromChunk(t *testing.T) {
	}
	secret := testSecrets.MustGetField("ZULIPCHAT")
	id := testSecrets.MustGetField("ZULIPCHAT_ID")
-	domain := testSecrets.MustGetField("ZULIPCHAT_DOMAIN")
+	domain := testSecrets.MustGetField("ZULIPCHAT_DOMAINV2")
	inactiveSecret := testSecrets.MustGetField("ZULIPCHAT_INACTIVE")

	type args struct {
@@ -45,7 +45,7 @@ func TestZulipChat_FromChunk(t *testing.T) {
			s: Scanner{},
			args: args{
				ctx: context.Background(),
-				data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat domain %s", secret, id, domain)),
+				data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat %s", secret, id, domain)),
				verify: true,
			},
			want: []detectors.Result{
@@ -61,7 +61,7 @@ func TestZulipChat_FromChunk(t *testing.T) {
			s: Scanner{},
			args: args{
				ctx: context.Background(),
-				data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat domain %s but not valid", inactiveSecret, id, domain)), // the secret would satisfy the regex but not pass validation
+				data: []byte(fmt.Sprintf("You can find a zulipchat secret %s within zulipchat %s and zulipchat %s but not valid", inactiveSecret, id, domain)), // the secret would satisfy the regex but not pass validation
				verify: true,
			},
			want: []detectors.Result{