feat(voiceflow): basic detector (#1900)

This commit is contained in:
Richard Gomez 2023-10-18 17:17:11 -04:00 committed by GitHub
parent a354cbd796
commit b57b1c1aa7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 410 additions and 2 deletions

View file

@ -0,0 +1,99 @@
package voiceflow
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
// Scanner detects Voiceflow API keys in raw data and can optionally verify
// them with an HTTP request.
type Scanner struct {
// client is the HTTP client used for verification. When it is nil, FromData
// falls back to the package-level defaultClient.
client *http.Client
}
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var (
// defaultClient is the HTTP client used for verification when a Scanner
// has no client of its own.
defaultClient = common.SaneHttpClient()
// keyPat matches Voiceflow API keys: the literal "VF.", an optional "DM." or
// "WS." segment, 24 hexadecimal characters, a dot, and 16 alphanumeric
// characters. The whole key is captured in group 1.
//
// Reference: https://developer.voiceflow.com/reference/project#dialog-manager-api-keys
//
// TODO: This includes Workspace and Legacy Workspace API keys; it has not been validated whether these actually work.
// https://github.com/voiceflow/general-runtime/blob/master/tests/runtime/lib/DataAPI/utils.unit.ts
keyPat = regexp.MustCompile(`\b(VF\.(?:(?:DM|WS)\.)?[a-fA-F0-9]{24}\.[a-zA-Z0-9]{16})\b`)
)
// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
//
// Every key accepted by keyPat starts with the literal "VF.", so that prefix
// is the only keyword required. A separate "dm" keyword cannot surface any
// additional key, and it occurs in a great deal of unrelated text (for
// example inside "admin" or "readme"), which would send many chunks to the
// regex for nothing.
//
// NOTE(review): the keyword is kept lowercase like the original ones, on the
// assumption that the caller matches keywords case-insensitively — confirm
// against the engine's pre-filter.
func (s Scanner) Keywords() []string {
	return []string{"vf."}
}
// FromData will find and optionally verify Voiceflow secrets in a given set of bytes.
//
// Every match of keyPat in data yields one result whose Raw field holds the
// key. When verify is true, each key is additionally checked with an HTTP
// request: Verified is set when the API answers 200, left false on 401, and
// any other outcome (request construction failure, transport error,
// unexpected status) is stored in VerificationError. A verification failure
// never removes a match from the results, and the returned error is always nil.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	dataStr := string(data)

	matches := keyPat.FindAllStringSubmatch(dataStr, -1)
	for _, match := range matches {
		// Expect exactly the full match plus the single capture group.
		if len(match) != 2 {
			continue
		}
		resMatch := strings.TrimSpace(match[1])

		s1 := detectors.Result{
			DetectorType: detectorspb.DetectorType_Voiceflow,
			Raw:          []byte(resMatch),
		}

		if verify {
			client := s.client
			if client == nil {
				client = defaultClient
			}
			s1.Verified, s1.VerificationError = verifyMatch(ctx, client, resMatch)
		}

		results = append(results, s1)
	}

	return results, nil
}

// verifyMatch sends a small knowledge-base query authorized with key and
// interprets the response status.
//
// It returns (true, nil) on HTTP 200 and (false, nil) on HTTP 401. Any other
// status, or any failure to build or send the request, returns false together
// with an error describing what happened.
func verifyMatch(ctx context.Context, client *http.Client, key string) (bool, error) {
	// Upper bound on how much of a response body is read, both for the error
	// message and when draining, so an unexpected response cannot make the
	// scanner buffer an arbitrarily large payload.
	const maxBodyBytes = 4096

	// The answer to the question is irrelevant; only the status code is used
	// to judge whether the key is accepted.
	payload := []byte(`{"question": "why is the sky blue?"}`)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://general-runtime.voiceflow.com/knowledge-base/query", bytes.NewBuffer(payload))
	if err != nil {
		return false, err
	}
	req.Header.Set("Accept", "application/json")
	req.Header.Set("Authorization", key)
	req.Header.Set("Content-Type", "application/json")

	res, err := client.Do(req)
	if err != nil {
		return false, err
	}
	defer func() {
		// Drain (bounded) before closing so the connection can be reused.
		// Errors here are ignored on purpose: the verdict is already decided.
		_, _ = io.Copy(io.Discard, io.LimitReader(res.Body, maxBodyBytes))
		_ = res.Body.Close()
	}()

	switch res.StatusCode {
	case http.StatusOK:
		return true, nil
	case http.StatusUnauthorized:
		// The secret is determinately not verified.
		return false, nil
	default:
		var buf bytes.Buffer
		var bodyString string
		// A body that cannot be read is reported as empty rather than hiding
		// the unexpected status code.
		if _, err := io.Copy(&buf, io.LimitReader(res.Body, maxBodyBytes)); err == nil {
			bodyString = buf.String()
		}
		return false, fmt.Errorf("unexpected HTTP response [status=%d, body=%s]", res.StatusCode, bodyString)
	}
}
// Type returns the detector type for Voiceflow, the same value FromData sets
// on every result it produces.
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_Voiceflow
}

View file

@ -0,0 +1,302 @@
//go:build detectors
// +build detectors
package voiceflow
import (
"context"
"fmt"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"testing"
"time"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
// TestVoiceflow_Pattern checks the detector's regex against snippets taken
// from public repositories: inputs that must yield a specific key and inputs
// (placeholders) that must yield nothing.
//
// Verification is disabled so the test only exercises pattern matching and
// never sends the sample keys over the network.
func TestVoiceflow_Pattern(t *testing.T) {
	tests := []struct {
		name        string
		data        string
		shouldMatch bool
		match       string
	}{
		// True positives
		// https://github.com/funDAOmental/endlessquest/blob/5c008f7c6a7e58c45a88b72fef4b965c258d665c/Voiceflow/agent-api/index.js#L6
		{
			name: `valid_result1`,
			data: `// z0MG IT'S NOT A SECRET (but we'll delete it)
const API_KEY = "VF.DM.6469b4e5909a470007b96250.k4ip0SMy84jWlCsF"; // it should look like this: VF.DM.XXXXXXX.XXXXXX... keep this a secret!`,
			shouldMatch: true,
			match:       `VF.DM.6469b4e5909a470007b96250.k4ip0SMy84jWlCsF`,
		},
		// https://github.com/sherifButt/ll-site/blob/b98b268214324da42a84e996e4c03c242e122680/src/components/Chatbot.jsx#L14
		{
			name: `valid_result2`,
			data: ` const runtime = useRuntime({
verify: { authorization: 'VF.DM.652da078cde70b0008e1c5df.zsIo23VTxNXKfb9f' },
session: { userID: 'user_123' },
});`,
			shouldMatch: true,
			match:       `VF.DM.652da078cde70b0008e1c5df.zsIo23VTxNXKfb9f`,
		},
		// https://github.com/the-vv/Voiceflow-chatbot/blob/324db17693dd46387ea7a020e92c4e79b94306c6/src/app/chat/chat.component.ts#L27
		{
			name: `valid_result3`,
			data: ` this.http.delete('https://general-runtime.voiceflow.com/state/user/TEST_USER', {
headers: {
Authorization: "VF.DM.652ecc210267ec00078fc726.ZFPdEwvU0d1jiIMq"
}
}).subscribe(res => {
this.loading = false;
this.doPrompt('', { action: { type: 'launch' } });
})`,
			shouldMatch: true,
			match:       `VF.DM.652ecc210267ec00078fc726.ZFPdEwvU0d1jiIMq`,
		},
		// https://github.com/legionX7/Graduation-Project-API/blob/451431771d3fba1d8c634b8855274b414d7aed6d/mainAPI.py#L547
		{
			name: `valid_result4`,
			data: `
API_KEY = 'VF.DM.646388eb1419c80007bbbaa4.XHOqETFO3cvTxlGl'
VERSION_ID = '646bc'`,
			shouldMatch: true,
			match:       `VF.DM.646388eb1419c80007bbbaa4.XHOqETFO3cvTxlGl`,
		},
		// https://github.com/voiceflow/general-runtime/blob/master/tests/runtime/lib/DataAPI/utils.unit.ts
		{
			name: `valid_result5`,
			data: ` it('extracts ID from a Dialog Manager API key', () => {
// eslint-disable-next-line no-secrets/no-secrets
const key = 'VF.DM.628d5d92faf688001bda7907.dmC8KKO1oX8JO5ai';
const result = utils.extractAPIKeyID(key);
expect(result).to.equal('628d5d92faf688001bda7907');
});`,
			shouldMatch: true,
			match:       `VF.DM.628d5d92faf688001bda7907.dmC8KKO1oX8JO5ai`,
		},
		{
			name: `valid_result6_legacy`,
			data: ` it('extracts ID from a Workspace API key', () => {
// eslint-disable-next-line no-secrets/no-secrets
const key = 'VF.WS.62bcb0cca5184300066f5ac7.egnKyyzZksiS5iGa';
const result = utils.extractAPIKeyID(key);
expect(result).to.equal('62bcb0cca5184300066f5ac7');
});
`,
			shouldMatch: true,
			match:       `VF.WS.62bcb0cca5184300066f5ac7.egnKyyzZksiS5iGa`,
		},
		{
			name: `valid_result7_legacy`,
			data: ` it('extracts ID from a Legacy Workspace API key', () => {
// eslint-disable-next-line no-secrets/no-secrets
const key = 'VF.62bcb0cca5184300066f5ac7.dmC8KKO1oX8JO5az';
const result = utils.extractAPIKeyID(key);
expect(result).to.equal('62bcb0cca5184300066f5ac7');
});`,
			shouldMatch: true,
			match:       `VF.62bcb0cca5184300066f5ac7.dmC8KKO1oX8JO5az`,
		},
		// False positives
		// https://github.com/ImperialCollegeLondon/voiceflow-integration-whatsapp/blob/0f3d6a5638b9acb4989d5bf8e77081cc78e9b976/README.md?plain=1#L155
		{
			name:        `invalid_result1`,
			data:        "Now, paste it in your .env file for the **VF_PROJECT_API** variable<br>\n```VF_PROJECT_API='VF.DM.62xxxxxxxxxxxxxxxxxxxxxxx'```",
			shouldMatch: false,
		},
		// https://github.com/voiceflow/api-examples/blob/c3d8ba9ee8eced7ec8d241973b1eb0284aaec212/rust/src/main.rs#L5
		{
			name:        `invalid_result2`,
			data:        `const API_KEY: &str = "YOUR_API_KEY_HERE"; // it should look like this: VF.DM.XXXXXXX.XXXXXX... keep this a secret!`,
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			s := Scanner{}

			// verify=false: only the regex is under test here.
			results, err := s.FromData(context.Background(), false, []byte(test.data))
			if err != nil {
				t.Errorf("Voiceflow.FromData() error = %v", err)
				return
			}

			if test.shouldMatch {
				if len(results) == 0 {
					t.Errorf("%s: did not receive a match for '%v' when one was expected", test.name, test.data)
					return
				}

				// Without an explicit expected key, the whole input is
				// expected to be the match.
				expected := test.data
				if test.match != "" {
					expected = test.match
				}

				// Only the first result is compared.
				resultData := string(results[0].Raw)
				if resultData != expected {
					t.Errorf("%s: did not receive expected match.\n\texpected: '%s'\n\t actual: '%s'", test.name, expected, resultData)
					return
				}
			} else {
				if len(results) > 0 {
					t.Errorf("%s: received a match for '%v' when one wasn't wanted", test.name, test.data)
					return
				}
			}
		})
	}
}
// TestVoiceflow_FromChunk runs FromData with verification enabled against an
// active and an inactive key loaded through common.GetSecret, and against
// clients that time out or return an unexpected status. For each case it
// checks the returned error, that every result carries a raw secret, whether
// a verification error is present, and the remaining result fields.
func TestVoiceflow_FromChunk(t *testing.T) {
	secretCtx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

	testSecrets, err := common.GetSecret(secretCtx, "trufflehog-testing", "detectors5")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	secret := testSecrets.MustGetField("VOICEFLOW")
	inactiveSecret := testSecrets.MustGetField("VOICEFLOW_INACTIVE")

	// Chunk text shared by every case that embeds the active key.
	activeChunk := []byte(fmt.Sprintf("You can find a voiceflow secret %s within", secret))

	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	cases := []struct {
		name                string
		s                   Scanner
		args                args
		want                []detectors.Result
		wantErr             bool
		wantVerificationErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{ctx: context.Background(), data: activeChunk, verify: true},
			want: []detectors.Result{
				{DetectorType: detectorspb.DetectorType_Voiceflow, Verified: true},
			},
			wantErr:             false,
			wantVerificationErr: false,
		},
		{
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx: context.Background(),
				// The secret satisfies the regex but does not pass validation.
				data:   []byte(fmt.Sprintf("You can find a voiceflow secret %s within but not valid", inactiveSecret)),
				verify: true,
			},
			want: []detectors.Result{
				{DetectorType: detectorspb.DetectorType_Voiceflow, Verified: false},
			},
			wantErr:             false,
			wantVerificationErr: false,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:                nil,
			wantErr:             false,
			wantVerificationErr: false,
		},
		{
			name: "found, would be verified if not for timeout",
			s:    Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
			args: args{ctx: context.Background(), data: activeChunk, verify: true},
			want: []detectors.Result{
				{DetectorType: detectorspb.DetectorType_Voiceflow, Verified: false},
			},
			wantErr:             false,
			wantVerificationErr: true,
		},
		{
			name: "found, verified but unexpected api surface",
			s:    Scanner{client: common.ConstantResponseHttpClient(404, "")},
			args: args{ctx: context.Background(), data: activeChunk, verify: true},
			want: []detectors.Result{
				{DetectorType: detectorspb.DetectorType_Voiceflow, Verified: false},
			},
			wantErr:             false,
			wantVerificationErr: true,
		},
	}

	// Raw and VerificationError are asserted separately inside the loop, so
	// they are excluded from the structural comparison.
	ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "VerificationError")

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := tc.s.FromData(tc.args.ctx, tc.args.verify, tc.args.data)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("Voiceflow.FromData() error = %v, wantErr %v", err, tc.wantErr)
				return
			}

			for _, res := range got {
				if len(res.Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", res)
				}
				hasVerificationErr := res.VerificationError != nil
				if hasVerificationErr != tc.wantVerificationErr {
					t.Fatalf("wantVerificationError = %v, verification error = %v", tc.wantVerificationErr, res.VerificationError)
				}
			}

			diff := cmp.Diff(got, tc.want, ignoreOpts)
			if diff != "" {
				t.Errorf("Voiceflow.FromData() %s diff: (-got +want)\n%s", tc.name, diff)
			}
		})
	}
}
// BenchmarkFromData measures FromData with verification disabled, running one
// sub-benchmark per entry returned by detectors.MustGetBenchmarkData.
func BenchmarkFromData(benchmark *testing.B) {
	var (
		scanner = Scanner{}
		ctx     = context.Background()
	)

	for name, data := range detectors.MustGetBenchmarkData() {
		benchmark.Run(name, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, err := scanner.FromData(ctx, false, data); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}

View file

@ -11,6 +11,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/sourcegraphcody"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/tailscale"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/trufflehogenterprise"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/voiceflow"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abbysale"
@ -1554,6 +1555,7 @@ func DefaultDetectors() []detectors.Detector {
&ramp.Scanner{},
&anthropic.Scanner{},
&sourcegraphcody.Scanner{},
voiceflow.Scanner{},
}
}

View file

@ -1008,6 +1008,7 @@ const (
DetectorType_Ramp DetectorType = 934
DetectorType_Klaviyo DetectorType = 935
DetectorType_SourcegraphCody DetectorType = 936
DetectorType_Voiceflow DetectorType = 937
)
// Enum value maps for DetectorType.
@ -1946,6 +1947,7 @@ var (
934: "Ramp",
935: "Klaviyo",
936: "SourcegraphCody",
937: "Voiceflow",
}
DetectorType_value = map[string]int32{
"Alibaba": 0,
@ -2881,6 +2883,7 @@ var (
"Ramp": 934,
"Klaviyo": 935,
"SourcegraphCody": 936,
"Voiceflow": 937,
}
)
@ -3259,7 +3262,7 @@ var file_detectors_proto_rawDesc = []byte{
0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55,
0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49,
0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x41, 0x53, 0x45, 0x36, 0x34, 0x10, 0x02, 0x12,
0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x2a, 0xa6, 0x75, 0x0a, 0x0c, 0x44,
0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x2a, 0xb6, 0x75, 0x0a, 0x0c, 0x44,
0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x41,
0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x41, 0x4d, 0x51, 0x50,
0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x41, 0x57, 0x53, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x41,
@ -4198,7 +4201,8 @@ var file_detectors_proto_rawDesc = []byte{
0x63, 0x10, 0xa5, 0x07, 0x12, 0x09, 0x0a, 0x04, 0x52, 0x61, 0x6d, 0x70, 0x10, 0xa6, 0x07, 0x12,
0x0c, 0x0a, 0x07, 0x4b, 0x6c, 0x61, 0x76, 0x69, 0x79, 0x6f, 0x10, 0xa7, 0x07, 0x12, 0x14, 0x0a,
0x0f, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x43, 0x6f, 0x64, 0x79,
0x10, 0xa8, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f,
0x10, 0xa8, 0x07, 0x12, 0x0e, 0x0a, 0x09, 0x56, 0x6f, 0x69, 0x63, 0x65, 0x66, 0x6c, 0x6f, 0x77,
0x10, 0xa9, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f,
0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74,
0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f,
0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73,

View file

@ -945,6 +945,7 @@ enum DetectorType {
Ramp = 934;
Klaviyo = 935;
SourcegraphCody = 936;
Voiceflow = 937;
}
message Result {