//go:build detectors // +build detectors package openai import ( "context" "fmt" "testing" "time" "github.com/google/go-cmp/cmp" "github.com/kylelemons/godebug/pretty" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) func TestOpenAI_Pattern(t *testing.T) { d := Scanner{} ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { name string input string want []string }{ { name: "user API key", input: "openai.api-key: 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", want: []string{"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"}, }, { name: "project API key", input: `OPENAI_API_KEY = "sk-proj-mpjtr05CFsJqs4TAeKlCT3BlbkFJsh1KtN0SUjTPeJiagE8K"`, want: []string{"sk-proj-mpjtr05CFsJqs4TAeKlCT3BlbkFJsh1KtN0SUjTPeJiagE8K"}, }, { name: "service account API key", input: `OPENAI_API_KEY = "sk-service-account-name-Ofbtr05CFsJqs4TAeKlCT3BlbkFJsh1KtN0SUjTPeJiaglyC"`, want: []string{"sk-service-account-name-Ofbtr05CFsJqs4TAeKlCT3BlbkFJsh1KtN0SUjTPeJiaglyC"}, }, { name: "newer user API key", input: `"OPENAI_API_KEY = "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"`, want: []string{"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"}, }, { name: "newer service account API key", input: `OPENAI_API_KEY = "sk-svcacct-IUXtc5gIZK-2cBfB-nTgEWbD8mi-fi-gc20oGtq8ve51sET3BlbkFJCg8iQkCVz_nmE_q1dCWlMpemoaoMqHzQ6D-FnWGqlz4C8A"`, want: []string{"sk-svcacct-IUXtc5gIZK-2cBfB-nTgEWbD8mi-fi-gc20oGtq8ve51sET3BlbkFJCg8iQkCVz_nmE_q1dCWlMpemoaoMqHzQ6D-FnWGqlz4C8A"}, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { detectorMatches := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) if len(detectorMatches) == 0 { t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } results, err := d.FromData(context.Background(), false, []byte(test.input)) if err != nil { t.Errorf("error = %v", err) return } if len(results) != len(test.want) { if len(results) == 0 { t.Errorf("did not receive result") } else { t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } return } actual := make(map[string]struct{}, len(results)) for _, r := range results { if len(r.RawV2) > 0 { actual[string(r.RawV2)] = struct{}{} } else { actual[string(r.Raw)] = struct{}{} } } expected := make(map[string]struct{}, len(test.want)) for _, v := range test.want { expected[v] = struct{}{} } if diff := cmp.Diff(expected, actual); diff != "" { t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } } func TestOpenAI_FromChunk(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") if err != nil { t.Fatalf("could not get test secrets from GCP: %s", err) } oaiUnverified := testSecrets.MustGetField("OPENAI_UNVERIFIED") oaiVerified := testSecrets.MustGetField("OPENAI_VERIFIED") type args struct { ctx context.Context data []byte verify bool } tests := []struct { name string s Scanner args args want []detectors.Result wantErr bool }{ { name: "Found, unverified OpenAI token sk-", s: Scanner{}, args: args{ ctx: context.Background(), data: []byte(fmt.Sprintf("You can find an OpenAI secret %s within", oaiUnverified)), verify: true, }, want: []detectors.Result{ { DetectorType: detectorspb.DetectorType_OpenAI, Redacted: "sk-...gOPc", Verified: false, }, }, wantErr: false, }, { name: "Found, verified OpenAI token sk-", s: Scanner{}, args: args{ ctx: context.Background(), data: []byte(fmt.Sprintf("You can find an OpenAI secret %s within", oaiVerified)), verify: true, }, want: []detectors.Result{ { DetectorType: detectorspb.DetectorType_OpenAI, Verified: true, Redacted: "sk-...gOPb", }, }, wantErr: false, }, { name: "not found", s: Scanner{}, args: args{ ctx: context.Background(), data: []byte("You cannot find the secret within"), verify: true, }, want: nil, wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { s := Scanner{} got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) if (err != nil) != tt.wantErr { t.Errorf("OpenAI.FromData() error = %v, wantErr %v", err, tt.wantErr) return } for i := range got { if len(got[i].Raw) == 0 { t.Fatal("no raw secret present") } got[i].Raw = nil got[i].ExtraData = nil got[i].AnalysisInfo = nil } if diff := pretty.Compare(got, tt.want); diff != "" { t.Errorf("OpenAI.FromData() %s diff: (-got +want)\n%s", tt.name, diff) } }) } } func BenchmarkFromData(benchmark *testing.B) { ctx := context.Background() s := Scanner{} for name, data := range detectors.MustGetBenchmarkData() { benchmark.Run(name, func(b *testing.B) { b.ResetTimer() for n := 0; n < b.N; n++ { _, err := s.FromData(ctx, false, data) if err != nil { b.Fatal(err) } } }) } }