mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
adding pypi detector (#3287)
* adding pypi detector * update test and use helper --------- Co-authored-by: Dylan Ayrey <dxa4481@rit.edu> Co-authored-by: Dustin Decker <dustin@trufflesec.com>
This commit is contained in:
parent
70c6bb5634
commit
d201e54305
5 changed files with 359 additions and 7 deletions
125
pkg/detectors/pypi/pypi.go
Normal file
125
pkg/detectors/pypi/pypi.go
Normal file
|
@ -0,0 +1,125 @@
|
|||
package pypi
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
|
||||
regexp "github.com/wasilibs/go-re2"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
||||
)
|
||||
|
||||
type Scanner struct {
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// Ensure the Scanner satisfies the interface at compile time.
|
||||
var _ detectors.Detector = (*Scanner)(nil)
|
||||
|
||||
var (
|
||||
defaultClient = common.SaneHttpClient()
|
||||
// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
|
||||
keyPat = regexp.MustCompile("(pypi-AgEIcHlwaS5vcmcCJ[a-zA-Z0-9-_]{157})")
|
||||
)
|
||||
|
||||
// Keywords are used for efficiently pre-filtering chunks.
|
||||
// Use identifiers in the secret preferably, or the provider name.
|
||||
func (s Scanner) Keywords() []string {
|
||||
return []string{"pypi-AgEIcHlwaS5vcmcCJ"}
|
||||
}
|
||||
|
||||
// FromData will find and optionally verify Pypi secrets in a given set of bytes.
|
||||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
||||
dataStr := string(data)
|
||||
|
||||
uniqueMatches := make(map[string]struct{})
|
||||
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
|
||||
uniqueMatches[match[1]] = struct{}{}
|
||||
}
|
||||
|
||||
for match := range uniqueMatches {
|
||||
s1 := detectors.Result{
|
||||
DetectorType: detectorspb.DetectorType_PyPI,
|
||||
Raw: []byte(match),
|
||||
}
|
||||
|
||||
if verify {
|
||||
client := s.client
|
||||
if client == nil {
|
||||
client = defaultClient
|
||||
}
|
||||
|
||||
isVerified, extraData, verificationErr := verifyMatch(ctx, client, match)
|
||||
s1.Verified = isVerified
|
||||
s1.ExtraData = extraData
|
||||
s1.SetVerificationError(verificationErr, match)
|
||||
}
|
||||
|
||||
results = append(results, s1)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func verifyMatch(ctx context.Context, client *http.Client, token string) (bool, map[string]string, error) {
|
||||
// Create a buffer to hold the multipart form data
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
|
||||
// Add the form fields like in the curl request
|
||||
_ = writer.WriteField(":action", "file_upload")
|
||||
_ = writer.WriteField("name", "dummy-package")
|
||||
_ = writer.WriteField("version", "0.0.1")
|
||||
_ = writer.WriteField("content", "dummy-content")
|
||||
|
||||
// Close the writer to finalize the form
|
||||
writer.Close()
|
||||
|
||||
// Create a new POST request to the PyPI legacy upload URL
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://upload.pypi.org/legacy/", &body)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
|
||||
// Add the Authorization header with the PyPI API token
|
||||
req.Header.Add("Authorization", "token "+token)
|
||||
// Set the Content-Type to the multipart form boundary
|
||||
req.Header.Set("Content-Type", writer.FormDataContentType())
|
||||
|
||||
// Execute the HTTP request
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
defer func() {
|
||||
_, _ = io.Copy(io.Discard, res.Body)
|
||||
_ = res.Body.Close()
|
||||
}()
|
||||
|
||||
// Check for expected status codes for verification
|
||||
if res.StatusCode == http.StatusBadRequest {
|
||||
verified, err := common.ResponseContainsSubstring(res.Body, "Include at least one message digest.")
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
if verified {
|
||||
return true, nil, nil
|
||||
}
|
||||
} else if res.StatusCode == http.StatusForbidden {
|
||||
// If we get a 403 status, the key is invalid
|
||||
return false, nil, nil
|
||||
}
|
||||
|
||||
// For all other status codes, return an error
|
||||
return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
|
||||
}
|
||||
|
||||
func (s Scanner) Type() detectorspb.DetectorType {
|
||||
return detectorspb.DetectorType_PyPI
|
||||
}
|
220
pkg/detectors/pypi/pypi_test.go
Normal file
220
pkg/detectors/pypi/pypi_test.go
Normal file
|
@ -0,0 +1,220 @@
|
|||
//go:build detectors
|
||||
// +build detectors
|
||||
|
||||
package pypi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
||||
)
|
||||
|
||||
func TestPypi_Pattern(t *testing.T) {
|
||||
d := Scanner{}
|
||||
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want []string
|
||||
}{
|
||||
{
|
||||
name: "typical pattern",
|
||||
input: "pypi_token = 'pypi-AgEIcHlwaS5vcmcCJDQyM2M0Yjg4LWUyNDnnnnhhMy1hNigyLWI2ZWUyMTMwYzI2MgACKlszLCJhOWQwMWE0MS01Nzk4LTQyOWYtOTk4MS1lYzE5NTJhM2E3YzgiXQAABiBeGtDnnnnnV32VpiyeU-YUDKplSv0E5ngmwsnHaV2jGg'",
|
||||
want: []string{"pypi-AgEIcHlwaS5vcmcCJDQyM2M0Yjg4LWUyNDnnnnhhMy1hNigyLWI2ZWUyMTMwYzI2MgACKlszLCJhOWQwMWE0MS01Nzk4LTQyOWYtOTk4MS1lYzE5NTJhM2E3YzgiXQAABiBeGtDnnnnnV32VpiyeU-YUDKplSv0E5ngmwsnHaV2jGg"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
|
||||
if len(matchedDetectors) == 0 {
|
||||
t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input)
|
||||
return
|
||||
}
|
||||
|
||||
results, err := d.FromData(context.Background(), false, []byte(test.input))
|
||||
if err != nil {
|
||||
t.Errorf("error = %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(results) != len(test.want) {
|
||||
if len(results) == 0 {
|
||||
t.Errorf("did not receive result")
|
||||
} else {
|
||||
t.Errorf("expected %d results, only received %d", len(test.want), len(results))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
actual := make(map[string]struct{}, len(results))
|
||||
for _, r := range results {
|
||||
if len(r.RawV2) > 0 {
|
||||
actual[string(r.RawV2)] = struct{}{}
|
||||
} else {
|
||||
actual[string(r.Raw)] = struct{}{}
|
||||
}
|
||||
}
|
||||
expected := make(map[string]struct{}, len(test.want))
|
||||
for _, v := range test.want {
|
||||
expected[v] = struct{}{}
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(expected, actual); diff != "" {
|
||||
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPypi_FromChunk(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
||||
defer cancel()
|
||||
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
|
||||
if err != nil {
|
||||
t.Fatalf("could not get test secrets from GCP: %s", err)
|
||||
}
|
||||
secret := testSecrets.MustGetField("PYPI")
|
||||
inactiveSecret := testSecrets.MustGetField("PYPI_INACTIVE")
|
||||
|
||||
type args struct {
|
||||
ctx context.Context
|
||||
data []byte
|
||||
verify bool
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
s Scanner
|
||||
args args
|
||||
want []detectors.Result
|
||||
wantErr bool
|
||||
wantVerificationErr bool
|
||||
}{
|
||||
{
|
||||
name: "found, verified",
|
||||
s: Scanner{},
|
||||
args: args{
|
||||
ctx: context.Background(),
|
||||
data: []byte(fmt.Sprintf("You can find a pypi secret %s within", secret)),
|
||||
verify: true,
|
||||
},
|
||||
want: []detectors.Result{
|
||||
{
|
||||
DetectorType: detectorspb.DetectorType_PyPI,
|
||||
Verified: true,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
wantVerificationErr: false,
|
||||
},
|
||||
{
|
||||
name: "found, unverified",
|
||||
s: Scanner{},
|
||||
args: args{
|
||||
ctx: context.Background(),
|
||||
data: []byte(fmt.Sprintf("You can find a pypi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
|
||||
verify: true,
|
||||
},
|
||||
want: []detectors.Result{
|
||||
{
|
||||
DetectorType: detectorspb.DetectorType_PyPI,
|
||||
Verified: false,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
wantVerificationErr: false,
|
||||
},
|
||||
{
|
||||
name: "not found",
|
||||
s: Scanner{},
|
||||
args: args{
|
||||
ctx: context.Background(),
|
||||
data: []byte("You cannot find the secret within"),
|
||||
verify: true,
|
||||
},
|
||||
want: nil,
|
||||
wantErr: false,
|
||||
wantVerificationErr: false,
|
||||
},
|
||||
{
|
||||
name: "found, would be verified if not for timeout",
|
||||
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
|
||||
args: args{
|
||||
ctx: context.Background(),
|
||||
data: []byte(fmt.Sprintf("You can find a pypi secret %s within", secret)),
|
||||
verify: true,
|
||||
},
|
||||
want: []detectors.Result{
|
||||
{
|
||||
DetectorType: detectorspb.DetectorType_PyPI,
|
||||
Verified: false,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
wantVerificationErr: true,
|
||||
},
|
||||
{
|
||||
name: "found, verified but unexpected api surface",
|
||||
s: Scanner{client: common.ConstantResponseHttpClient(404, "")},
|
||||
args: args{
|
||||
ctx: context.Background(),
|
||||
data: []byte(fmt.Sprintf("You can find a pypi secret %s within", secret)),
|
||||
verify: true,
|
||||
},
|
||||
want: []detectors.Result{
|
||||
{
|
||||
DetectorType: detectorspb.DetectorType_PyPI,
|
||||
Verified: false,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
wantVerificationErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("Pypi.FromData() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
for i := range got {
|
||||
if len(got[i].Raw) == 0 {
|
||||
t.Fatalf("no raw secret present: \n %+v", got[i])
|
||||
}
|
||||
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
|
||||
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
|
||||
}
|
||||
}
|
||||
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError")
|
||||
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
|
||||
t.Errorf("Pypi.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFromData(benchmark *testing.B) {
|
||||
ctx := context.Background()
|
||||
s := Scanner{}
|
||||
for name, data := range detectors.MustGetBenchmarkData() {
|
||||
benchmark.Run(name, func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := s.FromData(ctx, false, data)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -551,6 +551,7 @@ import (
|
|||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/purestake"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/pushbulletapikey"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/pusherchannelkey"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/pypi"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/qase"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/qualaroo"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/qubole"
|
||||
|
@ -806,6 +807,7 @@ import (
|
|||
func DefaultDetectors() []detectors.Detector {
|
||||
return []detectors.Detector{
|
||||
&heroku.Scanner{},
|
||||
&pypi.Scanner{},
|
||||
&linearapi.Scanner{},
|
||||
&alibaba.Scanner{},
|
||||
aws.New(),
|
||||
|
|
|
@ -1099,6 +1099,7 @@ const (
|
|||
DetectorType_Netsuite DetectorType = 995
|
||||
DetectorType_RobinhoodCrypto DetectorType = 996
|
||||
DetectorType_NVAPI DetectorType = 997
|
||||
DetectorType_PyPI DetectorType = 998
|
||||
)
|
||||
|
||||
// Enum value maps for DetectorType.
|
||||
|
@ -2098,6 +2099,7 @@ var (
|
|||
995: "Netsuite",
|
||||
996: "RobinhoodCrypto",
|
||||
997: "NVAPI",
|
||||
998: "PyPI",
|
||||
}
|
||||
DetectorType_value = map[string]int32{
|
||||
"Alibaba": 0,
|
||||
|
@ -3094,6 +3096,7 @@ var (
|
|||
"Netsuite": 995,
|
||||
"RobinhoodCrypto": 996,
|
||||
"NVAPI": 997,
|
||||
"PyPI": 998,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -3547,7 +3550,7 @@ var file_detectors_proto_rawDesc = []byte{
|
|||
0x4c, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x41, 0x53, 0x45, 0x36, 0x34,
|
||||
0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x12, 0x13, 0x0a,
|
||||
0x0f, 0x45, 0x53, 0x43, 0x41, 0x50, 0x45, 0x44, 0x5f, 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45,
|
||||
0x10, 0x04, 0x2a, 0xba, 0x7f, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54,
|
||||
0x10, 0x04, 0x2a, 0xc5, 0x7f, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54,
|
||||
0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x10, 0x00,
|
||||
0x12, 0x08, 0x0a, 0x04, 0x41, 0x4d, 0x51, 0x50, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x41, 0x57,
|
||||
0x53, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x7a, 0x75, 0x72, 0x65, 0x10, 0x03, 0x12, 0x0a,
|
||||
|
@ -4566,12 +4569,13 @@ var file_detectors_proto_rawDesc = []byte{
|
|||
0x0a, 0x45, 0x6c, 0x65, 0x76, 0x65, 0x6e, 0x4c, 0x61, 0x62, 0x73, 0x10, 0xe2, 0x07, 0x12, 0x0d,
|
||||
0x0a, 0x08, 0x4e, 0x65, 0x74, 0x73, 0x75, 0x69, 0x74, 0x65, 0x10, 0xe3, 0x07, 0x12, 0x14, 0x0a,
|
||||
0x0f, 0x52, 0x6f, 0x62, 0x69, 0x6e, 0x68, 0x6f, 0x6f, 0x64, 0x43, 0x72, 0x79, 0x70, 0x74, 0x6f,
|
||||
0x10, 0xe4, 0x07, 0x12, 0x0a, 0x0a, 0x05, 0x4e, 0x56, 0x41, 0x50, 0x49, 0x10, 0xe5, 0x07, 0x42,
|
||||
0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72,
|
||||
0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72,
|
||||
0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f,
|
||||
0x70, 0x62, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 0x62, 0x06,
|
||||
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
|
||||
0x10, 0xe4, 0x07, 0x12, 0x0a, 0x0a, 0x05, 0x4e, 0x56, 0x41, 0x50, 0x49, 0x10, 0xe5, 0x07, 0x12,
|
||||
0x09, 0x0a, 0x04, 0x50, 0x79, 0x50, 0x49, 0x10, 0xe6, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69,
|
||||
0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65,
|
||||
0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65,
|
||||
0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65,
|
||||
0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||
0x33,
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
|
@ -1007,6 +1007,7 @@ enum DetectorType {
|
|||
Netsuite = 995;
|
||||
RobinhoodCrypto = 996;
|
||||
NVAPI = 997;
|
||||
PyPI = 998;
|
||||
}
|
||||
|
||||
message Result {
|
||||
|
|
Loading…
Reference in a new issue