Mirror of https://github.com/trufflesecurity/trufflehog.git, synced 2024-11-10 07:04:24 +00:00.
Detector-Competition-Fix: Fix ScraperSite (deprecated) (#2074)
Co-authored-by: āh̳̕mͭͭͨͩ̐e̘ͬ́͋ͬ̊̓͂d <13666360+0x1@users.noreply.github.com>
This commit is contained in:
parent
41e9cc59e2
commit
3b9ecaa704
5 changed files with 27 additions and 237 deletions
|
@ -1,89 +0,0 @@
|
|||
package scrapersite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
||||
)
|
||||
|
||||
type Scanner struct{}
|
||||
|
||||
// Ensure the Scanner satisfies the interface at compile time.
|
||||
var _ detectors.Detector = (*Scanner)(nil)
|
||||
|
||||
var (
|
||||
client = common.SaneHttpClientTimeOut(10 * time.Second)
|
||||
|
||||
// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
|
||||
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"scrapersite"}) + `\b([a-zA-Z0-9]{45})\b`)
|
||||
)
|
||||
|
||||
// Keywords are used for efficiently pre-filtering chunks.
|
||||
// Use identifiers in the secret preferably, or the provider name.
|
||||
func (s Scanner) Keywords() []string {
|
||||
return []string{"scrapersite"}
|
||||
}
|
||||
|
||||
// FromData will find and optionally verify ScraperSite secrets in a given set of bytes.
|
||||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
||||
dataStr := string(data)
|
||||
|
||||
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
|
||||
|
||||
for _, match := range matches {
|
||||
if len(match) != 2 {
|
||||
continue
|
||||
}
|
||||
resMatch := strings.TrimSpace(match[1])
|
||||
|
||||
s1 := detectors.Result{
|
||||
DetectorType: detectorspb.DetectorType_ScraperSite,
|
||||
Raw: []byte(resMatch),
|
||||
}
|
||||
|
||||
if verify {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://scrapersite.com/api-v1?api_key=%s&url=https://google.com", resMatch), nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
res, err := client.Do(req)
|
||||
if err == nil {
|
||||
bodyBytes, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
bodyString := string(bodyBytes)
|
||||
validResponse := strings.Contains(bodyString, `"status":true`)
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode >= 200 && res.StatusCode < 300 {
|
||||
if validResponse {
|
||||
s1.Verified = true
|
||||
} else {
|
||||
s1.Verified = false
|
||||
}
|
||||
} else {
|
||||
// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
|
||||
if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results = append(results, s1)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// Type returns the detector type identifier for this scanner.
func (s Scanner) Type() detectorspb.DetectorType {
	return detectorspb.DetectorType_ScraperSite
}
|
|
@ -1,120 +0,0 @@
|
|||
//go:build detectors
|
||||
// +build detectors
|
||||
|
||||
package scrapersite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/kylelemons/godebug/pretty"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
|
||||
)
|
||||
|
||||
// TestScraperSite_FromChunk exercises FromData against live secrets stored in
// GCP Secret Manager (project "trufflehog-testing"), covering the verified,
// unverified, and not-found paths.
func TestScraperSite_FromChunk(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	secret := testSecrets.MustGetField("SCRAPERSITE")
	inactiveSecret := testSecrets.MustGetField("SCRAPERSITE_INACTIVE")

	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	tests := []struct {
		name    string
		s       Scanner
		args    args
		want    []detectors.Result
		wantErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a scrapersite secret %s within", secret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_ScraperSite,
					Verified:     true,
				},
			},
			wantErr: false,
		},
		{
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a scrapersite secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_ScraperSite,
					Verified:     false,
				},
			},
			wantErr: false,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:    nil,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			s := Scanner{}
			got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
			if (err != nil) != tt.wantErr {
				t.Errorf("ScraperSite.FromData() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			// Raw is nondeterministic per-secret material; assert it is present,
			// then blank it so the struct diff below compares only stable fields.
			for i := range got {
				if len(got[i].Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", got[i])
				}
				got[i].Raw = nil
			}
			if diff := pretty.Compare(got, tt.want); diff != "" {
				t.Errorf("ScraperSite.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
			}
		})
	}
}
|
||||
|
||||
func BenchmarkFromData(benchmark *testing.B) {
|
||||
ctx := context.Background()
|
||||
s := Scanner{}
|
||||
for name, data := range detectors.MustGetBenchmarkData() {
|
||||
benchmark.Run(name, func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := s.FromData(ctx, false, data)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -569,7 +569,6 @@ import (
|
|||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapeowl"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scraperapi"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scraperbox"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapersite"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapestack"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapfly"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapingant"
|
||||
|
@ -1315,7 +1314,6 @@ func DefaultDetectors() []detectors.Detector {
|
|||
zenkitapi.Scanner{},
|
||||
sherpadesk.Scanner{},
|
||||
shotstack.Scanner{},
|
||||
scrapersite.Scanner{},
|
||||
luno.Scanner{},
|
||||
apacta.Scanner{},
|
||||
fmfw.Scanner{},
|
||||
|
|
|
@ -713,31 +713,32 @@ const (
|
|||
DetectorType_Apacta DetectorType = 632
|
||||
DetectorType_GetSandbox DetectorType = 633
|
||||
// Deprecated: Do not use.
|
||||
DetectorType_Happi DetectorType = 634
|
||||
DetectorType_Oanda DetectorType = 635
|
||||
DetectorType_FastForex DetectorType = 636
|
||||
DetectorType_APIMatic DetectorType = 637
|
||||
DetectorType_VersionEye DetectorType = 638
|
||||
DetectorType_EagleEyeNetworks DetectorType = 639
|
||||
DetectorType_ThousandEyes DetectorType = 640
|
||||
DetectorType_SelectPDF DetectorType = 641
|
||||
DetectorType_Flightstats DetectorType = 642
|
||||
DetectorType_ChecIO DetectorType = 643
|
||||
DetectorType_Manifest DetectorType = 644
|
||||
DetectorType_ApiScience DetectorType = 645
|
||||
DetectorType_AppSynergy DetectorType = 646
|
||||
DetectorType_Caflou DetectorType = 647
|
||||
DetectorType_Caspio DetectorType = 648
|
||||
DetectorType_ChecklyHQ DetectorType = 649
|
||||
DetectorType_CloudElements DetectorType = 650
|
||||
DetectorType_DronaHQ DetectorType = 651
|
||||
DetectorType_Enablex DetectorType = 652
|
||||
DetectorType_Fmfw DetectorType = 653
|
||||
DetectorType_GoodDay DetectorType = 654
|
||||
DetectorType_Luno DetectorType = 655
|
||||
DetectorType_Meistertask DetectorType = 656
|
||||
DetectorType_Mindmeister DetectorType = 657
|
||||
DetectorType_PeopleDataLabs DetectorType = 658
|
||||
DetectorType_Happi DetectorType = 634
|
||||
DetectorType_Oanda DetectorType = 635
|
||||
DetectorType_FastForex DetectorType = 636
|
||||
DetectorType_APIMatic DetectorType = 637
|
||||
DetectorType_VersionEye DetectorType = 638
|
||||
DetectorType_EagleEyeNetworks DetectorType = 639
|
||||
DetectorType_ThousandEyes DetectorType = 640
|
||||
DetectorType_SelectPDF DetectorType = 641
|
||||
DetectorType_Flightstats DetectorType = 642
|
||||
DetectorType_ChecIO DetectorType = 643
|
||||
DetectorType_Manifest DetectorType = 644
|
||||
DetectorType_ApiScience DetectorType = 645
|
||||
DetectorType_AppSynergy DetectorType = 646
|
||||
DetectorType_Caflou DetectorType = 647
|
||||
DetectorType_Caspio DetectorType = 648
|
||||
DetectorType_ChecklyHQ DetectorType = 649
|
||||
DetectorType_CloudElements DetectorType = 650
|
||||
DetectorType_DronaHQ DetectorType = 651
|
||||
DetectorType_Enablex DetectorType = 652
|
||||
DetectorType_Fmfw DetectorType = 653
|
||||
DetectorType_GoodDay DetectorType = 654
|
||||
DetectorType_Luno DetectorType = 655
|
||||
DetectorType_Meistertask DetectorType = 656
|
||||
DetectorType_Mindmeister DetectorType = 657
|
||||
DetectorType_PeopleDataLabs DetectorType = 658
|
||||
// Deprecated: Do not use.
|
||||
DetectorType_ScraperSite DetectorType = 659
|
||||
DetectorType_Scrapfly DetectorType = 660
|
||||
DetectorType_SimplyNoted DetectorType = 661
|
||||
|
|
|
@ -667,7 +667,7 @@ enum DetectorType {
|
|||
Meistertask = 656;
|
||||
Mindmeister = 657;
|
||||
PeopleDataLabs = 658;
|
||||
ScraperSite = 659;
|
||||
ScraperSite = 659 [deprecated = true];
|
||||
Scrapfly = 660;
|
||||
SimplyNoted = 661;
|
||||
TravelPayouts = 662;
|
||||
|
|
Loading…
Reference in a new issue