Remove unused code and add git binary check (#80)

This commit is contained in:
Bill Rich 2022-03-14 16:47:18 -08:00 committed by GitHub
parent 72c13c4b8f
commit e8234c3514
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 6 additions and 84 deletions

View file

@ -1,9 +1,7 @@
package engine
import (
"bytes"
"context"
"crypto/sha256"
"runtime"
"strings"
"sync"
@ -14,8 +12,6 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
@ -27,16 +23,10 @@ type Engine struct {
detectors map[bool][]detectors.Detector
chunksScanned uint64
detectorAvgTime sync.Map
detectedSecret secretTracker
}
// EngineOption is a function that receives a *Engine and configures it. Constructors such as
// WithConcurrency return one, and Start accepts a variadic list of them.
type EngineOption func(*Engine)
// secretTracker pairs a set of 32-byte digests with the mutex used when adding to it. The engine keeps one
// in its detectedSecret field to recognise results it has already seen.
type secretTracker struct {
// secret is used as a set: a key being present means that digest was already recorded. The detector worker
// builds the key as the SHA-256 of a result's raw bytes concatenated with the repository and file names.
secret map[[32]byte]bool
// sync is held while inserting into secret.
// NOTE(review): the detector worker's existence check reads secret without taking this lock — confirm
// whether that unguarded read is intended.
sync sync.Mutex
}
func WithConcurrency(concurrency int) EngineOption {
return func(e *Engine) {
e.concurrency = concurrency
@ -67,10 +57,6 @@ func Start(ctx context.Context, options ...EngineOption) *Engine {
chunks: make(chan *sources.Chunk),
results: make(chan detectors.ResultWithMetadata),
detectorAvgTime: sync.Map{},
detectedSecret: secretTracker{
secret: map[[32]byte]bool{},
sync: sync.Mutex{},
},
}
for _, option := range options {
@ -188,50 +174,7 @@ func (e *Engine) detectorWorker(ctx context.Context) {
continue
}
for _, result := range results {
if isGitSource(chunk.SourceType) {
repo := ""
file := ""
commit := ""
switch metadata := chunk.SourceMetadata.GetData().(type) {
case *source_metadatapb.MetaData_Git:
repo = metadata.Git.Repository
file = metadata.Git.File
commit = metadata.Git.Commit
case *source_metadatapb.MetaData_Github:
repo = metadata.Github.Repository
file = metadata.Github.File
commit = metadata.Github.Commit
case *source_metadatapb.MetaData_Gitlab:
repo = metadata.Gitlab.Repository
file = metadata.Gitlab.File
commit = metadata.Gitlab.Commit
case *source_metadatapb.MetaData_Bitbucket:
repo = metadata.Bitbucket.Repository
file = metadata.Bitbucket.File
commit = metadata.Bitbucket.Commit
case *source_metadatapb.MetaData_Gerrit:
repo = metadata.Gerrit.Project
file = metadata.Gerrit.File
commit = metadata.Gerrit.Commit
}
if repo != "" && file != "" {
data := bytes.Join([][]byte{result.Raw, []byte(repo), []byte(file)}, []byte{})
sid := sha256.Sum256(data)
logrus.WithField("raw", string(result.Raw)).WithField("repo", repo).WithField("file", file).Debugf("result: %s", result.Raw)
_, exists := e.detectedSecret.secret[sid]
if exists {
commit = commit
// logrus.Debugf("skipping duplicate result for %s in commit %s", result.Raw, commit)
continue
}
e.detectedSecret.sync.Lock()
e.detectedSecret.secret[sid] = true
e.detectedSecret.sync.Unlock()
}
}
if !chunk.IgnoreResult {
e.results <- detectors.CopyMetadata(chunk, result)
}
e.results <- detectors.CopyMetadata(chunk, result)
}
if len(results) > 0 {
elasped := time.Since(start)
@ -253,26 +196,3 @@ func (e *Engine) detectorWorker(ctx context.Context) {
atomic.AddUint64(&e.chunksScanned, 1)
}
}
// gitSources returns every source type that utilizes the Git source. The list is produced by a function
// because Go does not support slice constants.
func gitSources() []sourcespb.SourceType {
	var gitBacked []sourcespb.SourceType
	gitBacked = append(gitBacked,
		sourcespb.SourceType_SOURCE_TYPE_GIT,
		sourcespb.SourceType_SOURCE_TYPE_GITHUB,
		sourcespb.SourceType_SOURCE_TYPE_GITLAB,
		sourcespb.SourceType_SOURCE_TYPE_BITBUCKET,
		sourcespb.SourceType_SOURCE_TYPE_GERRIT,
		sourcespb.SourceType_SOURCE_TYPE_GITHUB_UNAUTHENTICATED_ORG,
		sourcespb.SourceType_SOURCE_TYPE_PUBLIC_GIT,
	)
	return gitBacked
}
// isGitSource reports whether sourceType is one of the source types listed by gitSources.
func isGitSource(sourceType sourcespb.SourceType) bool {
	candidates := gitSources()
	for idx := 0; idx < len(candidates); idx++ {
		if candidates[idx] != sourceType {
			continue
		}
		return true
	}
	return false
}

View file

@ -23,7 +23,7 @@ func (e *Engine) ScanFileSystem(ctx context.Context, directories []string) error
}
fileSystemSource := filesystem.Source{}
err = fileSystemSource.Init(ctx, "local", 0, int64(sourcespb.SourceType_SOURCE_TYPE_FILESYSTEM), true, &conn, runtime.NumCPU())
err = fileSystemSource.Init(ctx, "trufflehog - filesystem", 0, int64(sourcespb.SourceType_SOURCE_TYPE_FILESYSTEM), true, &conn, runtime.NumCPU())
if err != nil {
return errors.WrapPrefix(err, "could not init filesystem source", 0)
}

View file

@ -8,6 +8,7 @@ import (
"io/ioutil"
"net/url"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
@ -256,6 +257,9 @@ func CloneRepoUsingUnauthenticated(url string) (clonePath string, repo *git.Repo
}
func (s *Git) ScanCommits(repo *git.Repository, path string, scanOptions *ScanOptions, chunksChan chan *sources.Chunk) error {
if errors.Is(exec.Command("git").Run(), exec.ErrNotFound) {
return fmt.Errorf("'git' command not found in $PATH. Make sure git is installed and included in $PATH")
}
zerolog.SetGlobalLevel(zerolog.Disabled)
fileChan, err := glgo.GitLog(path, scanOptions.HeadHash)
if err != nil {

View file

@ -24,8 +24,6 @@ type Chunk struct {
Data []byte
// Verify specifies whether any secrets in the Chunk should be verified.
Verify bool
// Do not report any results that came from this chunk.
IgnoreResult bool
}
// Source defines the interface required to implement a source chunker.