mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
[THOG-128] Code cleanup/ OSS onboarding (#117)
* Small amount of code cleanup. * Rename sem to concurrency for better readability and to remove an extra comment. * Fix stashing issue. Co-authored-by: Ahrav Dutta <ahrav.dutta@trufflesec.com>
This commit is contained in:
parent
6ed01500f8
commit
cedb3393d1
5 changed files with 27 additions and 17 deletions
|
@ -25,7 +25,7 @@ func FilterEmpty() *Filter {
|
|||
return filter
|
||||
}
|
||||
|
||||
// FilterFromFiles creates a Filter using from the rules in the provided include and exclude files.
|
||||
// FilterFromFiles creates a Filter using the rules in the provided include and exclude files.
|
||||
func FilterFromFiles(includeFilterPath, excludeFilterPath string) (*Filter, error) {
|
||||
includeRules, err := FilterRulesFromFile(includeFilterPath)
|
||||
if err != nil {
|
||||
|
@ -62,7 +62,12 @@ func FilterRulesFromFile(source string) (*FilterRuleSet, error) {
|
|||
if err != nil {
|
||||
log.WithError(err).Fatalf("unable to open filter file: %s", source)
|
||||
}
|
||||
defer file.Close()
|
||||
defer func(file *os.File) {
|
||||
err := file.Close()
|
||||
if err != nil {
|
||||
log.WithError(err).Fatalf("unable to close filter file: %s", source)
|
||||
}
|
||||
}(file)
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
|
@ -83,7 +88,6 @@ func FilterRulesFromFile(source string) (*FilterRuleSet, error) {
|
|||
func (filter *Filter) Pass(object string) bool {
|
||||
excluded := filter.exclude.Matches(object)
|
||||
included := filter.include.Matches(object)
|
||||
// log.Debugf("test PathFilter: file: %s, included: %t, excluded: %t, pass: %t", object, included, excluded, !excluded && included)
|
||||
return !excluded && included
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ func Fuzz(data []byte) int {
|
|||
decoded := false
|
||||
for i, decoder := range DefaultDecoders() {
|
||||
// Skip the first decoder (plain), because it will always decode and give
|
||||
// priority to the input (return 1)
|
||||
// priority to the input (return 1).
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
|
@ -32,5 +32,5 @@ func Fuzz(data []byte) int {
|
|||
if decoded {
|
||||
return 1 // prioritize the input
|
||||
}
|
||||
return -1 // don't add input to the corpus
|
||||
return -1 // Don't add input to the corpus.
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ import (
|
|||
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
||||
)
|
||||
|
||||
// Detector defines and interface for scanning for and verifying secrets.
|
||||
// Detector defines an interface for scanning for and verifying secrets.
|
||||
type Detector interface {
|
||||
// FromData will scan bytes for results, and optionally verify them.
|
||||
FromData(ctx context.Context, verify bool, data []byte) ([]Result, error)
|
||||
|
@ -38,7 +38,7 @@ type Result struct {
|
|||
}
|
||||
|
||||
type ResultWithMetadata struct {
|
||||
// SourceMetadata contains source-specific contextual information
|
||||
// SourceMetadata contains source-specific contextual information.
|
||||
SourceMetadata *source_metadatapb.MetaData
|
||||
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
|
||||
SourceID int64
|
||||
|
@ -49,6 +49,7 @@ type ResultWithMetadata struct {
|
|||
Result
|
||||
}
|
||||
|
||||
// CopyMetadata returns a detector result with included metadata from the source chunk.
|
||||
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
|
||||
return ResultWithMetadata{
|
||||
SourceMetadata: chunk.SourceMetadata,
|
||||
|
@ -86,7 +87,7 @@ func CleanResults(results []Result) []Result {
|
|||
return results
|
||||
}
|
||||
|
||||
// Prefix regex ensures that at least one of the given keywords is within
|
||||
// PrefixRegex ensures that at least one of the given keywords is within
|
||||
// 20 characters of the capturing group that follows.
|
||||
// This can help prevent false positives.
|
||||
func PrefixRegex(keywords []string) string {
|
||||
|
@ -97,7 +98,7 @@ func PrefixRegex(keywords []string) string {
|
|||
}
|
||||
|
||||
// KeyIsRandom is a low-cost check to make sure that 'keys' include a number to reduce false positives (FPs).
|
||||
//Golang doesnt support regex lookaheads, so must be done in seperate calls.
|
||||
// Go doesn't support regex lookaheads, so the checks must be done in separate calls.
|
||||
//TODO improve checks. Shannon entropy did not work well.
|
||||
func KeyIsRandom(key string) bool {
|
||||
for _, ch := range key {
|
||||
|
|
|
@ -66,7 +66,7 @@ func Start(ctx context.Context, options ...EngineOption) *Engine {
|
|||
option(e)
|
||||
}
|
||||
|
||||
// set defaults
|
||||
// Set defaults.
|
||||
|
||||
if e.concurrency == 0 {
|
||||
numCPU := runtime.NumCPU()
|
||||
|
@ -186,17 +186,17 @@ func (e *Engine) detectorWorker(ctx context.Context) {
|
|||
e.results <- detectors.CopyMetadata(targetChunk, result)
|
||||
}
|
||||
if len(results) > 0 {
|
||||
elasped := time.Since(start)
|
||||
elapsed := time.Since(start)
|
||||
detectorName := results[0].DetectorType.String()
|
||||
avgTimeI, ok := e.detectorAvgTime.Load(detectorName)
|
||||
avgTime := []time.Duration{}
|
||||
var avgTime []time.Duration
|
||||
if ok {
|
||||
avgTime, ok = avgTimeI.([]time.Duration)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
avgTime = append(avgTime, elasped)
|
||||
avgTime = append(avgTime, elapsed)
|
||||
e.detectorAvgTime.Store(detectorName, avgTime)
|
||||
}
|
||||
}
|
||||
|
@ -229,6 +229,7 @@ func isGitSource(sourceType sourcespb.SourceType) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// SetLineNumber sets the line number for a provided source chunk with a given detector result.
|
||||
func SetLineNumber(chunk *sources.Chunk, result *detectors.Result) {
|
||||
var startingLine *int64
|
||||
switch metadata := chunk.SourceMetadata.GetData().(type) {
|
||||
|
|
|
@ -49,8 +49,7 @@ type Git struct {
|
|||
jobID int64
|
||||
sourceMetadataFunc func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData
|
||||
verify bool
|
||||
// sem is used to limit concurrency
|
||||
sem *semaphore.Weighted
|
||||
concurrency *semaphore.Weighted
|
||||
}
|
||||
|
||||
func NewGit(sourceType sourcespb.SourceType, jobID, sourceID int64, sourceName string, verify bool, concurrency int,
|
||||
|
@ -63,11 +62,11 @@ func NewGit(sourceType sourcespb.SourceType, jobID, sourceID int64, sourceName s
|
|||
jobID: jobID,
|
||||
sourceMetadataFunc: sourceMetadataFunc,
|
||||
verify: verify,
|
||||
sem: semaphore.NewWeighted(int64(concurrency)),
|
||||
concurrency: semaphore.NewWeighted(int64(concurrency)),
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the Source satisfies the interface at compile time
|
||||
// Ensure the Source satisfies the interface at compile time.
|
||||
var _ sources.Source = (*Source)(nil)
|
||||
|
||||
// Type returns the type of source.
|
||||
|
@ -241,11 +240,13 @@ func CloneRepo(userInfo *url.Userinfo, gitUrl string) (clonePath string, repo *g
|
|||
return
|
||||
}
|
||||
|
||||
// CloneRepoUsingToken clones a repo using a provided token.
|
||||
func CloneRepoUsingToken(token, gitUrl, user string) (string, *git.Repository, error) {
|
||||
userInfo := url.UserPassword(user, token)
|
||||
return CloneRepo(userInfo, gitUrl)
|
||||
}
|
||||
|
||||
// CloneRepoUsingUnauthenticated clones a repo with no authentication required.
|
||||
func CloneRepoUsingUnauthenticated(url string) (string, *git.Repository, error) {
|
||||
return CloneRepo(nil, url)
|
||||
}
|
||||
|
@ -430,6 +431,7 @@ func stripPassword(u string) (string, error) {
|
|||
return repoURL.String(), nil
|
||||
}
|
||||
|
||||
// TryAdditionalBaseRefs looks for additional possible base refs for a repo and returns a hash if found.
|
||||
func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, error) {
|
||||
revisionPrefixes := []string{
|
||||
"",
|
||||
|
@ -449,6 +451,8 @@ func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, e
|
|||
|
||||
return nil, fmt.Errorf("no base refs succeeded for base: %q", base)
|
||||
}
|
||||
|
||||
// PrepareRepo clones a repo if possible and returns the cloned repo string.
|
||||
func PrepareRepo(uriString string) (string, bool, error) {
|
||||
var path string
|
||||
uri, err := url.Parse(uriString)
|
||||
|
|
Loading…
Reference in a new issue