[THOG-128] Code cleanup/ OSS onboarding (#117)

* Small amount of code cleanup.

* Rename sem to concurrency for better readability and to remove an extra comment.

* Fix stashing issue.

Co-authored-by: Ahrav Dutta <ahrav.dutta@trufflesec.com>
This commit is contained in:
ahrav 2022-04-01 16:47:27 -07:00 committed by GitHub
parent 6ed01500f8
commit cedb3393d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 27 additions and 17 deletions

View file

@ -25,7 +25,7 @@ func FilterEmpty() *Filter {
return filter
}
// FilterFromFiles creates a Filter using from the rules in the provided include and exclude files.
// FilterFromFiles creates a Filter using the rules in the provided include and exclude files.
func FilterFromFiles(includeFilterPath, excludeFilterPath string) (*Filter, error) {
includeRules, err := FilterRulesFromFile(includeFilterPath)
if err != nil {
@ -62,7 +62,12 @@ func FilterRulesFromFile(source string) (*FilterRuleSet, error) {
if err != nil {
log.WithError(err).Fatalf("unable to open filter file: %s", source)
}
defer file.Close()
defer func(file *os.File) {
err := file.Close()
if err != nil {
log.WithError(err).Fatalf("unable to close filter file: %s", source)
}
}(file)
scanner := bufio.NewScanner(file)
for scanner.Scan() {
@ -83,7 +88,6 @@ func FilterRulesFromFile(source string) (*FilterRuleSet, error) {
// Pass reports whether object is accepted by the filter. An object passes
// only when it matches the include rules and does not match the exclude rules.
func (filter *Filter) Pass(object string) bool {
	// Both rule sets are always consulted, exclude first.
	isExcluded := filter.exclude.Matches(object)
	isIncluded := filter.include.Matches(object)
	if isExcluded {
		return false
	}
	return isIncluded
}

View file

@ -20,7 +20,7 @@ func Fuzz(data []byte) int {
decoded := false
for i, decoder := range DefaultDecoders() {
// Skip the first decoder (plain), because it will always decode and give
// priority to the input (return 1)
// priority to the input (return 1).
if i == 0 {
continue
}
@ -32,5 +32,5 @@ func Fuzz(data []byte) int {
if decoded {
return 1 // prioritize the input
}
return -1 // don't add input to the corpus
return -1 // Don't add input to the corpus.
}

View file

@ -15,7 +15,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
// Detector defines and interface for scanning for and verifying secrets.
// Detector defines an interface for scanning for and verifying secrets.
type Detector interface {
// FromData will scan bytes for results, and optionally verify them.
FromData(ctx context.Context, verify bool, data []byte) ([]Result, error)
@ -38,7 +38,7 @@ type Result struct {
}
type ResultWithMetadata struct {
// SourceMetadata contains source-specific contextual information
// SourceMetadata contains source-specific contextual information.
SourceMetadata *source_metadatapb.MetaData
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
SourceID int64
@ -49,6 +49,7 @@ type ResultWithMetadata struct {
Result
}
// CopyMetadata returns a detector result with included metadata from the source chunk.
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
return ResultWithMetadata{
SourceMetadata: chunk.SourceMetadata,
@ -86,7 +87,7 @@ func CleanResults(results []Result) []Result {
return results
}
// Prefix regex ensures that at least one of the given keywords is within
// PrefixRegex ensures that at least one of the given keywords is within
// 20 characters of the capturing group that follows.
// This can help prevent false positives.
func PrefixRegex(keywords []string) string {
@ -97,7 +98,7 @@ func PrefixRegex(keywords []string) string {
}
//KeyIsRandom is a low-cost check to make sure that 'keys' include a number to reduce false positives.
//Golang doesnt support regex lookaheads, so must be done in seperate calls.
//Golang doesn't support regex lookaheads, so must be done in separate calls.
//TODO improve checks. Shannon entropy did not work well.
func KeyIsRandom(key string) bool {
for _, ch := range key {

View file

@ -66,7 +66,7 @@ func Start(ctx context.Context, options ...EngineOption) *Engine {
option(e)
}
// set defaults
// Set defaults.
if e.concurrency == 0 {
numCPU := runtime.NumCPU()
@ -186,17 +186,17 @@ func (e *Engine) detectorWorker(ctx context.Context) {
e.results <- detectors.CopyMetadata(targetChunk, result)
}
if len(results) > 0 {
elasped := time.Since(start)
elapsed := time.Since(start)
detectorName := results[0].DetectorType.String()
avgTimeI, ok := e.detectorAvgTime.Load(detectorName)
avgTime := []time.Duration{}
var avgTime []time.Duration
if ok {
avgTime, ok = avgTimeI.([]time.Duration)
if !ok {
continue
}
}
avgTime = append(avgTime, elasped)
avgTime = append(avgTime, elapsed)
e.detectorAvgTime.Store(detectorName, avgTime)
}
}
@ -229,6 +229,7 @@ func isGitSource(sourceType sourcespb.SourceType) bool {
return false
}
// SetLineNumber sets the line number for a provided source chunk with a given detector result.
func SetLineNumber(chunk *sources.Chunk, result *detectors.Result) {
var startingLine *int64
switch metadata := chunk.SourceMetadata.GetData().(type) {

View file

@ -49,8 +49,7 @@ type Git struct {
jobID int64
sourceMetadataFunc func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData
verify bool
// sem is used to limit concurrency
sem *semaphore.Weighted
concurrency *semaphore.Weighted
}
func NewGit(sourceType sourcespb.SourceType, jobID, sourceID int64, sourceName string, verify bool, concurrency int,
@ -63,11 +62,11 @@ func NewGit(sourceType sourcespb.SourceType, jobID, sourceID int64, sourceName s
jobID: jobID,
sourceMetadataFunc: sourceMetadataFunc,
verify: verify,
sem: semaphore.NewWeighted(int64(concurrency)),
concurrency: semaphore.NewWeighted(int64(concurrency)),
}
}
// Ensure the Source satisfies the interface at compile time
// Ensure the Source satisfies the interface at compile time.
var _ sources.Source = (*Source)(nil)
// Type returns the type of source.
@ -241,11 +240,13 @@ func CloneRepo(userInfo *url.Userinfo, gitUrl string) (clonePath string, repo *g
return
}
// CloneRepoUsingToken clones gitUrl by delegating to CloneRepo with
// credentials built from user (as the username) and token (as the password).
// It returns whatever CloneRepo returns: the clone path, the repository, and
// any error.
func CloneRepoUsingToken(token, gitUrl, user string) (string, *git.Repository, error) {
	return CloneRepo(url.UserPassword(user, token), gitUrl)
}
// CloneRepoUsingUnauthenticated clones url by delegating to CloneRepo with
// nil user info, i.e. without supplying any credentials. It returns whatever
// CloneRepo returns: the clone path, the repository, and any error.
func CloneRepoUsingUnauthenticated(url string) (string, *git.Repository, error) {
	clonePath, repo, err := CloneRepo(nil, url)
	return clonePath, repo, err
}
@ -430,6 +431,7 @@ func stripPassword(u string) (string, error) {
return repoURL.String(), nil
}
// TryAdditionalBaseRefs looks for additional possible base refs for a repo and returns a hash if found.
func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, error) {
revisionPrefixes := []string{
"",
@ -449,6 +451,8 @@ func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, e
return nil, fmt.Errorf("no base refs succeeded for base: %q", base)
}
// PrepareRepo clones a repo if possible and returns the cloned repo string.
func PrepareRepo(uriString string) (string, bool, error) {
var path string
uri, err := url.Parse(uriString)