[fixup] - correctly use the buffered file writer (#2373)

* correctly use the buffered file writer

* use value from source

* reorder fields

* use only the DetectorKey as a map field

* address comments and use factory function

* fix optional params

* remove commented out code
This commit is contained in:
ahrav 2024-02-05 10:43:55 -08:00 committed by GitHub
parent 905b7c3a01
commit 135cc3eb69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 34 additions and 18 deletions

View file

@ -16,6 +16,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer"
)
const (
@ -100,10 +101,11 @@ func (b *buffer) String() (string, error) { return b.Buffer.String(), nil }
// The use of contentWriter enables the management of diff data either in memory or on disk,
// based on its size, optimizing resource usage and performance.
type Diff struct {
PathB string
LineStart int
PathB string
LineStart int
IsBinary bool
contentWriter contentWriter
IsBinary bool
}
type diffOption func(*Diff)
@ -111,10 +113,14 @@ type diffOption func(*Diff)
// withPathB returns a diffOption that assigns pathB to the Diff's PathB field.
func withPathB(pathB string) diffOption {
	return func(target *Diff) {
		target.PathB = pathB
	}
}
// withCustomContentWriter returns a diffOption that replaces the Diff's
// contentWriter with cr.
func withCustomContentWriter(cr contentWriter) diffOption {
	setWriter := func(target *Diff) {
		target.contentWriter = cr
	}
	return setWriter
}
// NewDiff creates a new Diff whose contentWriter defaults to newBuffer(), then applies the given options in order.
func NewDiff(opts ...diffOption) *Diff {
diff := new(Diff)
diff.contentWriter = newBuffer()
for _, opt := range opts {
opt(diff)
}
@ -203,7 +209,8 @@ type Parser struct {
maxDiffSize int
maxCommitSize int
dateFormat string
contentWriter contentWriter
useCustomContentWriter bool
}
type ParseState int
@ -250,11 +257,9 @@ func (state ParseState) String() string {
}[state]
}
// WithContentWriter sets the ContentWriter for the Parser.
func WithContentWriter(writer contentWriter) Option {
return func(parser *Parser) {
parser.contentWriter = writer
}
// UseCustomContentWriter sets useCustomContentWriter option.
func UseCustomContentWriter() Option {
return func(parser *Parser) { parser.useCustomContentWriter = true }
}
// WithMaxDiffSize sets maxDiffSize option. Diffs larger than maxDiffSize will
@ -283,7 +288,6 @@ func NewParser(options ...Option) *Parser {
dateFormat: defaultDateFormat,
maxDiffSize: defaultMaxDiffSize,
maxCommitSize: defaultMaxCommitSize,
contentWriter: newBuffer(),
}
for _, option := range options {
option(parser)
@ -387,7 +391,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
totalLogSize int
)
var latestState = Initial
currentDiff := NewDiff()
diff := func(opts ...diffOption) *Diff {
opts = append(opts, withCustomContentWriter(newBuffer()))
return NewDiff(opts...)
}
if c.useCustomContentWriter {
diff = func(opts ...diffOption) *Diff {
opts = append(opts, withCustomContentWriter(bufferedfilewriter.New()))
return NewDiff(opts...)
}
}
currentDiff := diff()
defer common.RecoverWithExit(ctx)
defer close(commitChan)
@ -430,7 +445,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
totalLogSize += currentCommit.Size
}
// Create a new currentDiff and currentCommit
currentDiff = NewDiff()
currentDiff = diff()
// currentDiff = NewDiff(withCustomContentWriter(c.contentWriter()))
currentCommit = &Commit{Message: strings.Builder{}}
// Check that the commit line contains a hash and set it.
if len(line) >= 47 {
@ -498,7 +514,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
currentCommit.Message.WriteString(oldCommit.Message.String())
}
}
currentDiff = NewDiff()
currentDiff = diff()
// currentDiff = NewDiff(withCustomContentWriter(c.contentWriter()))
case isModeLine(isStaged, latestState, line):
latestState = ModeLine
// NoOp
@ -538,7 +555,7 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
}
currentCommit.Diffs = append(currentCommit.Diffs, *currentDiff)
}
currentDiff = NewDiff(withPathB(currentDiff.PathB))
currentDiff = diff(withPathB(currentDiff.PathB))
words := bytes.Split(line, []byte(" "))
if len(words) >= 3 {

View file

@ -34,7 +34,6 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer"
)
const SourceType = sourcespb.SourceType_SOURCE_TYPE_GIT
@ -99,7 +98,7 @@ type Config struct {
func NewGit(config *Config) *Git {
var parser *gitparse.Parser
if config.UseCustomContentWriter {
parser = gitparse.NewParser(gitparse.WithContentWriter(bufferedfilewriter.New()))
parser = gitparse.NewParser(gitparse.UseCustomContentWriter())
} else {
parser = gitparse.NewParser()
}
@ -522,7 +521,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
repoCtx = context.WithValue(ctx, "repo", path)
}
commitChan, err := gitparse.NewParser().RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
commitChan, err := s.parser.RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
if err != nil {
return err
}