Make archive handler configurable (#1077)

* Make archive handler configurable.

* Use common.IsDone()
This commit is contained in:
Bill Rich 2023-02-07 15:25:14 -08:00 committed by GitHub
parent b37080e6a5
commit 7dd2b74f1f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 20 deletions

14
main.go
View file

@ -22,6 +22,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
"github.com/trufflesecurity/trufflehog/v3/pkg/output"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
@ -46,6 +47,9 @@ var (
printAvgDetectorTime = cli.Flag("print-avg-detector-time", "Print the average time spent on each detector.").Bool()
noUpdate = cli.Flag("no-update", "Don't check for updates.").Bool()
fail = cli.Flag("fail", "Exit with code 183 if results are found.").Bool()
archiveMaxSize = cli.Flag("archive-max-size", "Maximum size of archive to scan.").Bytes()
archiveMaxDepth = cli.Flag("archive-max-depth", "Maximum depth of archive to scan.").Int()
archiveTimeout = cli.Flag("archive-timeout", "Maximum time to spend extracting an archive.").Duration()
gitScan = cli.Command("git", "Find credentials in git repositories.")
gitScanURI = gitScan.Arg("uri", "Git repository URL. https://, file://, or ssh:// schema expected.").Required().String()
@ -192,6 +196,16 @@ func run(state overseer.State) {
}
}
if *archiveMaxSize != 0 {
handlers.SetArchiveMaxSize(int(*archiveMaxSize))
}
if *archiveMaxDepth != 0 {
handlers.SetArchiveMaxDepth(*archiveMaxDepth)
}
if *archiveTimeout != 0 {
handlers.SetArchiveMaxTimeout(*archiveTimeout)
}
ctx := context.TODO()
e := engine.Start(ctx,
engine.WithConcurrency(*concurrency),

View file

@ -6,9 +6,11 @@ import (
"errors"
"fmt"
"io"
"time"
"github.com/mholt/archiver/v4"
log "github.com/sirupsen/logrus"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
)
type ctxKey int
@ -19,25 +21,41 @@ const (
// Package-level limits used by the archive handler. Each one can be
// overridden through the matching SetArchive* function.
var (
// maxDepth is the default maximum archive depth.
maxDepth = 5
// maxSize is the default maximum size in bytes.
maxSize = 250 * 1024 * 1024 // 250MB
// maxTimeout is the timeout applied to the context FromFile derives.
maxTimeout = time.Duration(30) * time.Second
)
// Archive is a handler for extracting and decompressing archives.
type Archive struct {
// maxSize is a per-handler size limit in bytes; New assigns it.
maxSize int
// size is the current size counter; New resets it to 0 and ReadToMax
// compares it against the size limit.
size int
}
// New sets a default maximum size and current size counter.
// HandleFile calls it on every handler before probing the file type.
func (d *Archive) New() {
d.maxSize = 20 * 1024 * 1024 // 20MB
// Start counting from zero for the next file.
d.size = 0
}
// SetArchiveMaxSize sets the maximum size of the archive, in bytes.
//
// Non-positive values are ignored and the current limit is kept: ReadToMax
// derives its read-loop bound from this value, so a zero or negative size
// would leave the handler with an unusable limit.
func SetArchiveMaxSize(size int) {
	if size <= 0 {
		return
	}
	maxSize = size
}
// SetArchiveMaxDepth sets the maximum depth of the archive.
//
// Non-positive values are ignored and the current limit is kept, so a
// negative or zero value (for example from a mistyped command-line flag)
// cannot replace a working depth limit with an unusable one.
func SetArchiveMaxDepth(depth int) {
	if depth <= 0 {
		return
	}
	maxDepth = depth
}
// SetArchiveMaxTimeout sets the maximum timeout for the archive handler.
//
// Non-positive durations are ignored and the current timeout is kept:
// FromFile passes this value to context.WithTimeout, and a zero or negative
// timeout would produce a context that has already expired.
func SetArchiveMaxTimeout(timeout time.Duration) {
	if timeout <= 0 {
		return
	}
	maxTimeout = timeout
}
// FromFile extracts the files from an archive.
func (d *Archive) FromFile(data io.Reader) chan ([]byte) {
ctx := context.Background()
func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ([]byte) {
archiveChan := make(chan ([]byte), 512)
go func() {
ctx, cancel := context.WithTimeout(originalCtx, maxTimeout)
defer cancel()
defer close(archiveChan)
err := d.openArchive(ctx, 0, data, archiveChan)
if err != nil {
@ -83,7 +101,7 @@ func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader,
if err != nil {
return err
}
fileBytes, err := d.ReadToMax(compReader)
fileBytes, err := d.ReadToMax(ctx, compReader)
if err != nil {
return err
}
@ -94,7 +112,7 @@ func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader,
}
// IsFiletype returns true if the provided reader is an archive.
func (d *Archive) IsFiletype(reader io.Reader) (io.Reader, bool) {
func (d *Archive) IsFiletype(ctx context.Context, reader io.Reader) (io.Reader, bool) {
format, readerB, err := archiver.Identify("", reader)
if err != nil {
return readerB, false
@ -121,7 +139,7 @@ func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Conte
if err != nil {
return err
}
fileBytes, err := d.ReadToMax(fReader)
fileBytes, err := d.ReadToMax(ctx, fReader)
if err != nil {
return err
}
@ -136,7 +154,7 @@ func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Conte
}
// ReadToMax reads up to the max size.
func (d *Archive) ReadToMax(reader io.Reader) (data []byte, err error) {
func (d *Archive) ReadToMax(ctx context.Context, reader io.Reader) (data []byte, err error) {
// Archiver v4 is in alpha and using an experimental version of
// rardecode. There is a bug somewhere with rar decoder format 29
// that can lead to a panic. An issue is open in rardecode repo
@ -153,8 +171,11 @@ func (d *Archive) ReadToMax(reader io.Reader) (data []byte, err error) {
}
}()
fileContent := bytes.Buffer{}
log.Tracef("Remaining buffer capacity: %d", d.maxSize-d.size)
for i := 0; i <= d.maxSize/512; i++ {
log.Tracef("Remaining buffer capacity: %d", maxSize-d.size)
for i := 0; i <= maxSize/512; i++ {
if common.IsDone(ctx) {
return nil, ctx.Err()
}
fileChunk := make([]byte, 512)
bRead, err := reader.Read(fileChunk)
if err != nil && !errors.Is(err, io.EOF) {
@ -165,11 +186,11 @@ func (d *Archive) ReadToMax(reader io.Reader) (data []byte, err error) {
fileContent.Write(fileChunk[0:bRead])
}
if bRead < 512 {
break
return fileContent.Bytes(), nil
}
if d.size >= d.maxSize && bRead == 512 {
if d.size >= maxSize && bRead == 512 {
log.Debug("Max archive size reached.")
break
return fileContent.Bytes(), nil
}
}
return fileContent.Bytes(), nil

View file

@ -79,7 +79,7 @@ func TestArchiveHandler(t *testing.T) {
if err != nil {
t.Errorf("error creating reusable reader: %s", err)
}
archiveChan := archive.FromFile(newReader)
archiveChan := archive.FromFile(context.TODO(), newReader)
count := 0
re := regexp.MustCompile(testCase.matchString)

View file

@ -14,8 +14,8 @@ func DefaultHandlers() []Handler {
}
// Handler is the set of methods HandleFile calls on each entry returned by
// DefaultHandlers.
type Handler interface {
// FromFile takes the file reader and returns a channel of []byte chunks;
// HandleFile receives from that channel.
FromFile(io.Reader) chan ([]byte)
// IsFiletype returns a reader, which HandleFile uses in place of the one
// it passed in, and a bool telling HandleFile whether to select this handler.
IsFiletype(io.Reader) (io.Reader, bool)
// Forms of the same two methods that additionally take a context.Context
// as their first argument.
FromFile(context.Context, io.Reader) chan ([]byte)
IsFiletype(context.Context, io.Reader) (io.Reader, bool)
// New is called by HandleFile on each handler before IsFiletype.
New()
}
@ -25,7 +25,7 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c
for _, h := range DefaultHandlers() {
h.New()
var isType bool
if file, isType = h.IsFiletype(file); isType {
if file, isType = h.IsFiletype(ctx, file); isType {
handler = h
break
}
@ -35,7 +35,7 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c
}
// Process the file and read all []byte chunks from handlerChan.
handlerChan := handler.FromFile(file)
handlerChan := handler.FromFile(ctx, file)
for {
select {
case data, open := <-handlerChan: