trufflehog/pkg/handlers/default.go
ahrav 42b3a9d999
[perf] - Optimize MIME Type Detection to Reduce Allocations (#3048)
* Streaming file handling.

* cleanup

* update tests

* lint

* defer close on input io.ReadCloser's

* remove redundant mime type detection

* Reduce allocations

* fix test

* update comment

* fix seek bug

* address comment

* undo
2024-07-17 14:04:29 -07:00

97 lines
3.9 KiB
Go

package handlers
import (
"context"
"errors"
"time"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
// defaultHandler is a handler for non-archive files.
// It is embedded in other specialized handlers to provide a consistent way of handling non-archive content
// once it has been extracted or decompressed by the specific handler.
// This allows the specialized handlers to focus on their specific archive formats while leveraging
// the common functionality provided by the defaultHandler for processing the extracted content.
type defaultHandler struct{ metrics *metrics }
// newDefaultHandler creates a defaultHandler with metrics configured based on the provided handlerType.
// The handlerType parameter is used to initialize the metrics instance with the appropriate handler type,
// ensuring that the metrics recorded within the defaultHandler methods are correctly attributed to the
// specific handler that invoked them.
func newDefaultHandler(handlerType handlerType) *defaultHandler {
return &defaultHandler{metrics: newHandlerMetrics(handlerType)}
}
// HandleFile processes the input as either an archive or non-archive based on its content,
// utilizing a single output channel. It first tries to identify the input as an archive. If it is an archive,
// it processes it accordingly; otherwise, it handles the input as non-archive content.
// The function returns a channel that will receive the extracted data bytes and an error if the initial setup fails.
func (h *defaultHandler) HandleFile(ctx logContext.Context, input fileReader) (chan []byte, error) {
// Shared channel for both archive and non-archive content.
dataChan := make(chan []byte, defaultBufferSize)
go func() {
defer close(dataChan)
// Update the metrics for the file processing.
start := time.Now()
var err error
defer func() {
h.measureLatencyAndHandleErrors(start, err)
h.metrics.incFilesProcessed()
}()
if err = h.handleNonArchiveContent(ctx, newMimeTypeReaderFromFileReader(input), dataChan); err != nil {
ctx.Logger().Error(err, "error handling non-archive content.")
}
}()
return dataChan, nil
}
// measureLatencyAndHandleErrors measures the latency of the file processing and updates the metrics accordingly.
// It also records errors and timeouts in the metrics.
func (h *defaultHandler) measureLatencyAndHandleErrors(start time.Time, err error) {
if err == nil {
h.metrics.observeHandleFileLatency(time.Since(start).Milliseconds())
return
}
h.metrics.incErrors()
if errors.Is(err, context.DeadlineExceeded) {
h.metrics.incFileProcessingTimeouts()
}
}
// handleNonArchiveContent processes files that do not contain nested archives, serving as the final stage in the
// extraction/decompression process. It reads the content to detect its MIME type and decides whether to skip based
// on the type, particularly for binary files. It manages reading file chunks and writing them to the archive channel,
// effectively collecting the final bytes for further processing. This function is a key component in ensuring that all
// file content, regardless of being an archive or not, is handled appropriately.
func (h *defaultHandler) handleNonArchiveContent(ctx logContext.Context, reader mimeTypeReader, archiveChan chan []byte) error {
mimeExt := reader.mimeExt
if common.SkipFile(mimeExt) || common.IsBinary(mimeExt) {
ctx.Logger().V(5).Info("skipping file", "ext", mimeExt)
h.metrics.incFilesSkipped()
return nil
}
chunkReader := sources.NewChunkReader()
for data := range chunkReader(ctx, reader) {
if err := data.Error(); err != nil {
ctx.Logger().Error(err, "error reading chunk")
h.metrics.incErrors()
continue
}
if err := common.CancellableWrite(ctx, archiveChan, data.Bytes()); err != nil {
return err
}
h.metrics.incBytesProcessed(len(data.Bytes()))
}
return nil
}