mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-14 00:47:21 +00:00
ead9dd5748
* Remove specialized handler and archive struct and restructure handlers pkg. * Refactor RPM archive handlers to use a library instead of shelling out * make rpm handling context aware * update test * Refactor AR/deb archive handler to use an existing library instead of shelling out * Update tests * Handle non-archive data within the DefaultHandler * make structs and methods private * Remove non-archive data handling within sources * add max size check * add filename and size to context kvp * move skip file check and is binary check before opening file * fix test * preserve existing funcitonality of not handling non-archive files in HandleFile * Handle non-archive data within the DefaultHandler * rebase * Remove non-archive data handling within sources * Adjust check for rpm/deb archive type * add additional deb mime type * add gzip * move diskbuffered rereader setup into handler pkg * remove DiskBuffereReader creation logic within sources * update comment * move rewind closer * reduce log verbosity * add metrics for file handling * add metrics for errors * make defaultBufferSize a const * add metrics for file handling * add metrics for errors * fix tests * add metrics for max archive depth and skipped files * update error * skip symlinks and dirs * update err * Address incompatible reader to openArchive * remove nil check * fix err assignment * Allow git cat-file blob to complete before trying to handle the file * wrap compReader with DiskbufferReader * Allow git cat-file blob to complete before trying to handle the file * updates * use buffer writer * update * refactor * update context pkg * revert stuff * update test * fix test * remove * use correct reader * add metrics for file handling * add metrics for errors * fix tests * rebase * add metrics for errors * add metrics for max archive depth and skipped files * update error * skip symlinks and dirs * update err * fix err assignment * rebase * remove * Update write method in contentWriter interface * Add bufferReadSeekCloser * update name * update comment * fix lint * Remove specialized handler and archive struct and restructure handlers pkg. * Refactor RPM archive handlers to use a library instead of shelling out * make rpm handling context aware * update test * Refactor AR/deb archive handler to use an existing library instead of shelling out * Update tests * add max size check * add filename and size to context kvp * move skip file check and is binary check before opening file * fix test * preserve existing funcitonality of not handling non-archive files in HandleFile * Handle non-archive data within the DefaultHandler * rebase * Remove non-archive data handling within sources * Handle non-archive data within the DefaultHandler * add gzip * move diskbuffered rereader setup into handler pkg * remove DiskBuffereReader creation logic within sources * update comment * move rewind closer * reduce log verbosity * make defaultBufferSize a const * add metrics for file handling * add metrics for errors * fix tests * add metrics for max archive depth and skipped files * update error * skip symlinks and dirs * update err * Address incompatible reader to openArchive * remove nil check * fix err assignment * wrap compReader with DiskbufferReader * Allow git cat-file blob to complete before trying to handle the file * updates * use buffer writer * update * refactor * update context pkg * revert stuff * update test * remove * rebase * go mod tidy * lint check * update metric to ms * update metric * update comments * dont use ptr * update * fix * Remove specialized handler and archive struct and restructure handlers pkg. * Refactor RPM archive handlers to use a library instead of shelling out * make rpm handling context aware * update test * Refactor AR/deb archive handler to use an existing library instead of shelling out * Update tests * add max size check * add filename and size to context kvp * move skip file check and is binary check before opening file * fix test * preserve existing funcitonality of not handling non-archive files in HandleFile * Adjust check for rpm/deb archive type * add additional deb mime type * update comment * go mod tidy * update go mod * Add a buffered file reader * update comments * use Buffered File Readder * return buffer * update * fix * return * go mod tidy * merge * use a shared pool * use sync.Once * reorganzie * remove unused code * fix double init * fix stuff * nil check * reduce allocations * updates * update metrics * updates * reset buffer instead of putting it back * skip binaries * skip * concurrently process diffs * close chan * concurrently enumerate orgs * increase workers * ignore pbix and vsdx files * add metrics for gitparse's Diffchan * fix metric * update metrics * update * fix checks * fix * inc * update * reduce * Create workers to handle binary files * modify workers * updates * add check * delete code * use custom reader * rename struct * add nonarchive handler * fix break * add comments * add tests * refactor * remove log * do not scan rpm links * simplify * rename var * rename * fix benchmark * add buffer * buffer * buffer * handle panic * merge main * merge main * add recover * revert stuff * revert * revert to using reader * fixes * remove * update * fixes * linter * fix test * fix comment * update field name * fix
95 lines
2.4 KiB
Go
95 lines
2.4 KiB
Go
package handlers
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"time"
|
|
|
|
"pault.ag/go/debian/deb"
|
|
|
|
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
|
)
|
|
|
|
// arHandler handles AR archive formats.
|
|
type arHandler struct{ *defaultHandler }
|
|
|
|
// newARHandler creates an arHandler.
|
|
func newARHandler() *arHandler {
|
|
return &arHandler{defaultHandler: newDefaultHandler(arHandlerType)}
|
|
}
|
|
|
|
// HandleFile processes AR formatted files. This function needs to be implemented to extract or
|
|
// manage data from AR files according to specific requirements.
|
|
func (h *arHandler) HandleFile(ctx logContext.Context, input fileReader) (chan []byte, error) {
|
|
archiveChan := make(chan []byte, defaultBufferSize)
|
|
|
|
go func() {
|
|
ctx, cancel := logContext.WithTimeout(ctx, maxTimeout)
|
|
defer cancel()
|
|
defer close(archiveChan)
|
|
|
|
// Update the metrics for the file processing.
|
|
start := time.Now()
|
|
var err error
|
|
defer func() {
|
|
h.measureLatencyAndHandleErrors(start, err)
|
|
h.metrics.incFilesProcessed()
|
|
}()
|
|
|
|
// Defer a panic recovery to handle any panics that occur during the AR processing.
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
// Return the panic as an error.
|
|
if e, ok := r.(error); ok {
|
|
err = e
|
|
} else {
|
|
err = fmt.Errorf("panic occurred: %v", r)
|
|
}
|
|
ctx.Logger().Error(err, "Panic occurred when reading ar archive")
|
|
}
|
|
}()
|
|
|
|
var arReader *deb.Ar
|
|
arReader, err = deb.LoadAr(input)
|
|
if err != nil {
|
|
ctx.Logger().Error(err, "error reading AR")
|
|
return
|
|
}
|
|
|
|
if err = h.processARFiles(ctx, arReader, archiveChan); err != nil {
|
|
ctx.Logger().Error(err, "error processing AR files")
|
|
}
|
|
}()
|
|
|
|
return archiveChan, nil
|
|
}
|
|
|
|
func (h *arHandler) processARFiles(ctx logContext.Context, reader *deb.Ar, archiveChan chan []byte) error {
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
arEntry, err := reader.Next()
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
ctx.Logger().V(3).Info("AR archive fully processed")
|
|
return nil
|
|
}
|
|
return fmt.Errorf("error reading AR payload: %w", err)
|
|
}
|
|
|
|
fileSize := arEntry.Size
|
|
fileCtx := logContext.WithValues(ctx, "filename", arEntry.Name, "size", fileSize)
|
|
|
|
if err := h.handleNonArchiveContent(fileCtx, arEntry.Data, archiveChan); err != nil {
|
|
fileCtx.Logger().Error(err, "error handling archive content in AR")
|
|
h.metrics.incErrors()
|
|
}
|
|
|
|
h.metrics.incFilesProcessed()
|
|
h.metrics.observeFileSize(fileSize)
|
|
}
|
|
}
|
|
}
|