trufflehog/pkg/handlers/ar.go
ahrav ead9dd5748
[refactor] - Create separate handler for non-archive data (#2825)
* Remove specialized handler and archive struct and restructure handlers pkg.

* Refactor RPM archive handlers to use a library instead of shelling out

* make rpm handling context aware

* update test

* Refactor AR/deb archive handler to use an existing library instead of shelling out

* Update tests

* Handle non-archive data within the DefaultHandler

* make structs and methods private

* Remove non-archive data handling within sources

* add max size check

* add filename and size to context kvp

* move skip file check and is binary check before opening file

* fix test

* preserve existing funcitonality of not handling non-archive files in HandleFile

* Handle non-archive data within the DefaultHandler

* rebase

* Remove non-archive data handling within sources

* Adjust check for rpm/deb archive type

* add additional deb mime type

* add gzip

* move diskbuffered rereader setup into handler pkg

* remove DiskBuffereReader creation logic within sources

* update comment

* move rewind closer

* reduce log verbosity

* add metrics for file handling

* add metrics for errors

* make defaultBufferSize a const

* add metrics for file handling

* add metrics for errors

* fix tests

* add metrics for max archive depth and skipped files

* update error

* skip symlinks and dirs

* update err

* Address incompatible reader to openArchive

* remove nil check

* fix err assignment

* Allow git cat-file blob to complete before trying to handle the file

* wrap compReader with DiskbufferReader

* Allow git cat-file blob to complete before trying to handle the file

* updates

* use buffer writer

* update

* refactor

* update context pkg

* revert stuff

* update test

* fix test

* remove

* use correct reader

* add metrics for file handling

* add metrics for errors

* fix tests

* rebase

* add metrics for errors

* add metrics for max archive depth and skipped files

* update error

* skip symlinks and dirs

* update err

* fix err assignment

* rebase

* remove

* Update write method in contentWriter interface

* Add bufferReadSeekCloser

* update name

* update comment

* fix lint

* Remove specialized handler and archive struct and restructure handlers pkg.

* Refactor RPM archive handlers to use a library instead of shelling out

* make rpm handling context aware

* update test

* Refactor AR/deb archive handler to use an existing library instead of shelling out

* Update tests

* add max size check

* add filename and size to context kvp

* move skip file check and is binary check before opening file

* fix test

* preserve existing funcitonality of not handling non-archive files in HandleFile

* Handle non-archive data within the DefaultHandler

* rebase

* Remove non-archive data handling within sources

* Handle non-archive data within the DefaultHandler

* add gzip

* move diskbuffered rereader setup into handler pkg

* remove DiskBuffereReader creation logic within sources

* update comment

* move rewind closer

* reduce log verbosity

* make defaultBufferSize a const

* add metrics for file handling

* add metrics for errors

* fix tests

* add metrics for max archive depth and skipped files

* update error

* skip symlinks and dirs

* update err

* Address incompatible reader to openArchive

* remove nil check

* fix err assignment

* wrap compReader with DiskbufferReader

* Allow git cat-file blob to complete before trying to handle the file

* updates

* use buffer writer

* update

* refactor

* update context pkg

* revert stuff

* update test

* remove

* rebase

* go mod tidy

* lint check

* update metric to ms

* update metric

* update comments

* dont use ptr

* update

* fix

* Remove specialized handler and archive struct and restructure handlers pkg.

* Refactor RPM archive handlers to use a library instead of shelling out

* make rpm handling context aware

* update test

* Refactor AR/deb archive handler to use an existing library instead of shelling out

* Update tests

* add max size check

* add filename and size to context kvp

* move skip file check and is binary check before opening file

* fix test

* preserve existing funcitonality of not handling non-archive files in HandleFile

* Adjust check for rpm/deb archive type

* add additional deb mime type

* update comment

* go mod tidy

* update go mod

* Add a buffered file reader

* update comments

* use Buffered File Readder

* return buffer

* update

* fix

* return

* go mod tidy

* merge

* use a shared pool

* use sync.Once

* reorganzie

* remove unused code

* fix double init

* fix stuff

* nil check

* reduce allocations

* updates

* update metrics

* updates

* reset buffer instead of putting it back

* skip binaries

* skip

* concurrently process diffs

* close chan

* concurrently enumerate orgs

* increase workers

* ignore pbix and vsdx files

* add metrics for gitparse's Diffchan

* fix metric

* update metrics

* update

* fix checks

* fix

* inc

* update

* reduce

* Create workers to handle binary files

* modify workers

* updates

* add check

* delete code

* use custom reader

* rename struct

* add nonarchive handler

* fix break

* add comments

* add tests

* refactor

* remove log

* do not scan rpm links

* simplify

* rename var

* rename

* fix benchmark

* add buffer

* buffer

* buffer

* handle panic

* merge main

* merge main

* add recover

* revert stuff

* revert

* revert to using reader

* fixes

* remove

* update

* fixes

* linter

* fix test

* fix comment

* update field name

* fix
2024-05-15 13:40:16 -07:00

95 lines
2.4 KiB
Go

package handlers
import (
"errors"
"fmt"
"io"
"time"
"pault.ag/go/debian/deb"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
)
// arHandler handles AR archive formats.
type arHandler struct{ *defaultHandler }
// newARHandler creates an arHandler.
func newARHandler() *arHandler {
return &arHandler{defaultHandler: newDefaultHandler(arHandlerType)}
}
// HandleFile processes AR formatted files. This function needs to be implemented to extract or
// manage data from AR files according to specific requirements.
func (h *arHandler) HandleFile(ctx logContext.Context, input fileReader) (chan []byte, error) {
archiveChan := make(chan []byte, defaultBufferSize)
go func() {
ctx, cancel := logContext.WithTimeout(ctx, maxTimeout)
defer cancel()
defer close(archiveChan)
// Update the metrics for the file processing.
start := time.Now()
var err error
defer func() {
h.measureLatencyAndHandleErrors(start, err)
h.metrics.incFilesProcessed()
}()
// Defer a panic recovery to handle any panics that occur during the AR processing.
defer func() {
if r := recover(); r != nil {
// Return the panic as an error.
if e, ok := r.(error); ok {
err = e
} else {
err = fmt.Errorf("panic occurred: %v", r)
}
ctx.Logger().Error(err, "Panic occurred when reading ar archive")
}
}()
var arReader *deb.Ar
arReader, err = deb.LoadAr(input)
if err != nil {
ctx.Logger().Error(err, "error reading AR")
return
}
if err = h.processARFiles(ctx, arReader, archiveChan); err != nil {
ctx.Logger().Error(err, "error processing AR files")
}
}()
return archiveChan, nil
}
func (h *arHandler) processARFiles(ctx logContext.Context, reader *deb.Ar, archiveChan chan []byte) error {
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
arEntry, err := reader.Next()
if err != nil {
if errors.Is(err, io.EOF) {
ctx.Logger().V(3).Info("AR archive fully processed")
return nil
}
return fmt.Errorf("error reading AR payload: %w", err)
}
fileSize := arEntry.Size
fileCtx := logContext.WithValues(ctx, "filename", arEntry.Name, "size", fileSize)
if err := h.handleNonArchiveContent(fileCtx, arEntry.Data, archiveChan); err != nil {
fileCtx.Logger().Error(err, "error handling archive content in AR")
h.metrics.incErrors()
}
h.metrics.incFilesProcessed()
h.metrics.observeFileSize(fileSize)
}
}
}