trufflehog/pkg/handlers/archive_test.go

129 lines
2.8 KiB
Go
Raw Normal View History

[refactor] - Create separate handler for non-archive data (#2825) * Remove specialized handler and archive struct and restructure handlers pkg. * Refactor RPM archive handlers to use a library instead of shelling out * make rpm handling context aware * update test * Refactor AR/deb archive handler to use an existing library instead of shelling out * Update tests * Handle non-archive data within the DefaultHandler * make structs and methods private * Remove non-archive data handling within sources * add max size check * add filename and size to context kvp * move skip file check and is binary check before opening file * fix test * preserve existing funcitonality of not handling non-archive files in HandleFile * Handle non-archive data within the DefaultHandler * rebase * Remove non-archive data handling within sources * Adjust check for rpm/deb archive type * add additional deb mime type * add gzip * move diskbuffered rereader setup into handler pkg * remove DiskBuffereReader creation logic within sources * update comment * move rewind closer * reduce log verbosity * add metrics for file handling * add metrics for errors * make defaultBufferSize a const * add metrics for file handling * add metrics for errors * fix tests * add metrics for max archive depth and skipped files * update error * skip symlinks and dirs * update err * Address incompatible reader to openArchive * remove nil check * fix err assignment * Allow git cat-file blob to complete before trying to handle the file * wrap compReader with DiskbufferReader * Allow git cat-file blob to complete before trying to handle the file * updates * use buffer writer * update * refactor * update context pkg * revert stuff * update test * fix test * remove * use correct reader * add metrics for file handling * add metrics for errors * fix tests * rebase * add metrics for errors * add metrics for max archive depth and skipped files * update error * skip symlinks and dirs * update err * fix err assignment * rebase * 
remove * Update write method in contentWriter interface * Add bufferReadSeekCloser * update name * update comment * fix lint * Remove specialized handler and archive struct and restructure handlers pkg. * Refactor RPM archive handlers to use a library instead of shelling out * make rpm handling context aware * update test * Refactor AR/deb archive handler to use an existing library instead of shelling out * Update tests * add max size check * add filename and size to context kvp * move skip file check and is binary check before opening file * fix test * preserve existing funcitonality of not handling non-archive files in HandleFile * Handle non-archive data within the DefaultHandler * rebase * Remove non-archive data handling within sources * Handle non-archive data within the DefaultHandler * add gzip * move diskbuffered rereader setup into handler pkg * remove DiskBuffereReader creation logic within sources * update comment * move rewind closer * reduce log verbosity * make defaultBufferSize a const * add metrics for file handling * add metrics for errors * fix tests * add metrics for max archive depth and skipped files * update error * skip symlinks and dirs * update err * Address incompatible reader to openArchive * remove nil check * fix err assignment * wrap compReader with DiskbufferReader * Allow git cat-file blob to complete before trying to handle the file * updates * use buffer writer * update * refactor * update context pkg * revert stuff * update test * remove * rebase * go mod tidy * lint check * update metric to ms * update metric * update comments * dont use ptr * update * fix * Remove specialized handler and archive struct and restructure handlers pkg. 
* Refactor RPM archive handlers to use a library instead of shelling out * make rpm handling context aware * update test * Refactor AR/deb archive handler to use an existing library instead of shelling out * Update tests * add max size check * add filename and size to context kvp * move skip file check and is binary check before opening file * fix test * preserve existing funcitonality of not handling non-archive files in HandleFile * Adjust check for rpm/deb archive type * add additional deb mime type * update comment * go mod tidy * update go mod * Add a buffered file reader * update comments * use Buffered File Readder * return buffer * update * fix * return * go mod tidy * merge * use a shared pool * use sync.Once * reorganzie * remove unused code * fix double init * fix stuff * nil check * reduce allocations * updates * update metrics * updates * reset buffer instead of putting it back * skip binaries * skip * concurrently process diffs * close chan * concurrently enumerate orgs * increase workers * ignore pbix and vsdx files * add metrics for gitparse's Diffchan * fix metric * update metrics * update * fix checks * fix * inc * update * reduce * Create workers to handle binary files * modify workers * updates * add check * delete code * use custom reader * rename struct * add nonarchive handler * fix break * add comments * add tests * refactor * remove log * do not scan rpm links * simplify * rename var * rename * fix benchmark * add buffer * buffer * buffer * handle panic * merge main * merge main * add recover * revert stuff * revert * revert to using reader * fixes * remove * update * fixes * linter * fix test * fix comment * update field name * fix
2024-05-15 20:40:16 +00:00
package handlers
import (
"context"
"io"
"net/http"
"regexp"
"strings"
"testing"
"github.com/stretchr/testify/assert"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
)
// TestArchiveHandler downloads sample archives over HTTP, passes each one to
// the archive handler, and inspects the chunks delivered on the returned
// channel.
//
// For cases with expectErr unset, the test counts the emitted chunks, compares
// the count with expectedChunks, and requires that at least one chunk matches
// matchString (compiled as a regular expression). For cases with expectErr
// set, it only checks that HandleFile itself returns no error and then stops
// without reading the channel.
// NOTE(review): presumably the "too deep" failure is reported asynchronously
// rather than through HandleFile's return value — confirm against the handler.
//
// The test performs real HTTP requests, so it needs network access.
func TestArchiveHandler(t *testing.T) {
	tests := map[string]struct {
		archiveURL     string
		expectedChunks int
		matchString    string
		expectErr      bool
	}{
		"gzip-single": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/one-zip.gz",
			expectedChunks: 1,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
		"gzip-nested": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/double-zip.gz",
			expectedChunks: 1,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
		"gzip-too-deep": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six-zip.gz",
			expectedChunks: 0,
			matchString:    "",
			expectErr:      true,
		},
		"tar-single": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/one.tar",
			expectedChunks: 1,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
		"tar-nested": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/two.tar",
			expectedChunks: 1,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
		"tar-too-deep": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six.tar",
			expectedChunks: 0,
			matchString:    "",
			expectErr:      true,
		},
		"targz-single": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/tar-archive.tar.gz",
			expectedChunks: 1,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
		"gzip-large": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/FifteenMB.gz",
			expectedChunks: 1543,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
		"zip-single": {
			archiveURL:     "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/aws-canary-creds.zip",
			expectedChunks: 1,
			matchString:    "AKIAYVP4CIPPH5TNP3SW",
			expectErr:      false,
		},
	}

	for name, testCase := range tests {
		t.Run(name, func(t *testing.T) {
			resp, err := http.Get(testCase.archiveURL)
			if !assert.NoError(t, err) {
				// resp is nil when the request fails; stop before dereferencing it.
				return
			}
			defer resp.Body.Close()
			if !assert.Equal(t, http.StatusOK, resp.StatusCode) {
				// A non-200 body is not the fixture; feeding it to the handler
				// would only produce misleading follow-up failures.
				return
			}

			handler := newArchiveHandler()

			newReader, err := newFileReader(resp.Body)
			if err != nil {
				// Without a usable reader nothing below can run meaningfully.
				t.Fatalf("error creating reusable reader: %s", err)
			}

			archiveChan, err := handler.HandleFile(logContext.Background(), newReader)
			if !assert.NoError(t, err) {
				// Do not range over the channel after a failed call: if it is
				// nil the loop below would block forever.
				return
			}
			if testCase.expectErr {
				return
			}

			count := 0
			re := regexp.MustCompile(testCase.matchString)
			matched := false
			for chunk := range archiveChan {
				count++
				if re.Match(chunk) {
					matched = true
				}
			}

			assert.True(t, matched)
			assert.Equal(t, testCase.expectedChunks, count)
		})
	}
}
// TestOpenInvalidArchive feeds the plain text "invalid archive" to
// openArchive at depth 0 and asserts that the call returns an error.
func TestOpenInvalidArchive(t *testing.T) {
	logCtx := logContext.AddLogger(context.Background())

	input, err := newFileReader(io.NopCloser(strings.NewReader("invalid archive")))
	assert.NoError(t, err)
	defer input.Close()

	var (
		h      = archiveHandler{}
		chunks = make(chan []byte)
	)

	assert.Error(t, h.openArchive(logCtx, 0, input, chunks))
}