From 7273dc905868ea7c7c941527deb38b19300130fc Mon Sep 17 00:00:00 2001 From: Bill Rich Date: Tue, 2 Aug 2022 20:36:21 -0700 Subject: [PATCH] Archive decoder (#683) * Archive decoder * Fix reader handling * Seek error handling * Add tests * Fix extra empty chunk * Sync chunk size --- go.mod | 10 ++ go.sum | 24 ++++ pkg/handlers/archive.go | 159 +++++++++++++++++++++++++++ pkg/handlers/archive_test.go | 85 ++++++++++++++ pkg/handlers/handlers.go | 38 +++++++ pkg/sources/filesystem/filesystem.go | 34 +++++- 6 files changed, 345 insertions(+), 5 deletions(-) create mode 100644 pkg/handlers/archive.go create mode 100644 pkg/handlers/archive_test.go create mode 100644 pkg/handlers/handlers.go diff --git a/go.mod b/go.mod index 17e960b7d..aac956286 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,7 @@ require ( github.com/jpillora/overseer v1.1.6 github.com/kylelemons/godebug v1.1.0 github.com/mattn/go-colorable v0.1.12 + github.com/mholt/archiver/v4 v4.0.0-alpha.7 github.com/paulbellamy/ratecounter v0.2.0 github.com/pkg/errors v0.9.1 github.com/razorpay/razorpay-go v0.0.0-20210728161131-0341409a6ab2 @@ -67,8 +68,10 @@ require ( github.com/acomagu/bufpipe v1.0.3 // indirect github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect + github.com/andybalholm/brotli v1.0.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dimchansky/utfbom v1.1.1 // indirect + github.com/dsnet/compress v0.0.1 // indirect github.com/emirpasic/gods v1.12.0 // indirect github.com/go-git/gcfg v1.5.0 // indirect github.com/go-git/go-billy/v5 v5.3.1 // indirect @@ -76,6 +79,7 @@ require ( github.com/golang-jwt/jwt/v4 v4.4.1 // indirect github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/golang/protobuf v1.5.2 // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/google/go-cmp v0.5.8 // indirect github.com/google/go-github/v45 v45.2.0 // indirect github.com/google/go-querystring v1.1.0 // indirect @@ -89,10 +93,16 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jpillora/s3 v1.1.4 // indirect github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 // indirect + github.com/klauspost/compress v1.15.5 // indirect + github.com/klauspost/pgzip v1.2.5 // indirect github.com/mattn/go-isatty v0.0.14 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect + github.com/pierrec/lz4/v4 v4.1.14 // indirect github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/therootcompany/xz v1.0.1 // indirect + github.com/ulikunitz/xz v0.5.10 // indirect github.com/xanzy/ssh-agent v0.3.0 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect go.opencensus.io v0.23.0 // indirect diff --git a/go.sum b/go.sum index b7b758823..4a8352659 100644 --- a/go.sum +++ b/go.sum @@ -91,6 +91,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= +github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= +github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= @@ -129,6 +131,9 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U= github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= +github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= +github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= +github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -210,6 +215,8 @@ github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -306,6 +313,12 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 h1:DowS9hvgyYSX4TO5NpyC606/Z4SxnNYbT+WX27or6Ck= github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/compress v1.15.5 h1:qyCLMz2JCrKADihKOh9FxnW3houKeNsp2h5OEz0QSEA= +github.com/klauspost/compress v1.15.5/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= +github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -326,14 +339,20 @@ github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mholt/archiver/v4 v4.0.0-alpha.7 h1:xzByj8G8tj0Oq7ZYYU4+ixL/CVb5ruWCm0EZQ1PjOkE= +github.com/mholt/archiver/v4 v4.0.0-alpha.7/go.mod h1:Fs8qUkO74HHaidabihzYephJH8qmGD/nCP6tE5xC9BM= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 h1:W6apQkHrMkS0Muv8G/TipAy/FJl/rCYT0+EuS8+Z0z4= github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= +github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs= github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE= +github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE= +github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7 h1:+/+DxvQaYifJ+grD4klzrS5y+KJXldn/2YTl5JG+vZ8= github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7/go.mod h1:zO8QMzTeZd5cpnIkz/Gn6iK0jDfGicM1nynOkkPIl28= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -377,12 +396,17 @@ github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PK github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502 h1:34icjjmqJ2HPjrSuJYEkdZ+0ItmGQAQ75cRHIiftIyE= github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502/go.mod h1:p9lPsd+cx33L3H9nNoecRRxPssFKUwwI50I3pZ0yT+8= +github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= +github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9 h1:OvS9aj6Fasot5FaTpSyCV4WNq/8SMov9/bNUMoZFwEI= github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9/go.mod h1:2iZpX4Epnmx7VK2atbIMEjHW9rivie5RRe0ZhPWUFvM= github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2 h1:srCJzbE3b44+ZIPcgJSfvinHCOQlkMwVghtKf23un6o= github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA= github.com/trufflesecurity/overseer v1.1.7-custom5 h1:xu+Fg6fkSRifUPzUCl7N8HmobJ6WGOkIApGnM7mJS6w= github.com/trufflesecurity/overseer v1.1.7-custom5/go.mod h1:nT9w37AiO1Nop2VhVhNfzAFaPjthvxgpDV3XKsxYkcI= +github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= +github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= +github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/xanzy/go-gitlab v0.69.0 h1:sPci9xHzlX+lcJvPqNu3y3BQpePuR2R694Bal4AeyB8= github.com/xanzy/go-gitlab v0.69.0/go.mod h1:o4yExCtdaqlM8YGdDJWuZoBmfxBsmA9TPEjs9mx1UO4= github.com/xanzy/ssh-agent v0.3.0 h1:wUMzuKtKilRgBAD1sUb8gOwwRr2FGoBVumcjoOACClI= diff --git a/pkg/handlers/archive.go b/pkg/handlers/archive.go new file mode 100644 index 000000000..699ecdb7c --- /dev/null +++ b/pkg/handlers/archive.go @@ -0,0 +1,159 @@ +package handlers + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + + "github.com/mholt/archiver/v4" + log "github.com/sirupsen/logrus" +) + +type ctxKey int + +const ( + depthKey ctxKey = iota +) + +var ( + maxDepth = 5 +) + +// Archive is a handler for extracting and decompressing archives. +type Archive struct { + maxSize int + size int +} + +// New sets a default maximum size and current size counter. +func (d *Archive) New() { + d.maxSize = 20 * 1024 * 1024 // 20MB + d.size = 0 +} + +// FromFile extracts the files from an archive. +func (d *Archive) FromFile(data io.Reader) chan ([]byte) { + ctx := context.Background() + archiveChan := make(chan ([]byte), 512) + go func() { + defer close(archiveChan) + err := d.openArchive(ctx, 0, data, archiveChan) + if err != nil { + if errors.Is(err, archiver.ErrNoMatch) { + return + } + log.WithError(err).Debug("Error unarchiving chunk.") + } + }() + return archiveChan +} + +// openArchive takes a reader and extracts the contents up to the maximum depth. +func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader, archiveChan chan ([]byte)) error { + if depth >= maxDepth { + return fmt.Errorf("max archive depth reached") + } + format, reader, err := archiver.Identify("", reader) + if err != nil { + if errors.Is(err, archiver.ErrNoMatch) && depth > 0 { + chunkSize := 10 * 1024 + for { + chunk := make([]byte, chunkSize) + n, _ := reader.Read(chunk) + archiveChan <- chunk + if n < chunkSize { + break + } + } + return nil + } + return err + } + switch archive := format.(type) { + case archiver.Extractor: + err := archive.Extract(context.WithValue(ctx, depthKey, depth+1), reader, nil, d.extractorHandler(archiveChan)) + if err != nil { + return err + } + return nil + case archiver.Decompressor: + compReader, err := archive.OpenReader(reader) + if err != nil { + return err + } + fileBytes, err := d.ReadToMax(compReader) + if err != nil { + return err + } + newReader := bytes.NewReader(fileBytes) + return d.openArchive(ctx, depth+1, newReader, archiveChan) + } + return fmt.Errorf("Unknown archive type: %s", format.Name()) +} + +// IsFiletype returns true if the provided reader is an archive. +func (d *Archive) IsFiletype(reader io.Reader) (io.Reader, bool) { + format, readerB, err := archiver.Identify("", reader) + if err != nil { + return readerB, false + } + switch format.(type) { + case archiver.Extractor: + return readerB, true + case archiver.Decompressor: + return readerB, true + } + return readerB, false +} + +// extractorHandler is applied to each file in an archiver.Extractor file. +func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Context, archiver.File) error { + return func(ctx context.Context, f archiver.File) error { + log.WithField("filename", f.Name()).Trace("Handling extracted file.") + depth := 0 + if ctxDepth, ok := ctx.Value(depthKey).(int); ok { + depth = ctxDepth + } + + fReader, err := f.Open() + if err != nil { + return err + } + fileBytes, err := d.ReadToMax(fReader) + if err != nil { + return err + } + fileContent := bytes.NewReader(fileBytes) + + err = d.openArchive(ctx, depth, fileContent, archiveChan) + if err != nil { + return err + } + return nil + } +} + +// ReadToMax reads up to the max size. +func (d *Archive) ReadToMax(reader io.Reader) ([]byte, error) { + fileContent := bytes.Buffer{} + log.Tracef("Remaining buffer capacity: %d", d.maxSize-d.size) + for i := 0; i <= d.maxSize/512; i++ { + fileChunk := make([]byte, 512) + bRead, err := reader.Read(fileChunk) + if err != nil && !errors.Is(err, io.EOF) { + return []byte{}, err + } + d.size += bRead + fileContent.Write(fileChunk[0:bRead]) + if bRead < 512 { + break + } + if d.size >= d.maxSize && bRead == 512 { + log.Debug("Max archive size reached.") + break + } + } + return fileContent.Bytes(), nil +} diff --git a/pkg/handlers/archive_test.go b/pkg/handlers/archive_test.go new file mode 100644 index 000000000..96a68472c --- /dev/null +++ b/pkg/handlers/archive_test.go @@ -0,0 +1,85 @@ +package handlers + +import ( + "net/http" + "regexp" + "testing" +) + +func TestArchiveHandler(t *testing.T) { + tests := map[string]struct { + archiveURL string + expectedChunks int + matchString string + }{ + "gzip-single": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/one-zip.gz", + 1, + "AKIAYVP4CIPPH5TNP3SW", + }, + "gzip-nested": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/double-zip.gz", + 1, + "AKIAYVP4CIPPH5TNP3SW", + }, + "gzip-too-deep": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six-zip.gz", + 0, + "", + }, + "tar-single": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/one.tar", + 1, + "AKIAYVP4CIPPH5TNP3SW", + }, + "tar-nested": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/two.tar", + 1, + "AKIAYVP4CIPPH5TNP3SW", + }, + "tar-too-deep": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six.tar", + 0, + "", + }, + "targz-single": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/tar-archive.tar.gz", + 1, + "AKIAYVP4CIPPH5TNP3SW", + }, + "gzip-large": { + "https://raw.githubusercontent.com/bill-rich/bad-secrets/master/FifteenMB.gz", + 1543, + "AKIAYVP4CIPPH5TNP3SW", + }, + } + + for name, testCase := range tests { + resp, err := http.Get(testCase.archiveURL) + if err != nil || resp.StatusCode != http.StatusOK { + t.Error(err) + } + defer resp.Body.Close() + + archive := Archive{} + archive.New() + + archiveChan := archive.FromFile(resp.Body) + + count := 0 + re := regexp.MustCompile(testCase.matchString) + matched := false + for chunk := range archiveChan { + count++ + if re.Match(chunk) { + matched = true + } + } + if !matched && len(testCase.matchString) > 0 { + t.Errorf("%s: Expected string not found in archive.", name) + } + if count != testCase.expectedChunks { + t.Errorf("%s: Unexpected number of chunks. Got %d, expected: %d", name, count, testCase.expectedChunks) + } + } +} diff --git a/pkg/handlers/handlers.go b/pkg/handlers/handlers.go new file mode 100644 index 000000000..2d120ebe2 --- /dev/null +++ b/pkg/handlers/handlers.go @@ -0,0 +1,38 @@ +package handlers + +import ( + "io" + + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" +) + +func DefaultHandlers() []Handler { + return []Handler{ + &Archive{}, + } +} + +type Handler interface { + FromFile(io.Reader) chan ([]byte) + IsFiletype(io.Reader) (io.Reader, bool) + New() +} + +func HandleFile(file io.Reader, chunkSkel *sources.Chunk, chunksChan chan (*sources.Chunk)) bool { + for _, handler := range DefaultHandlers() { + handler.New() + var isType bool + file, isType = handler.IsFiletype(file) + if !isType { + continue + } + handlerChan := handler.FromFile(file) + for data := range handlerChan { + chunk := *chunkSkel + chunk.Data = data + chunksChan <- &chunk + } + return true + } + return false +} diff --git a/pkg/sources/filesystem/filesystem.go b/pkg/sources/filesystem/filesystem.go index af5feabe0..ee2bdd1ae 100644 --- a/pkg/sources/filesystem/filesystem.go +++ b/pkg/sources/filesystem/filesystem.go @@ -11,21 +11,22 @@ import ( "github.com/go-errors/errors" log "github.com/sirupsen/logrus" - "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/anypb" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/handlers" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" "github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer" "github.com/trufflesecurity/trufflehog/v3/pkg/sources" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" ) const ( // These buffer sizes are mainly driven by our largest credential size, which is GCP @ ~2.25KB. // Having a peek size larger than that ensures that we have complete credential coverage in our chunks. - BufferSize = 10 * 1024 // 10KB - PeekSize = 3 * 1024 // 3KB + BufferSize = 10 * 1024 // 10KB + PeekSize = 3 * 1024 // 3KB + MaxArchiveSize = 20 * 1024 * 1024 // 20MB ) type Source struct { @@ -112,7 +113,30 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err } defer inputFile.Close() + chunkSkel := &sources.Chunk{ + SourceType: s.Type(), + SourceName: s.name, + SourceID: s.SourceID(), + SourceMetadata: &source_metadatapb.MetaData{ + Data: &source_metadatapb.MetaData_Filesystem{ + Filesystem: &source_metadatapb.Filesystem{ + File: sanitizer.UTF8(path), + }, + }, + }, + Verify: s.verify, + } + if handlers.HandleFile(inputFile, chunkSkel, chunksChan) { + return nil + } + + _, err = inputFile.Seek(0, io.SeekStart) + if err != nil { + return err + } + reader := bufio.NewReaderSize(bufio.NewReader(inputFile), BufferSize) + firstChunk := true for { if done {