Archive decoder (#683)

* Archive decoder

* Fix reader handling

* Seek error handling

* Add tests

* Fix extra empty chunk

* Sync chunk size
This commit is contained in:
Bill Rich 2022-08-02 20:36:21 -07:00 committed by GitHub
parent 047e2b4607
commit 7273dc9058
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 345 additions and 5 deletions

10
go.mod
View file

@ -31,6 +31,7 @@ require (
github.com/jpillora/overseer v1.1.6
github.com/kylelemons/godebug v1.1.0
github.com/mattn/go-colorable v0.1.12
github.com/mholt/archiver/v4 v4.0.0-alpha.7
github.com/paulbellamy/ratecounter v0.2.0
github.com/pkg/errors v0.9.1
github.com/razorpay/razorpay-go v0.0.0-20210728161131-0341409a6ab2
@ -67,8 +68,10 @@ require (
github.com/acomagu/bufpipe v1.0.3 // indirect
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect
github.com/dsnet/compress v0.0.1 // indirect
github.com/emirpasic/gods v1.12.0 // indirect
github.com/go-git/gcfg v1.5.0 // indirect
github.com/go-git/go-billy/v5 v5.3.1 // indirect
@ -76,6 +79,7 @@ require (
github.com/golang-jwt/jwt/v4 v4.4.1 // indirect
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/go-cmp v0.5.8 // indirect
github.com/google/go-github/v45 v45.2.0 // indirect
github.com/google/go-querystring v1.1.0 // indirect
@ -89,10 +93,16 @@ require (
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/s3 v1.1.4 // indirect
github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 // indirect
github.com/klauspost/compress v1.15.5 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
github.com/pierrec/lz4/v4 v4.1.14 // indirect
github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/therootcompany/xz v1.0.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
github.com/xanzy/ssh-agent v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.opencensus.io v0.23.0 // indirect

24
go.sum
View file

@ -91,6 +91,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
@ -129,6 +131,9 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U=
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@ -210,6 +215,8 @@ github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@ -306,6 +313,12 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 h1:DowS9hvgyYSX4TO5NpyC606/Z4SxnNYbT+WX27or6Ck=
github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.15.5 h1:qyCLMz2JCrKADihKOh9FxnW3houKeNsp2h5OEz0QSEA=
github.com/klauspost/compress v1.15.5/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@ -326,14 +339,20 @@ github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mholt/archiver/v4 v4.0.0-alpha.7 h1:xzByj8G8tj0Oq7ZYYU4+ixL/CVb5ruWCm0EZQ1PjOkE=
github.com/mholt/archiver/v4 v4.0.0-alpha.7/go.mod h1:Fs8qUkO74HHaidabihzYephJH8qmGD/nCP6tE5xC9BM=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 h1:W6apQkHrMkS0Muv8G/TipAy/FJl/rCYT0+EuS8+Z0z4=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk=
github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY=
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk=
github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs=
github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE=
github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE=
github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7 h1:+/+DxvQaYifJ+grD4klzrS5y+KJXldn/2YTl5JG+vZ8=
github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7/go.mod h1:zO8QMzTeZd5cpnIkz/Gn6iK0jDfGicM1nynOkkPIl28=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@ -377,12 +396,17 @@ github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PK
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502 h1:34icjjmqJ2HPjrSuJYEkdZ+0ItmGQAQ75cRHIiftIyE=
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502/go.mod h1:p9lPsd+cx33L3H9nNoecRRxPssFKUwwI50I3pZ0yT+8=
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9 h1:OvS9aj6Fasot5FaTpSyCV4WNq/8SMov9/bNUMoZFwEI=
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9/go.mod h1:2iZpX4Epnmx7VK2atbIMEjHW9rivie5RRe0ZhPWUFvM=
github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2 h1:srCJzbE3b44+ZIPcgJSfvinHCOQlkMwVghtKf23un6o=
github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
github.com/trufflesecurity/overseer v1.1.7-custom5 h1:xu+Fg6fkSRifUPzUCl7N8HmobJ6WGOkIApGnM7mJS6w=
github.com/trufflesecurity/overseer v1.1.7-custom5/go.mod h1:nT9w37AiO1Nop2VhVhNfzAFaPjthvxgpDV3XKsxYkcI=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/xanzy/go-gitlab v0.69.0 h1:sPci9xHzlX+lcJvPqNu3y3BQpePuR2R694Bal4AeyB8=
github.com/xanzy/go-gitlab v0.69.0/go.mod h1:o4yExCtdaqlM8YGdDJWuZoBmfxBsmA9TPEjs9mx1UO4=
github.com/xanzy/ssh-agent v0.3.0 h1:wUMzuKtKilRgBAD1sUb8gOwwRr2FGoBVumcjoOACClI=

159
pkg/handlers/archive.go Normal file
View file

@ -0,0 +1,159 @@
package handlers
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"github.com/mholt/archiver/v4"
log "github.com/sirupsen/logrus"
)
// ctxKey is an unexported type for the context keys declared in this file.
type ctxKey int
const (
// depthKey is the context key under which openArchive stores the nesting
// depth (as an int) before calling Extract; extractorHandler reads it back.
depthKey ctxKey = iota
)
var (
// maxDepth is the nesting depth at which openArchive stops descending and
// returns an error instead.
maxDepth = 5
)
// Archive is a handler for extracting and decompressing archives.
type Archive struct {
// maxSize is the byte limit that ReadToMax compares size against; New sets
// it to 20MB.
maxSize int
// size is the running total of bytes read by ReadToMax; New resets it to 0.
size int
}
// New prepares the handler for use: the byte counter is reset to zero and the
// maximum size is set to the 20MB default.
func (d *Archive) New() {
	const defaultMaxSize = 20 * 1024 * 1024 // 20MB
	d.size = 0
	d.maxSize = defaultMaxSize
}
// FromFile starts a goroutine that unpacks data and returns the channel on
// which the extracted content is delivered. The channel is buffered (512
// entries) and is closed when extraction ends. An archiver.ErrNoMatch result
// is dropped silently; any other error is logged at debug level.
func (d *Archive) FromFile(data io.Reader) chan ([]byte) {
	out := make(chan ([]byte), 512)
	go func() {
		defer close(out)
		err := d.openArchive(context.Background(), 0, data, out)
		if err == nil || errors.Is(err, archiver.ErrNoMatch) {
			return
		}
		log.WithError(err).Debug("Error unarchiving chunk.")
	}()
	return out
}
// openArchive takes a reader and extracts the contents up to the maximum depth.
//
// The reader is passed to archiver.Identify:
//   - Extractor formats are unpacked entry by entry through extractorHandler,
//     with depth+1 carried in the context under depthKey.
//   - Decompressor formats are decompressed via ReadToMax and the result is
//     fed back into openArchive at depth+1.
//   - Unrecognised content at depth > 0 is treated as a plain file and sent to
//     archiveChan in chunks of at most 10KB.
//
// It returns an error when depth has reached maxDepth, when identification
// fails for any other reason (including ErrNoMatch at depth 0), or when
// extraction, decompression or reading fails.
func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader, archiveChan chan ([]byte)) error {
	if depth >= maxDepth {
		return fmt.Errorf("max archive depth reached")
	}
	format, reader, err := archiver.Identify("", reader)
	if err != nil {
		if errors.Is(err, archiver.ErrNoMatch) && depth > 0 {
			chunkSize := 10 * 1024
			for {
				chunk := make([]byte, chunkSize)
				// io.ReadFull keeps reading until the chunk is full or the
				// stream ends, so a short read from the underlying reader is
				// not mistaken for the end of the file.
				n, readErr := io.ReadFull(reader, chunk)
				if n > 0 {
					// Forward only the bytes actually read; the rest of the
					// buffer is zero padding, not file content.
					archiveChan <- chunk[:n]
				}
				if readErr != nil {
					if errors.Is(readErr, io.EOF) || errors.Is(readErr, io.ErrUnexpectedEOF) {
						return nil
					}
					return readErr
				}
			}
		}
		return err
	}
	switch archive := format.(type) {
	case archiver.Extractor:
		return archive.Extract(context.WithValue(ctx, depthKey, depth+1), reader, nil, d.extractorHandler(archiveChan))
	case archiver.Decompressor:
		compReader, err := archive.OpenReader(reader)
		if err != nil {
			return err
		}
		// Release the decompressor once its content has been buffered.
		defer compReader.Close()
		fileBytes, err := d.ReadToMax(compReader)
		if err != nil {
			return err
		}
		newReader := bytes.NewReader(fileBytes)
		return d.openArchive(ctx, depth+1, newReader, archiveChan)
	}
	return fmt.Errorf("Unknown archive type: %s", format.Name())
}
// IsFiletype reports whether archiver.Identify recognises the stream as a
// format that can be extracted or decompressed. The reader returned by
// Identify is handed back in every case, including on failure.
func (d *Archive) IsFiletype(reader io.Reader) (io.Reader, bool) {
	format, rewound, err := archiver.Identify("", reader)
	if err != nil {
		return rewound, false
	}
	_, canExtract := format.(archiver.Extractor)
	_, canDecompress := format.(archiver.Decompressor)
	return rewound, canExtract || canDecompress
}
// extractorHandler is applied to each file in an archiver.Extractor file.
//
// The returned callback opens the entry, buffers its content with ReadToMax,
// and passes the buffer to openArchive so nested archives are unpacked too.
// The nesting depth is read from the context under depthKey and defaults to 0
// when absent. Any error from opening, reading or the nested openArchive call
// is returned to the extractor.
func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Context, archiver.File) error {
	return func(ctx context.Context, f archiver.File) error {
		log.WithField("filename", f.Name()).Trace("Handling extracted file.")
		depth := 0
		if ctxDepth, ok := ctx.Value(depthKey).(int); ok {
			depth = ctxDepth
		}
		fReader, err := f.Open()
		if err != nil {
			return err
		}
		// The entry is fully buffered below, so its reader can be closed as
		// soon as this callback returns; without this every entry leaks.
		defer fReader.Close()
		fileBytes, err := d.ReadToMax(fReader)
		if err != nil {
			return err
		}
		return d.openArchive(ctx, depth, bytes.NewReader(fileBytes), archiveChan)
	}
}
// ReadToMax reads up to the max size.
//
// Data is consumed in 512-byte chunks until the reader reports io.EOF, the
// running total d.size reaches d.maxSize, or the per-call chunk limit
// (d.maxSize/512 + 1) is hit. d.size accumulates across calls on the same
// Archive. On a non-EOF read error it returns an empty slice and that error.
// io.ErrNoProgress is returned if the reader repeatedly yields no data and no
// error.
func (d *Archive) ReadToMax(reader io.Reader) ([]byte, error) {
	const (
		chunkSize     = 512
		maxEmptyReads = 100
	)
	var fileContent bytes.Buffer
	log.Tracef("Remaining buffer capacity: %d", d.maxSize-d.size)
	// One scratch buffer is enough: bytes.Buffer.Write copies the data.
	fileChunk := make([]byte, chunkSize)
	for i := 0; i <= d.maxSize/chunkSize; i++ {
		// Fill the chunk completely unless the reader reports an error. An
		// io.Reader may return fewer bytes than requested without being at
		// EOF, so a short read alone must not end the loop.
		filled, emptyReads := 0, 0
		var err error
		for filled < chunkSize && err == nil {
			var n int
			n, err = reader.Read(fileChunk[filled:])
			filled += n
			if n == 0 && err == nil {
				emptyReads++
				if emptyReads >= maxEmptyReads {
					err = io.ErrNoProgress
				}
			}
		}
		if err != nil && !errors.Is(err, io.EOF) {
			return []byte{}, err
		}
		d.size += filled
		fileContent.Write(fileChunk[:filled])
		if err != nil {
			// io.EOF: the stream is exhausted.
			break
		}
		if d.size >= d.maxSize {
			log.Debug("Max archive size reached.")
			break
		}
	}
	return fileContent.Bytes(), nil
}

View file

@ -0,0 +1,85 @@
package handlers
import (
"net/http"
"regexp"
"testing"
)
// TestArchiveHandler downloads sample archives and checks, for each one, how
// many chunks the Archive handler emits and whether the expected string
// appears in any of them. Each case runs as a subtest so that a download
// failure aborts only that case and each response body is closed as soon as
// its case finishes.
func TestArchiveHandler(t *testing.T) {
	tests := map[string]struct {
		archiveURL     string
		expectedChunks int
		matchString    string
	}{
		"gzip-single": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/one-zip.gz",
			1,
			"AKIAYVP4CIPPH5TNP3SW",
		},
		"gzip-nested": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/double-zip.gz",
			1,
			"AKIAYVP4CIPPH5TNP3SW",
		},
		"gzip-too-deep": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six-zip.gz",
			0,
			"",
		},
		"tar-single": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/one.tar",
			1,
			"AKIAYVP4CIPPH5TNP3SW",
		},
		"tar-nested": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/two.tar",
			1,
			"AKIAYVP4CIPPH5TNP3SW",
		},
		"tar-too-deep": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six.tar",
			0,
			"",
		},
		"targz-single": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/tar-archive.tar.gz",
			1,
			"AKIAYVP4CIPPH5TNP3SW",
		},
		"gzip-large": {
			"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/FifteenMB.gz",
			1543,
			"AKIAYVP4CIPPH5TNP3SW",
		},
	}
	for name, testCase := range tests {
		// The closure runs synchronously inside t.Run, so capturing the loop
		// variables is safe regardless of Go version.
		t.Run(name, func(t *testing.T) {
			resp, err := http.Get(testCase.archiveURL)
			if err != nil {
				// resp is nil here; stop before touching resp.Body.
				t.Fatalf("%s: fetching %s: %v", name, testCase.archiveURL, err)
			}
			defer resp.Body.Close()
			if resp.StatusCode != http.StatusOK {
				t.Fatalf("%s: fetching %s: unexpected status %s", name, testCase.archiveURL, resp.Status)
			}

			archive := Archive{}
			archive.New()
			archiveChan := archive.FromFile(resp.Body)

			count := 0
			re := regexp.MustCompile(testCase.matchString)
			matched := false
			for chunk := range archiveChan {
				count++
				if re.Match(chunk) {
					matched = true
				}
			}
			if !matched && len(testCase.matchString) > 0 {
				t.Errorf("%s: Expected string not found in archive.", name)
			}
			if count != testCase.expectedChunks {
				t.Errorf("%s: Unexpected number of chunks. Got %d, expected: %d", name, count, testCase.expectedChunks)
			}
		})
	}
}

38
pkg/handlers/handlers.go Normal file
View file

@ -0,0 +1,38 @@
package handlers
import (
"io"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
// DefaultHandlers returns a new slice holding one zero-valued Archive handler.
// Callers are expected to call New on a handler before using it, as HandleFile
// does.
func DefaultHandlers() []Handler {
	archive := &Archive{}
	return []Handler{archive}
}
// Handler is the interface HandleFile uses to detect and unpack a file type.
type Handler interface {
// FromFile consumes the reader and returns a channel of byte slices;
// HandleFile ranges over it until it is closed.
FromFile(io.Reader) chan ([]byte)
// IsFiletype reports whether the reader's content is a type this handler
// processes. HandleFile continues with the returned reader in place of the
// one it passed in.
IsFiletype(io.Reader) (io.Reader, bool)
// New initialises the handler; HandleFile calls it before each use.
New()
}
// HandleFile offers file to each default handler in turn. The first handler
// that recognises the content processes it: every byte slice it produces is
// attached to a copy of chunkSkel and sent on chunksChan, after which
// HandleFile returns true. It returns false when no handler claims the file.
// Each IsFiletype call replaces file with the reader that call returns,
// whether or not the handler matched.
func HandleFile(file io.Reader, chunkSkel *sources.Chunk, chunksChan chan (*sources.Chunk)) bool {
	for _, h := range DefaultHandlers() {
		h.New()
		next, matched := h.IsFiletype(file)
		file = next
		if matched {
			for data := range h.FromFile(file) {
				// Copy the skeleton so each emitted chunk owns its own Data.
				chunk := *chunkSkel
				chunk.Data = data
				chunksChan <- &chunk
			}
			return true
		}
	}
	return false
}

View file

@ -11,21 +11,22 @@ import (
"github.com/go-errors/errors"
log "github.com/sirupsen/logrus"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
)
const (
// These buffer sizes are mainly driven by our largest credential size, which is GCP @ ~2.25KB.
// Having a peek size larger than that ensures that we have complete credential coverage in our chunks.
BufferSize = 10 * 1024 // 10KB
PeekSize = 3 * 1024 // 3KB
BufferSize = 10 * 1024 // 10KB
PeekSize = 3 * 1024 // 3KB
MaxArchiveSize = 20 * 1024 * 1024 // 20MB
)
type Source struct {
@ -112,7 +113,30 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err
}
defer inputFile.Close()
chunkSkel := &sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Filesystem{
Filesystem: &source_metadatapb.Filesystem{
File: sanitizer.UTF8(path),
},
},
},
Verify: s.verify,
}
if handlers.HandleFile(inputFile, chunkSkel, chunksChan) {
return nil
}
_, err = inputFile.Seek(0, io.SeekStart)
if err != nil {
return err
}
reader := bufio.NewReaderSize(bufio.NewReader(inputFile), BufferSize)
firstChunk := true
for {
if done {