mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Add skip archive support (#2257)
This commit is contained in:
parent
f699f60e89
commit
7d93adc1d0
10 changed files with 593 additions and 459 deletions
|
@ -203,6 +203,7 @@ func main() {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
|
false,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.Info("scanning repo", "repo", r)
|
logger.Info("scanning repo", "repo", r)
|
||||||
|
|
|
@ -46,6 +46,7 @@ type Archive struct {
|
||||||
size int
|
size int
|
||||||
currentDepth int
|
currentDepth int
|
||||||
skipBinaries bool
|
skipBinaries bool
|
||||||
|
skipArchives bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new Archive handler with the provided options.
|
// New creates a new Archive handler with the provided options.
|
||||||
|
@ -72,6 +73,10 @@ func SetArchiveMaxTimeout(timeout time.Duration) {
|
||||||
|
|
||||||
// FromFile extracts the files from an archive.
|
// FromFile extracts the files from an archive.
|
||||||
func (a *Archive) FromFile(originalCtx logContext.Context, data io.Reader) chan []byte {
|
func (a *Archive) FromFile(originalCtx logContext.Context, data io.Reader) chan []byte {
|
||||||
|
if a.skipArchives {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
archiveChan := make(chan []byte, defaultBufferSize)
|
archiveChan := make(chan []byte, defaultBufferSize)
|
||||||
go func() {
|
go func() {
|
||||||
ctx, cancel := logContext.WithTimeout(originalCtx, maxTimeout)
|
ctx, cancel := logContext.WithTimeout(originalCtx, maxTimeout)
|
||||||
|
|
|
@ -311,6 +311,31 @@ func TestExtractDebContent(t *testing.T) {
|
||||||
assert.Equal(t, expectedLength, len(string(content)))
|
assert.Equal(t, expectedLength, len(string(content)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSkipArchive(t *testing.T) {
|
||||||
|
file, err := os.Open("testdata/test.tgz")
|
||||||
|
assert.Nil(t, err)
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
reader, err := diskbufferreader.New(file)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
ctx := logContext.Background()
|
||||||
|
|
||||||
|
chunkCh := make(chan *sources.Chunk)
|
||||||
|
go func() {
|
||||||
|
defer close(chunkCh)
|
||||||
|
ok := HandleFile(ctx, reader, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh}, WithSkipArchives(true))
|
||||||
|
assert.False(t, ok)
|
||||||
|
}()
|
||||||
|
|
||||||
|
wantCount := 0
|
||||||
|
count := 0
|
||||||
|
for range chunkCh {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
assert.Equal(t, wantCount, count)
|
||||||
|
}
|
||||||
|
|
||||||
func TestExtractTarContent(t *testing.T) {
|
func TestExtractTarContent(t *testing.T) {
|
||||||
file, err := os.Open("testdata/test.tgz")
|
file, err := os.Open("testdata/test.tgz")
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
|
|
@ -36,6 +36,15 @@ func WithSkipBinaries(skip bool) Option {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithSkipArchives returns an Option that configures whether to skip archive files.
|
||||||
|
func WithSkipArchives(skip bool) Option {
|
||||||
|
return func(h Handler) {
|
||||||
|
if a, ok := h.(*Archive); ok {
|
||||||
|
a.skipArchives = skip
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type Handler interface {
|
type Handler interface {
|
||||||
FromFile(logContext.Context, io.Reader) chan []byte
|
FromFile(logContext.Context, io.Reader) chan []byte
|
||||||
IsFiletype(logContext.Context, io.Reader) (io.Reader, bool)
|
IsFiletype(logContext.Context, io.Reader) (io.Reader, bool)
|
||||||
|
@ -84,6 +93,10 @@ func processHandler(ctx logContext.Context, h Handler, reReader *diskbufferreade
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleChunks(ctx logContext.Context, handlerChan chan []byte, chunkSkel *sources.Chunk, reporter sources.ChunkReporter) bool {
|
func handleChunks(ctx logContext.Context, handlerChan chan []byte, chunkSkel *sources.Chunk, reporter sources.ChunkReporter) bool {
|
||||||
|
if handlerChan == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case data, open := <-handlerChan:
|
case data, open := <-handlerChan:
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -587,6 +587,8 @@ func (m *Bitbucket) validate(all bool) error {
|
||||||
|
|
||||||
// no validation rules for SkipBinaries
|
// no validation rules for SkipBinaries
|
||||||
|
|
||||||
|
// no validation rules for SkipArchives
|
||||||
|
|
||||||
switch m.Credential.(type) {
|
switch m.Credential.(type) {
|
||||||
|
|
||||||
case *Bitbucket_Token:
|
case *Bitbucket_Token:
|
||||||
|
@ -1806,6 +1808,8 @@ func (m *Git) validate(all bool) error {
|
||||||
|
|
||||||
// no validation rules for SkipBinaries
|
// no validation rules for SkipBinaries
|
||||||
|
|
||||||
|
// no validation rules for SkipArchives
|
||||||
|
|
||||||
switch m.Credential.(type) {
|
switch m.Credential.(type) {
|
||||||
|
|
||||||
case *Git_BasicAuth:
|
case *Git_BasicAuth:
|
||||||
|
@ -2015,6 +2019,8 @@ func (m *GitLab) validate(all bool) error {
|
||||||
|
|
||||||
// no validation rules for SkipBinaries
|
// no validation rules for SkipBinaries
|
||||||
|
|
||||||
|
// no validation rules for SkipArchives
|
||||||
|
|
||||||
switch m.Credential.(type) {
|
switch m.Credential.(type) {
|
||||||
|
|
||||||
case *GitLab_Token:
|
case *GitLab_Token:
|
||||||
|
@ -2210,6 +2216,8 @@ func (m *GitHub) validate(all bool) error {
|
||||||
|
|
||||||
// no validation rules for SkipBinaries
|
// no validation rules for SkipBinaries
|
||||||
|
|
||||||
|
// no validation rules for SkipArchives
|
||||||
|
|
||||||
switch m.Credential.(type) {
|
switch m.Credential.(type) {
|
||||||
|
|
||||||
case *GitHub_GithubApp:
|
case *GitHub_GithubApp:
|
||||||
|
@ -3594,6 +3602,8 @@ func (m *Gerrit) validate(all bool) error {
|
||||||
|
|
||||||
// no validation rules for SkipBinaries
|
// no validation rules for SkipBinaries
|
||||||
|
|
||||||
|
// no validation rules for SkipArchives
|
||||||
|
|
||||||
switch m.Credential.(type) {
|
switch m.Credential.(type) {
|
||||||
|
|
||||||
case *Gerrit_BasicAuth:
|
case *Gerrit_BasicAuth:
|
||||||
|
@ -4681,6 +4691,8 @@ func (m *AzureRepos) validate(all bool) error {
|
||||||
|
|
||||||
// no validation rules for SkipBinaries
|
// no validation rules for SkipBinaries
|
||||||
|
|
||||||
|
// no validation rules for SkipArchives
|
||||||
|
|
||||||
switch m.Credential.(type) {
|
switch m.Credential.(type) {
|
||||||
|
|
||||||
case *AzureRepos_Token:
|
case *AzureRepos_Token:
|
||||||
|
|
|
@ -58,6 +58,7 @@ type Git struct {
|
||||||
metrics metrics
|
metrics metrics
|
||||||
concurrency *semaphore.Weighted
|
concurrency *semaphore.Weighted
|
||||||
skipBinaries bool
|
skipBinaries bool
|
||||||
|
skipArchives bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type metrics struct {
|
type metrics struct {
|
||||||
|
@ -66,6 +67,7 @@ type metrics struct {
|
||||||
|
|
||||||
func NewGit(sourceType sourcespb.SourceType, jobID sources.JobID, sourceID sources.SourceID, sourceName string, verify bool, concurrency int,
|
func NewGit(sourceType sourcespb.SourceType, jobID sources.JobID, sourceID sources.SourceID, sourceName string, verify bool, concurrency int,
|
||||||
sourceMetadataFunc func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData, skipBinaries bool,
|
sourceMetadataFunc func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData, skipBinaries bool,
|
||||||
|
skipArchives bool,
|
||||||
) *Git {
|
) *Git {
|
||||||
return &Git{
|
return &Git{
|
||||||
sourceType: sourceType,
|
sourceType: sourceType,
|
||||||
|
@ -76,6 +78,7 @@ func NewGit(sourceType sourcespb.SourceType, jobID sources.JobID, sourceID sourc
|
||||||
verify: verify,
|
verify: verify,
|
||||||
concurrency: semaphore.NewWeighted(int64(concurrency)),
|
concurrency: semaphore.NewWeighted(int64(concurrency)),
|
||||||
skipBinaries: skipBinaries,
|
skipBinaries: skipBinaries,
|
||||||
|
skipArchives: skipArchives,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,6 +181,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, so
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
conn.GetSkipBinaries(),
|
conn.GetSkipBinaries(),
|
||||||
|
conn.GetSkipArchives(),
|
||||||
)
|
)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1014,6 +1018,10 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.skipArchives {
|
||||||
|
handlerOpts = append(handlerOpts, handlers.WithSkipArchives(true))
|
||||||
|
}
|
||||||
|
|
||||||
cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path)
|
cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path)
|
||||||
|
|
||||||
var stderr bytes.Buffer
|
var stderr bytes.Buffer
|
||||||
|
|
|
@ -277,6 +277,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
conn.GetSkipBinaries(),
|
conn.GetSkipBinaries(),
|
||||||
|
conn.GetSkipArchives(),
|
||||||
)
|
)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -138,6 +138,7 @@ func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourc
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
conn.GetSkipBinaries(),
|
conn.GetSkipBinaries(),
|
||||||
|
conn.GetSkipArchives(),
|
||||||
)
|
)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -96,6 +96,7 @@ message Bitbucket {
|
||||||
repeated string repositories = 5;
|
repeated string repositories = 5;
|
||||||
repeated string ignore_repos = 6;
|
repeated string ignore_repos = 6;
|
||||||
bool skip_binaries = 7;
|
bool skip_binaries = 7;
|
||||||
|
bool skip_archives = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
message CircleCI {
|
message CircleCI {
|
||||||
|
@ -196,6 +197,7 @@ message Git {
|
||||||
// like head, base, bare, etc.
|
// like head, base, bare, etc.
|
||||||
string uri = 13; // repository URL. https://, file://, or ssh://
|
string uri = 13; // repository URL. https://, file://, or ssh://
|
||||||
bool skip_binaries = 14;
|
bool skip_binaries = 14;
|
||||||
|
bool skip_archives = 15;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GitLab {
|
message GitLab {
|
||||||
|
@ -208,6 +210,7 @@ message GitLab {
|
||||||
repeated string repositories = 5;
|
repeated string repositories = 5;
|
||||||
repeated string ignore_repos = 6;
|
repeated string ignore_repos = 6;
|
||||||
bool skip_binaries = 7;
|
bool skip_binaries = 7;
|
||||||
|
bool skip_archives = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GitHub {
|
message GitHub {
|
||||||
|
@ -230,6 +233,7 @@ message GitHub {
|
||||||
bool include_issue_comments = 15;
|
bool include_issue_comments = 15;
|
||||||
bool include_gist_comments = 16;
|
bool include_gist_comments = 16;
|
||||||
bool skip_binaries = 17;
|
bool skip_binaries = 17;
|
||||||
|
bool skip_archives = 18;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GoogleDrive {
|
message GoogleDrive {
|
||||||
|
@ -301,6 +305,7 @@ message Gerrit {
|
||||||
}
|
}
|
||||||
repeated string projects = 4;
|
repeated string projects = 4;
|
||||||
bool skip_binaries = 5;
|
bool skip_binaries = 5;
|
||||||
|
bool skip_archives = 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Jenkins {
|
message Jenkins {
|
||||||
|
@ -369,4 +374,5 @@ message AzureRepos {
|
||||||
repeated string include_projects = 10;
|
repeated string include_projects = 10;
|
||||||
repeated string ignore_projects = 11;
|
repeated string ignore_projects = 11;
|
||||||
bool skip_binaries = 12;
|
bool skip_binaries = 12;
|
||||||
|
bool skip_archives = 13;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue