mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Support scanning binary files in git sources (#684)
* Scan binary files for git sources
* Create data chunks in for loop
* Linter feedback and newline commit result
* Use disk buffered reader and chunker function
This commit is contained in:
parent
a473b9aa99
commit
4a93e49eea
4 changed files with 66 additions and 4 deletions
2
go.mod
2
go.mod
|
@ -4,7 +4,7 @@ go 1.18
|
|||
|
||||
replace github.com/jpillora/overseer => github.com/trufflesecurity/overseer v1.1.7-custom5
|
||||
|
||||
replace github.com/zricethezav/gitleaks/v8 => github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9
|
||||
replace github.com/zricethezav/gitleaks/v8 => github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom10
|
||||
|
||||
replace github.com/gitleaks/go-gitdiff => github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2
|
||||
|
||||
|
|
4
go.sum
4
go.sum
|
@ -400,8 +400,8 @@ github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502 h1:34icjjmqJ2HP
|
|||
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502/go.mod h1:p9lPsd+cx33L3H9nNoecRRxPssFKUwwI50I3pZ0yT+8=
|
||||
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
|
||||
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
|
||||
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9 h1:OvS9aj6Fasot5FaTpSyCV4WNq/8SMov9/bNUMoZFwEI=
|
||||
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom9/go.mod h1:2iZpX4Epnmx7VK2atbIMEjHW9rivie5RRe0ZhPWUFvM=
|
||||
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom10 h1:QuGZ5bJcQpVz+3sfvKKPDkQwdUueiBg0V+2eMHzkryo=
|
||||
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom10/go.mod h1:2iZpX4Epnmx7VK2atbIMEjHW9rivie5RRe0ZhPWUFvM=
|
||||
github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2 h1:srCJzbE3b44+ZIPcgJSfvinHCOQlkMwVghtKf23un6o=
|
||||
github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
|
||||
github.com/trufflesecurity/overseer v1.1.7-custom5 h1:xu+Fg6fkSRifUPzUCl7N8HmobJ6WGOkIApGnM7mJS6w=
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
diskbufferreader "github.com/bill-rich/disk-buffer-reader"
|
||||
"github.com/gitleaks/go-gitdiff/gitdiff"
|
||||
"github.com/go-errors/errors"
|
||||
"github.com/go-git/go-git/v5"
|
||||
|
@ -27,6 +28,8 @@ import (
|
|||
"google.golang.org/protobuf/proto"
|
||||
"google.golang.org/protobuf/types/known/anypb"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
|
||||
|
@ -340,6 +343,23 @@ func (s *Git) ScanCommits(repo *git.Repository, path string, scanOptions *ScanOp
|
|||
when = file.PatchHeader.AuthorDate.String()
|
||||
}
|
||||
|
||||
// Handle binary files by reading the entire file rather than using the diff.
|
||||
if file.IsBinary {
|
||||
commitHash := plumbing.NewHash(hash)
|
||||
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, 0)
|
||||
chunkSkel := &sources.Chunk{
|
||||
SourceName: s.sourceName,
|
||||
SourceID: s.sourceID,
|
||||
SourceType: s.sourceType,
|
||||
SourceMetadata: metadata,
|
||||
Verify: s.verify,
|
||||
}
|
||||
if err = handleBinary(repo, chunksChan, chunkSkel, commitHash, fileName); err != nil {
|
||||
log.WithError(err).Error("Error handling binary file")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
for _, frag := range file.TextFragments {
|
||||
var sb strings.Builder
|
||||
newLineNumber := frag.NewPosition
|
||||
|
@ -615,3 +635,45 @@ func getSafeRemoteURL(repo *git.Repository, preferred string) string {
|
|||
}
|
||||
return safeURL
|
||||
}
|
||||
|
||||
func handleBinary(repo *git.Repository, chunksChan chan *sources.Chunk, chunkSkel *sources.Chunk, commitHash plumbing.Hash, path string) error {
|
||||
log.WithField("path", path).Trace("Binary file found in repository.")
|
||||
commit, err := repo.CommitObject(commitHash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
file, err := commit.File(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fileReader, err := file.Reader()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fileReader.Close()
|
||||
|
||||
reader, err := diskbufferreader.New(fileReader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if handlers.HandleFile(reader, chunkSkel, chunksChan) {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.WithField("path", path).Trace("Binary file is not recognized by file handlers. Chunking raw.")
|
||||
if err := reader.Reset(); err != nil {
|
||||
return err
|
||||
}
|
||||
reader.Stop()
|
||||
|
||||
for chunkData := range common.ChunkReader(reader) {
|
||||
chunk := *chunkSkel
|
||||
chunk.Data = chunkData
|
||||
chunksChan <- &chunk
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -229,7 +229,7 @@ func TestSource_Chunks_Integration(t *testing.T) {
|
|||
"2f251b8c1e72135a375b659951097ec7749d4af9-bump": {B: []byte(" \n")},
|
||||
"e6c8bbabd8796ea3cd85bfc2e55b27e0a491747f-bump": {B: []byte("oops \n")},
|
||||
"735b52b0eb40610002bb1088e902bd61824eb305-bump": {B: []byte("oops\n")},
|
||||
//"ce62d79908803153ef6e145e042d3e80488ef747-bump": {B: []byte("\n")},
|
||||
"ce62d79908803153ef6e145e042d3e80488ef747-bump": {B: []byte("\n")},
|
||||
// Normally we might expect to see this commit, and we may in the future.
|
||||
// But at the moment we're ignoring any commit unless it contains at least one non-space character.
|
||||
"27fbead3bf883cdb7de9d7825ed401f28f9398f1-slack": {B: []byte("yup, just did that\n\ngithub_lol: \"ffc7e0f9400fb6300167009e42d2f842cd7956e2\"\n\noh, goodness. there's another one!")},
|
||||
|
|
Loading…
Reference in a new issue