mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Download files when reverifying (#3252)
The previous implementation of targeted file scanning pulled patches out of commit data, which didn't work for binary files (because GitHub doesn't return patches for them). This PR changes the system to always just download the requested file and scan it, which means we get binary file support.
This commit is contained in:
parent
247b56ad0b
commit
dbc1464c63
3 changed files with 38 additions and 74 deletions
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/go-logr/logr"
|
||||
"github.com/gobwas/glob"
|
||||
"github.com/google/go-github/v63/github"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
|
||||
"golang.org/x/exp/rand"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
@ -1392,35 +1393,34 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
|
|||
return fmt.Errorf("invalid GitHub URL")
|
||||
}
|
||||
|
||||
qry := commitQuery{
|
||||
repo: segments[2],
|
||||
owner: segments[1],
|
||||
sha: meta.GetCommit(),
|
||||
filename: meta.GetFile(),
|
||||
readCloser, resp, err := s.connector.APIClient().Repositories.DownloadContents(
|
||||
ctx,
|
||||
segments[1],
|
||||
segments[2],
|
||||
meta.GetFile(),
|
||||
&github.RepositoryContentGetOptions{Ref: meta.GetCommit()})
|
||||
// As of this writing, if the returned readCloser is not nil, it's just the Body of the returned github.Response, so
|
||||
// there's no need to independently close it.
|
||||
if resp != nil && resp.Body != nil {
|
||||
defer resp.Body.Close()
|
||||
}
|
||||
res, err := s.getDiffForFileInCommit(ctx, qry)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("could not download file for scan: %w", err)
|
||||
}
|
||||
chunk := &sources.Chunk{
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("unexpected HTTP response status when trying to download file for scan: %v", resp.Status)
|
||||
}
|
||||
|
||||
reporter := sources.ChanReporter{Ch: chunksChan}
|
||||
chunkSkel := sources.Chunk{
|
||||
SourceType: s.Type(),
|
||||
SourceName: s.name,
|
||||
SourceID: s.SourceID(),
|
||||
JobID: s.JobID(),
|
||||
SecretID: target.SecretID,
|
||||
Data: []byte(stripLeadingPlusMinus(res)),
|
||||
SourceMetadata: &source_metadatapb.MetaData{
|
||||
Data: &source_metadatapb.MetaData_Github{Github: meta},
|
||||
},
|
||||
Verify: s.verify,
|
||||
}
|
||||
|
||||
return common.CancellableWrite(ctx, chunksChan, chunk)
|
||||
}
|
||||
|
||||
// stripLeadingPlusMinus removes leading + and - characters from lines in a diff string. These characters exist in the
|
||||
// diffs returned when performing a targeted scan and need to be removed so that detectors are operating on the correct
|
||||
// text.
|
||||
func stripLeadingPlusMinus(diff string) string {
|
||||
return strings.ReplaceAll(strings.ReplaceAll(diff, "\n+", "\n"), "\n-", "\n")
|
||||
Verify: s.verify}
|
||||
return handlers.HandleFile(ctx, readCloser, &chunkSkel, reporter)
|
||||
}
|
||||
|
|
|
@ -758,6 +758,24 @@ func TestSource_Chunks_TargetedScan(t *testing.T) {
|
|||
},
|
||||
wantChunks: 1,
|
||||
},
|
||||
{
|
||||
name: "targeted scan, binary file",
|
||||
init: init{
|
||||
name: "test source",
|
||||
connection: &sourcespb.GitHub{Credential: &sourcespb.GitHub_Token{Token: githubToken}},
|
||||
queryCriteria: &source_metadatapb.MetaData{
|
||||
Data: &source_metadatapb.MetaData_Github{
|
||||
Github: &source_metadatapb.Github{
|
||||
Repository: "https://github.com/truffle-sandbox/test-secrets.git",
|
||||
Link: "https://github.com/truffle-sandbox/test-secrets/blob/70bef8590f87257c0992eecc7db529827a12b801/null_text_w_ptp.ipynb",
|
||||
Commit: "70bef8590f87257c0992eecc7db529827a12b801",
|
||||
File: "null_text_w_ptp.ipynb",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantChunks: 607,
|
||||
},
|
||||
{
|
||||
name: "no file in commit",
|
||||
init: init{
|
||||
|
|
|
@ -281,60 +281,6 @@ func (s *Source) wikiIsReachable(ctx context.Context, repoURL string) bool {
|
|||
return wikiURL == res.Request.URL.String()
|
||||
}
|
||||
|
||||
// commitQuery represents the details required to fetch a commit.
|
||||
type commitQuery struct {
|
||||
repo string
|
||||
owner string
|
||||
sha string
|
||||
filename string
|
||||
}
|
||||
|
||||
// getDiffForFileInCommit retrieves the diff for a specified file in a commit.
|
||||
// If the file or its diff is not found, it returns an error.
|
||||
func (s *Source) getDiffForFileInCommit(ctx context.Context, query commitQuery) (string, error) {
|
||||
var (
|
||||
commit *github.RepositoryCommit
|
||||
err error
|
||||
)
|
||||
for {
|
||||
commit, _, err = s.connector.APIClient().Repositories.GetCommit(ctx, query.owner, query.repo, query.sha, nil)
|
||||
if s.handleRateLimit(err) {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error fetching commit %s: %w", query.sha, err)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if len(commit.Files) == 0 {
|
||||
return "", fmt.Errorf("commit %s does not contain any files", query.sha)
|
||||
}
|
||||
|
||||
res := new(strings.Builder)
|
||||
// Only return the diff if the file is in the commit.
|
||||
for _, file := range commit.Files {
|
||||
if *file.Filename != query.filename {
|
||||
continue
|
||||
}
|
||||
|
||||
if file.Patch == nil {
|
||||
return "", fmt.Errorf("commit %s file %s does not have a diff", query.sha, query.filename)
|
||||
}
|
||||
|
||||
if _, err := res.WriteString(*file.Patch); err != nil {
|
||||
return "", fmt.Errorf("buffer write error for commit %s file %s: %w", query.sha, query.filename, err)
|
||||
}
|
||||
res.WriteString("\n")
|
||||
}
|
||||
|
||||
if res.Len() == 0 {
|
||||
return "", fmt.Errorf("commit %s does not contain patch for file %s", query.sha, query.filename)
|
||||
}
|
||||
|
||||
return res.String(), nil
|
||||
}
|
||||
|
||||
func (s *Source) normalizeRepo(repo string) (string, error) {
|
||||
// If there's a '/', assume it's a URL and try to normalize it.
|
||||
if strings.ContainsRune(repo, '/') {
|
||||
|
|
Loading…
Reference in a new issue