From aa47e5e248d5ae590a7f3aa83697f1b0997a6aa3 Mon Sep 17 00:00:00 2001 From: ahrav Date: Wed, 1 Mar 2023 08:58:36 -0800 Subject: [PATCH] Only scanned staged git changes. (#1143) --- pkg/gitparse/gitparse.go | 3 ++- pkg/sources/git/git.go | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go index 2fae9ebcf..95a28e195 100644 --- a/pkg/gitparse/gitparse.go +++ b/pkg/gitparse/gitparse.go @@ -139,7 +139,8 @@ func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbre // Unstaged parses the output of the `git diff` command for the `source` path. func (c *Parser) Unstaged(ctx context.Context, source string) (chan Commit, error) { - args := []string{"-C", source, "diff", "-p", "-U5", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z", "HEAD"} + // Provide the --cached flag to diff to get the diff of the staged changes. + args := []string{"-C", source, "diff", "-p", "-U5", "--cached", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z", "HEAD"} cmd := exec.Command("git", args...) diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index db1cef9cb..95b733493 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -472,9 +472,9 @@ func (s *Git) gitChunk(ctx context.Context, diff gitparse.Diff, fileName, email, } } -// ScanUnstaged chunks unstaged changes. -func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, chunksChan chan *sources.Chunk) error { - // get the URL metadata for reporting (may be empty) +// ScanStaged chunks staged changes. +func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, chunksChan chan *sources.Chunk) error { + // Get the URL metadata for reporting (may be empty). urlMetadata := getSafeRemoteURL(repo, "origin") commitChan, err := gitparse.NewParser().Unstaged(ctx, path) @@ -488,11 +488,11 @@ func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path strin var depth int64 var reachedBase = false - ctx.Logger().V(1).Info("scanning unstaged changes", "path", path) + ctx.Logger().V(1).Info("scanning staged changes", "path", path) for commit := range commitChan { for _, diff := range commit.Diffs { logger := ctx.Logger().WithValues("filename", diff.PathB, "commit", commit.Hash, "file", diff.PathB) - logger.V(2).Info("scanning unstaged changes from git") + logger.V(2).Info("scanning staged changes from git") if scanOptions.MaxDepth > 0 && depth >= scanOptions.MaxDepth { logger.V(1).Info("reached max depth") @@ -525,7 +525,7 @@ func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path strin // Handle binary files by reading the entire file rather than using the diff. if diff.IsBinary { commitHash := plumbing.NewHash(hash) - metadata := s.sourceMetadataFunc(fileName, email, "Unstaged", when, urlMetadata, 0) + metadata := s.sourceMetadataFunc(fileName, email, "Staged", when, urlMetadata, 0) chunkSkel := &sources.Chunk{ SourceName: s.sourceName, SourceID: s.sourceID, @@ -539,7 +539,7 @@ func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path strin continue } - metadata := s.sourceMetadataFunc(fileName, email, "Unstaged", when, urlMetadata, int64(diff.LineStart)) + metadata := s.sourceMetadataFunc(fileName, email, "Staged", when, urlMetadata, int64(diff.LineStart)) chunksChan <- &sources.Chunk{ SourceName: s.sourceName, SourceID: s.sourceID, @@ -564,7 +564,7 @@ func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath strin if err := s.ScanCommits(ctx, repo, repoPath, scanOptions, chunksChan); err != nil { return err } - if err := s.ScanUnstaged(ctx, repo, repoPath, scanOptions, chunksChan); err != nil { + if err := s.ScanStaged(ctx, repo, repoPath, scanOptions, chunksChan); err != nil { ctx.Logger().V(1).Info("error scanning unstaged changes", "error", err) }