fix(gitparse): handle fromFileLine edge case (#2206)

This commit is contained in:
Richard Gomez 2024-01-04 17:53:08 -05:00 committed by GitHub
parent 7209002b6b
commit 241e153dfb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 99 additions and 17 deletions

View file

@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"fmt"
"github.com/go-logr/logr"
"io"
"os"
"os/exec"
@ -92,6 +93,7 @@ func (state ParseState) String() string {
"BinaryFileLine",
"HunkLineNumberLine",
"HunkContentLine",
"ParseFailure",
}[state]
}
@ -313,7 +315,7 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
case isMessageLine(isStaged, latestState, line):
latestState = MessageLine
currentCommit.Message.Write(line[4:])
currentCommit.Message.Write(line[4:]) // Messages are indented with 4 spaces.
case isMessageEndLine(isStaged, latestState, line):
latestState = MessageEndLine
// NoOp
@ -425,13 +427,14 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
// Here be dragons...
// Build an informative error message.
var err error
err := fmt.Errorf(`invalid line "%s" after state "%s"`, line, latestState)
var logger logr.Logger
if currentCommit != nil && currentCommit.Hash != "" {
err = fmt.Errorf(`failed to parse line "%s" after state "%s" (commit=%s)`, line, latestState, currentCommit.Hash)
logger = ctx.Logger().WithValues("commit", currentCommit.Hash)
} else {
err = fmt.Errorf(`failed to parse line "%s" after state "%s"`, line, latestState)
logger = ctx.Logger()
}
ctx.Logger().V(2).Error(err, "Recovering at the latest commit or diff...\n")
logger.Error(err, "failed to parse Git input. Recovering at the latest commit or diff...")
latestState = ParseFailure
}
@ -612,8 +615,9 @@ func pathFromBinaryLine(line []byte) string {
}
// --- a/internal/addrs/move_endpoint_module.go
// --- /dev/null
func isFromFileLine(isStaged bool, latestState ParseState, line []byte) bool {
if latestState != IndexLine {
if !(latestState == IndexLine || latestState == ModeLine) {
return false
}
if len(line) >= 6 && bytes.Equal(line[:4], []byte("--- ")) {

View file

@ -323,6 +323,13 @@ func TestLineChecks(t *testing.T) {
IndexLine,
[]byte("--- /dev/null"),
},
// New file (https://github.com/trufflesecurity/trufflehog/issues/2109)
// diff --git a/libs/Unfit-1.0 b/libs/Unfit-1.0
// new file mode 160000
{
ModeLine,
[]byte("--- /dev/null"),
},
// Uncommon but valid prefixes. Will these ever show up?
// https://stackoverflow.com/a/2530012
// https://git-scm.com/docs/git-config#Documentation/git-config.txt-diffmnemonicPrefix
@ -1148,7 +1155,45 @@ func TestMaxCommitSize(t *testing.T) {
}
const commitLog = `commit 4727ffb7ad6dc5130bf4b4dd166e00705abdd018 (HEAD -> master)
const commitLog = `commit fd6e99e7a80199b76a694603be57c5ade1de18e7
Author: Jaliborc <jaliborc@gmail.com>
Date: Mon Apr 25 16:28:06 2011 +0100
Added Unusable coloring
diff --git a/components/item.lua b/components/item.lua
index fc74534..f8d7d50 100755
--- a/components/item.lua
+++ b/components/item.lua
@@ -9,6 +9,7 @@ ItemSlot:Hide()
Bagnon.ItemSlot = ItemSlot
local ItemSearch = LibStub('LibItemSearch-1.0')
+local Unfit = LibStub('Unfit-1.0')
local function hasBlizzQuestHighlight()
return GetContainerItemQuestInfo and true or false
diff --git a/embeds.xml b/embeds.xml
index d3f4e7c..0c2df69 100755
--- a/embeds.xml
+++ b/embeds.xml
@@ -6,6 +6,7 @@
<Include file="libs\AceConsole-3.0\AceConsole-3.0.xml"/>
<Include file="libs\AceLocale-3.0\AceLocale-3.0.xml"/>
+ <Script file="libs\Unfit-1.0\Unfit-1.0.lua"/>
<Script file="libs\LibDataBroker-1.1.lua"/>
<Script file="libs\LibItemSearch-1.0\LibItemSearch-1.0.lua"/>
</Ui>
\ No newline at end of file
diff --git a/libs/Unfit-1.0 b/libs/Unfit-1.0
new file mode 160000
--- /dev/null
+++ b/libs/Unfit-1.0
@@ -0,0 +1 @@
+Subproject commit 0000000000000000000000000000000000000000
commit 4727ffb7ad6dc5130bf4b4dd166e00705abdd018 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 22:26:11 2023 -0400
@ -1539,6 +1584,33 @@ index 0000000..5af88a8
// This throws a nasty panic if it's a top-level var.
func expectedCommits() []Commit {
return []Commit{
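// Commit whose last diff is a new file with mode 160000, where "--- /dev/null" directly follows the mode line (no index line).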
{
Hash: "fd6e99e7a80199b76a694603be57c5ade1de18e7",
Author: "Jaliborc <jaliborc@gmail.com>",
Date: newTime("Mon Apr 25 16:28:06 2011 +0100"),
Message: newStringBuilderValue("Added Unusable coloring\n"),
Diffs: []Diff{
{
PathB: "components/item.lua",
LineStart: 9,
Content: *bytes.NewBuffer([]byte("\n\nlocal Unfit = LibStub('Unfit-1.0')\n\n\n")),
IsBinary: false,
},
{
PathB: "embeds.xml",
LineStart: 6,
Content: *bytes.NewBuffer([]byte("\n\n <Script file=\"libs\\Unfit-1.0\\Unfit-1.0.lua\"/>\n\n\n\n")),
IsBinary: false,
},
{
PathB: "libs/Unfit-1.0",
LineStart: 1,
Content: *bytes.NewBuffer([]byte("Subproject commit 0000000000000000000000000000000000000000\n")),
IsBinary: false,
},
},
},
// Empty commit and message. Lord help us.
{
Hash: "4727ffb7ad6dc5130bf4b4dd166e00705abdd018",

View file

@ -18,12 +18,13 @@ import (
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/google/go-github/v42/github"
diskbufferreader "github.com/trufflesecurity/disk-buffer-reader"
"golang.org/x/oauth2"
"golang.org/x/sync/semaphore"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
diskbufferreader "github.com/trufflesecurity/disk-buffer-reader"
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
@ -462,7 +463,16 @@ func (s *Git) CommitsScanned() uint64 {
const gitDirName = ".git"
func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, reporter sources.ChunkReporter) error {
commitChan, err := gitparse.NewParser().RepoPath(ctx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
// Get the remote URL for reporting (may be empty)
remoteURL := getSafeRemoteURL(repo, "origin")
var repoCtx context.Context
if remoteURL != "" {
repoCtx = context.WithValue(ctx, "repo", remoteURL)
} else {
repoCtx = context.WithValue(ctx, "repo", path)
}
commitChan, err := gitparse.NewParser().RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
if err != nil {
return err
}
@ -470,14 +480,10 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
return nil
}
// get the URL metadata for reporting (may be empty)
urlMetadata := getSafeRemoteURL(repo, "origin")
var depth int64
gitDir := filepath.Join(path, gitDirName)
logger := ctx.Logger().WithValues("repo", urlMetadata)
logger := repoCtx.Logger()
logger.V(1).Info("scanning repo", "base", scanOptions.BaseHash, "head", scanOptions.HeadHash)
for commit := range commitChan {
if len(scanOptions.BaseHash) > 0 {
@ -510,7 +516,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
// Handle binary files by reading the entire file rather than using the diff.
if diff.IsBinary {
commitHash := plumbing.NewHash(hash)
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, 0)
metadata := s.sourceMetadataFunc(fileName, email, hash, when, remoteURL, 0)
chunkSkel := &sources.Chunk{
SourceName: s.sourceName,
SourceID: s.sourceID,
@ -526,10 +532,10 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
}
if diff.Content.Len() > sources.ChunkSize+sources.PeekSize {
s.gitChunk(ctx, diff, fileName, email, hash, when, urlMetadata, reporter)
s.gitChunk(ctx, diff, fileName, email, hash, when, remoteURL, reporter)
continue
}
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, int64(diff.LineStart))
metadata := s.sourceMetadataFunc(fileName, email, hash, when, remoteURL, int64(diff.LineStart))
chunk := sources.Chunk{
SourceName: s.sourceName,
SourceID: s.sourceID,