mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
fix(gitparse): handle fromFileLine edge case (#2206)
This commit is contained in:
parent
7209002b6b
commit
241e153dfb
3 changed files with 99 additions and 17 deletions
|
@@ -4,6 +4,7 @@ import (
|
|||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/go-logr/logr"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
@@ -92,6 +93,7 @@ func (state ParseState) String() string {
|
|||
"BinaryFileLine",
|
||||
"HunkLineNumberLine",
|
||||
"HunkContentLine",
|
||||
"ParseFailure",
|
||||
}[state]
|
||||
}
|
||||
|
||||
|
@@ -313,7 +315,7 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
|
|||
case isMessageLine(isStaged, latestState, line):
|
||||
latestState = MessageLine
|
||||
|
||||
currentCommit.Message.Write(line[4:])
|
||||
currentCommit.Message.Write(line[4:]) // Messages are indented with 4 spaces.
|
||||
case isMessageEndLine(isStaged, latestState, line):
|
||||
latestState = MessageEndLine
|
||||
// NoOp
|
||||
|
@@ -425,13 +427,14 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
|
|||
|
||||
// Here be dragons...
|
||||
// Build an informative error message.
|
||||
var err error
|
||||
err := fmt.Errorf(`invalid line "%s" after state "%s"`, line, latestState)
|
||||
var logger logr.Logger
|
||||
if currentCommit != nil && currentCommit.Hash != "" {
|
||||
err = fmt.Errorf(`failed to parse line "%s" after state "%s" (commit=%s)`, line, latestState, currentCommit.Hash)
|
||||
logger = ctx.Logger().WithValues("commit", currentCommit.Hash)
|
||||
} else {
|
||||
err = fmt.Errorf(`failed to parse line "%s" after state "%s"`, line, latestState)
|
||||
logger = ctx.Logger()
|
||||
}
|
||||
ctx.Logger().V(2).Error(err, "Recovering at the latest commit or diff...\n")
|
||||
logger.Error(err, "failed to parse Git input. Recovering at the latest commit or diff...")
|
||||
|
||||
latestState = ParseFailure
|
||||
}
|
||||
|
@@ -612,8 +615,9 @@ func pathFromBinaryLine(line []byte) string {
|
|||
}
|
||||
|
||||
// --- a/internal/addrs/move_endpoint_module.go
|
||||
// --- /dev/null
|
||||
func isFromFileLine(isStaged bool, latestState ParseState, line []byte) bool {
|
||||
if latestState != IndexLine {
|
||||
if !(latestState == IndexLine || latestState == ModeLine) {
|
||||
return false
|
||||
}
|
||||
if len(line) >= 6 && bytes.Equal(line[:4], []byte("--- ")) {
|
||||
|
|
|
@@ -323,6 +323,13 @@ func TestLineChecks(t *testing.T) {
|
|||
IndexLine,
|
||||
[]byte("--- /dev/null"),
|
||||
},
|
||||
// New file (https://github.com/trufflesecurity/trufflehog/issues/2109)
|
||||
// diff --git a/libs/Unfit-1.0 b/libs/Unfit-1.0
|
||||
// new file mode 160000
|
||||
{
|
||||
ModeLine,
|
||||
[]byte("--- /dev/null"),
|
||||
},
|
||||
// Uncommon but valid prefixes. Will these ever show up?
|
||||
// https://stackoverflow.com/a/2530012
|
||||
// https://git-scm.com/docs/git-config#Documentation/git-config.txt-diffmnemonicPrefix
|
||||
|
@@ -1148,7 +1155,45 @@ func TestMaxCommitSize(t *testing.T) {
|
|||
|
||||
}
|
||||
|
||||
const commitLog = `commit 4727ffb7ad6dc5130bf4b4dd166e00705abdd018 (HEAD -> master)
|
||||
const commitLog = `commit fd6e99e7a80199b76a694603be57c5ade1de18e7
|
||||
Author: Jaliborc <jaliborc@gmail.com>
|
||||
Date: Mon Apr 25 16:28:06 2011 +0100
|
||||
|
||||
Added Unusable coloring
|
||||
|
||||
diff --git a/components/item.lua b/components/item.lua
|
||||
index fc74534..f8d7d50 100755
|
||||
--- a/components/item.lua
|
||||
+++ b/components/item.lua
|
||||
@@ -9,6 +9,7 @@ ItemSlot:Hide()
|
||||
Bagnon.ItemSlot = ItemSlot
|
||||
|
||||
local ItemSearch = LibStub('LibItemSearch-1.0')
|
||||
+local Unfit = LibStub('Unfit-1.0')
|
||||
|
||||
local function hasBlizzQuestHighlight()
|
||||
return GetContainerItemQuestInfo and true or false
|
||||
diff --git a/embeds.xml b/embeds.xml
|
||||
index d3f4e7c..0c2df69 100755
|
||||
--- a/embeds.xml
|
||||
+++ b/embeds.xml
|
||||
@@ -6,6 +6,7 @@
|
||||
<Include file="libs\AceConsole-3.0\AceConsole-3.0.xml"/>
|
||||
<Include file="libs\AceLocale-3.0\AceLocale-3.0.xml"/>
|
||||
|
||||
+ <Script file="libs\Unfit-1.0\Unfit-1.0.lua"/>
|
||||
<Script file="libs\LibDataBroker-1.1.lua"/>
|
||||
<Script file="libs\LibItemSearch-1.0\LibItemSearch-1.0.lua"/>
|
||||
</Ui>
|
||||
\ No newline at end of file
|
||||
diff --git a/libs/Unfit-1.0 b/libs/Unfit-1.0
|
||||
new file mode 160000
|
||||
--- /dev/null
|
||||
+++ b/libs/Unfit-1.0
|
||||
@@ -0,0 +1 @@
|
||||
+Subproject commit 0000000000000000000000000000000000000000
|
||||
|
||||
commit 4727ffb7ad6dc5130bf4b4dd166e00705abdd018 (HEAD -> master)
|
||||
Author: John Smith <john.smith@example.com>
|
||||
Date: Tue Jul 11 22:26:11 2023 -0400
|
||||
|
||||
|
@@ -1539,6 +1584,33 @@ index 0000000..5af88a8
|
|||
// This throws a nasty panic if it's a top-level var.
|
||||
func expectedCommits() []Commit {
|
||||
return []Commit{
|
||||
// a
|
||||
{
|
||||
Hash: "fd6e99e7a80199b76a694603be57c5ade1de18e7",
|
||||
Author: "Jaliborc <jaliborc@gmail.com>",
|
||||
Date: newTime("Mon Apr 25 16:28:06 2011 +0100"),
|
||||
Message: newStringBuilderValue("Added Unusable coloring\n"),
|
||||
Diffs: []Diff{
|
||||
{
|
||||
PathB: "components/item.lua",
|
||||
LineStart: 9,
|
||||
Content: *bytes.NewBuffer([]byte("\n\nlocal Unfit = LibStub('Unfit-1.0')\n\n\n")),
|
||||
IsBinary: false,
|
||||
},
|
||||
{
|
||||
PathB: "embeds.xml",
|
||||
LineStart: 6,
|
||||
Content: *bytes.NewBuffer([]byte("\n\n <Script file=\"libs\\Unfit-1.0\\Unfit-1.0.lua\"/>\n\n\n\n")),
|
||||
IsBinary: false,
|
||||
},
|
||||
{
|
||||
PathB: "libs/Unfit-1.0",
|
||||
LineStart: 1,
|
||||
Content: *bytes.NewBuffer([]byte("Subproject commit 0000000000000000000000000000000000000000\n")),
|
||||
IsBinary: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
// Empty commit and message. Lord help us.
|
||||
{
|
||||
Hash: "4727ffb7ad6dc5130bf4b4dd166e00705abdd018",
|
||||
|
|
|
@@ -18,12 +18,13 @@ import (
|
|||
"github.com/go-git/go-git/v5/plumbing"
|
||||
"github.com/go-git/go-git/v5/plumbing/object"
|
||||
"github.com/google/go-github/v42/github"
|
||||
diskbufferreader "github.com/trufflesecurity/disk-buffer-reader"
|
||||
"golang.org/x/oauth2"
|
||||
"golang.org/x/sync/semaphore"
|
||||
"google.golang.org/protobuf/proto"
|
||||
"google.golang.org/protobuf/types/known/anypb"
|
||||
|
||||
diskbufferreader "github.com/trufflesecurity/disk-buffer-reader"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
||||
|
@@ -462,7 +463,16 @@ func (s *Git) CommitsScanned() uint64 {
|
|||
const gitDirName = ".git"
|
||||
|
||||
func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, reporter sources.ChunkReporter) error {
|
||||
commitChan, err := gitparse.NewParser().RepoPath(ctx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
|
||||
// Get the remote URL for reporting (may be empty)
|
||||
remoteURL := getSafeRemoteURL(repo, "origin")
|
||||
var repoCtx context.Context
|
||||
if remoteURL != "" {
|
||||
repoCtx = context.WithValue(ctx, "repo", remoteURL)
|
||||
} else {
|
||||
repoCtx = context.WithValue(ctx, "repo", path)
|
||||
}
|
||||
|
||||
commitChan, err := gitparse.NewParser().RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@@ -470,14 +480,10 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
|
|||
return nil
|
||||
}
|
||||
|
||||
// get the URL metadata for reporting (may be empty)
|
||||
urlMetadata := getSafeRemoteURL(repo, "origin")
|
||||
|
||||
var depth int64
|
||||
|
||||
gitDir := filepath.Join(path, gitDirName)
|
||||
|
||||
logger := ctx.Logger().WithValues("repo", urlMetadata)
|
||||
logger := repoCtx.Logger()
|
||||
logger.V(1).Info("scanning repo", "base", scanOptions.BaseHash, "head", scanOptions.HeadHash)
|
||||
for commit := range commitChan {
|
||||
if len(scanOptions.BaseHash) > 0 {
|
||||
|
@@ -510,7 +516,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
|
|||
// Handle binary files by reading the entire file rather than using the diff.
|
||||
if diff.IsBinary {
|
||||
commitHash := plumbing.NewHash(hash)
|
||||
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, 0)
|
||||
metadata := s.sourceMetadataFunc(fileName, email, hash, when, remoteURL, 0)
|
||||
chunkSkel := &sources.Chunk{
|
||||
SourceName: s.sourceName,
|
||||
SourceID: s.sourceID,
|
||||
|
@@ -526,10 +532,10 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
|
|||
}
|
||||
|
||||
if diff.Content.Len() > sources.ChunkSize+sources.PeekSize {
|
||||
s.gitChunk(ctx, diff, fileName, email, hash, when, urlMetadata, reporter)
|
||||
s.gitChunk(ctx, diff, fileName, email, hash, when, remoteURL, reporter)
|
||||
continue
|
||||
}
|
||||
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, int64(diff.LineStart))
|
||||
metadata := s.sourceMetadataFunc(fileName, email, hash, when, remoteURL, int64(diff.LineStart))
|
||||
chunk := sources.Chunk{
|
||||
SourceName: s.sourceName,
|
||||
SourceID: s.sourceID,
|
||||
|
|
Loading…
Reference in a new issue