feat(git): scan commit metadata (#2713)

This fixes #2683. It scans the commit author, the committer (typically GitHub <noreply@github.com> for commits created through GitHub, but it can differ), and the commit message.

It also scans Git notes.
This commit is contained in:
Richard Gomez 2024-04-25 10:13:09 -04:00 committed by GitHub
parent a5f04e65f7
commit 81a9c813a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 501 additions and 186 deletions

View file

@ -22,7 +22,7 @@ import (
const (
// defaultDateFormat is the standard date format for git.
defaultDateFormat = "Mon Jan 02 15:04:05 2006 -0700"
defaultDateFormat = "Mon Jan 2 15:04:05 2006 -0700"
// defaultMaxDiffSize is the maximum size for a diff. Larger diffs will be cut off.
defaultMaxDiffSize = 2 * 1024 * 1024 * 1024 // 2GB
@ -108,6 +108,7 @@ func (d *Diff) finalize() error {
type Commit struct {
Hash string
Author string
Committer string
Date time.Time
Message strings.Builder
Size int // in bytes
@ -131,10 +132,15 @@ const (
CommitLine
MergeLine
AuthorLine
DateLine
AuthorDateLine
CommitterLine
CommitterDateLine
MessageStartLine
MessageLine
MessageEndLine
NotesStartLine
NotesLine
NotesEndLine
DiffLine
ModeLine
IndexLine
@ -152,10 +158,15 @@ func (state ParseState) String() string {
"CommitLine",
"MergeLine",
"AuthorLine",
"DateLine",
"AuthorDateLine",
"CommitterLine",
"CommitterDateLine",
"MessageStartLine",
"MessageLine",
"MessageEndLine",
"NotesStartLine",
"NotesLine",
"NotesEndLine",
"DiffLine",
"ModeLine",
"IndexLine",
@ -209,7 +220,15 @@ func NewParser(options ...Option) *Parser {
// RepoPath parses the output of the `git log` command for the `source` path.
// The Diff chan will return diffs in the order they are parsed from the log.
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string, isBare bool) (chan *Diff, error) {
args := []string{"-C", source, "log", "-p", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"}
args := []string{
"-C", source,
"log",
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
"--full-history",
"--date=format:%a %b %d %H:%M:%S %Y %z",
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
}
if abbreviatedLog {
args = append(args, "--diff-filter=AM")
}
@ -373,16 +392,23 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
latestState = MergeLine
case isAuthorLine(isStaged, latestState, line):
latestState = AuthorLine
currentCommit.Author = strings.TrimRight(string(line[8:]), "\n")
currentCommit.Author = strings.TrimSpace(string(line[8:]))
case isAuthorDateLine(isStaged, latestState, line):
latestState = AuthorDateLine
case isDateLine(isStaged, latestState, line):
latestState = DateLine
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[6:])))
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[12:])))
if err != nil {
ctx.Logger().V(2).Info("Could not parse date from git stream.", "error", err)
ctx.Logger().Error(err, "failed to parse commit date", "commit", currentCommit.Hash, "latestState", latestState.String())
latestState = ParseFailure
continue
}
currentCommit.Date = date
case isCommitterLine(isStaged, latestState, line):
latestState = CommitterLine
currentCommit.Committer = strings.TrimSpace(string(line[8:]))
case isCommitterDateLine(isStaged, latestState, line):
latestState = CommitterDateLine
// NoOp
case isMessageStartLine(isStaged, latestState, line):
latestState = MessageStartLine
// NoOp
@ -393,6 +419,17 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
case isMessageEndLine(isStaged, latestState, line):
latestState = MessageEndLine
// NoOp
case isNotesStartLine(isStaged, latestState, line):
latestState = NotesStartLine
currentCommit.Message.WriteString("\n")
currentCommit.Message.Write(line)
case isNotesLine(isStaged, latestState, line):
latestState = NotesLine
currentCommit.Message.Write(line[4:]) // Notes are indented by 4 spaces.
case isNotesEndLine(isStaged, latestState, line):
latestState = NotesEndLine
// NoOp
case isDiffLine(isStaged, latestState, line):
latestState = DiffLine
@ -577,20 +614,42 @@ func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
return false
}
// Date: Tue Aug 10 15:20:40 2021 +0100
func isDateLine(isStaged bool, latestState ParseState, line []byte) bool {
// AuthorDate: Tue Aug 10 15:20:40 2021 +0100
func isAuthorDateLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != AuthorLine {
return false
}
if len(line) > 7 && bytes.Equal(line[:5], []byte("Date:")) {
if len(line) > 10 && bytes.Equal(line[:11], []byte("AuthorDate:")) {
return true
}
return false
}
// Line directly after Date with only a newline.
// isCommitterLine reports whether line is the committer header, e.g.:
//
//	Commit: Bill Rich <bill.rich@trufflesec.com>
//
// Staged input never matches, and the header is only accepted when the
// previously parsed line was the AuthorDate header.
func isCommitterLine(isStaged bool, latestState ParseState, line []byte) bool {
	if isStaged {
		return false
	}
	if latestState != AuthorDateLine {
		return false
	}
	// Require more than 8 bytes so there is content after the "Commit:" label.
	return len(line) > 8 && bytes.HasPrefix(line, []byte("Commit:"))
}
// isCommitterDateLine reports whether line is the commit-date header, e.g.:
//
//	CommitDate: Wed Apr 17 19:59:28 2024 -0400
//
// Staged input never matches, and the header is only accepted when the
// previously parsed line was the committer header.
func isCommitterDateLine(isStaged bool, latestState ParseState, line []byte) bool {
	if !isStaged && latestState == CommitterLine {
		// HasPrefix already guarantees the line holds at least the 11-byte label.
		return bytes.HasPrefix(line, []byte("CommitDate:"))
	}
	return false
}
// Line directly after CommitterDate with only a newline.
func isMessageStartLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != DateLine {
if isStaged || latestState != CommitterDateLine {
return false
}
// TODO: Improve the implementation of this and isMessageEndLine
@ -622,15 +681,51 @@ func isMessageEndLine(isStaged bool, latestState ParseState, line []byte) bool {
return false
}
// isNotesStartLine reports whether line opens a notes section, i.e. a header
// such as `Notes:` or `Notes (context):`.
// See https://tylercipriani.com/blog/2022/11/19/git-notes-gits-coolest-most-unloved-feature/
//
// Staged input never matches, and the header is only accepted directly after
// the end of the commit message. Only the leading "Notes" is checked, and the
// line must be longer than that word.
func isNotesStartLine(isStaged bool, latestState ParseState, line []byte) bool {
	afterMessage := !isStaged && latestState == MessageEndLine
	return afterMessage && len(line) > 5 && bytes.HasPrefix(line, []byte("Notes"))
}
// isNotesLine reports whether line is a line of note content: a line that
// starts with 4 spaces and follows either the notes header or another notes
// line. Staged input never matches.
//
// The line must be longer than the 4-space indent itself.
func isNotesLine(isStaged bool, latestState ParseState, line []byte) bool {
	if isStaged || !(latestState == NotesStartLine || latestState == NotesLine) {
		return false
	}
	// Compare against the full 4-space indent. A shorter literal can never
	// equal a 4-byte slice, which would make this predicate always false.
	return len(line) > 4 && bytes.HasPrefix(line, []byte("    "))
}
// isNotesEndLine reports whether line terminates a notes section: a line that
// directly follows a notes line and contains nothing but line-ending
// characters (or nothing at all). Staged input never matches.
func isNotesEndLine(isStaged bool, latestState ParseState, line []byte) bool {
	if isStaged {
		return false
	}
	if latestState != NotesLine {
		return false
	}
	// Strip trailing CR/LF; whatever remains must be empty.
	return strings.TrimRight(string(line), "\r\n") == ""
}
// diff --git a/internal/addrs/move_endpoint_module.go b/internal/addrs/move_endpoint_module.go
func isDiffLine(isStaged bool, latestState ParseState, line []byte) bool {
if !(latestState == MessageStartLine || // Empty commit messages can go from MessageStart->Diff
latestState == MessageEndLine ||
latestState == NotesEndLine ||
latestState == BinaryFileLine ||
latestState == ModeLine ||
latestState == IndexLine ||
latestState == HunkContentLine ||
latestState == ParseFailure) {
if latestState == Initial && !isStaged {
if !(isStaged && latestState == Initial) {
return false
}
}

View file

@ -78,7 +78,7 @@ func TestLineChecksWithStaged(t *testing.T) {
},
fails: []testCaseLine{
{
DateLine,
CommitterDateLine,
[]byte(" Merge pull request #34511 from cescoffier/duplicated-context-doc"),
},
{
@ -111,16 +111,16 @@ func TestLineChecksWithStaged(t *testing.T) {
},
function: isAuthorLine,
},
"dateLine": {
"authorDateLine": {
passes: []testCaseLine{
{
AuthorLine,
[]byte("Date: Tue Jan 18 16:59:18 2022 -0800"),
[]byte("AuthorDate: Tue Jan 18 16:59:18 2022 -0800"),
},
},
fails: []testCaseLine{
{
DateLine,
AuthorDateLine,
[]byte(""),
},
{
@ -128,12 +128,54 @@ func TestLineChecksWithStaged(t *testing.T) {
[]byte("notcorrect"),
},
},
function: isDateLine,
function: isAuthorDateLine,
},
"committerLine": {
passes: []testCaseLine{
{
AuthorDateLine,
[]byte("Commit: Zachary Rice <zachary.rice@trufflesec.com>"),
},
{
AuthorDateLine,
[]byte("Commit: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>"),
},
},
fails: []testCaseLine{
{
CommitLine,
[]byte("Date: Tue Jun 20 13:55:31 2023 -0500"),
},
{
AuthorLine,
[]byte("Author: Bill Rich <bill.rich@trufflesec.com>"),
},
},
function: isCommitterLine,
},
"committerDateLine": {
passes: []testCaseLine{
{
CommitterLine,
[]byte("CommitDate: Tue Jan 18 16:59:18 2022 -0800"),
},
},
fails: []testCaseLine{
{
CommitterDateLine,
[]byte(""),
},
{
CommitterLine,
[]byte("notcorrect"),
},
},
function: isCommitterDateLine,
},
"messageStartLine": {
passes: []testCaseLine{
{
DateLine,
CommitterDateLine,
[]byte(""),
},
},
@ -143,7 +185,7 @@ func TestLineChecksWithStaged(t *testing.T) {
[]byte("Date: Tue Jun 20 13:21:19 2023 -0700"),
},
{
DateLine,
CommitterDateLine,
[]byte("notcorrect"),
},
},
@ -166,7 +208,7 @@ func TestLineChecksWithStaged(t *testing.T) {
[]byte("Date: Tue Jun 20 13:21:19 2023 -0700"),
},
{
DateLine,
CommitterDateLine,
[]byte("notcorrect"),
},
},
@ -191,6 +233,67 @@ func TestLineChecksWithStaged(t *testing.T) {
},
function: isMessageEndLine,
},
"notesStartLine": {
passes: []testCaseLine{
{
MessageEndLine,
[]byte("Notes:"),
},
{
MessageEndLine,
[]byte("Notes (review):"),
},
},
fails: []testCaseLine{
{
MessageStartLine,
[]byte(""),
},
{
MessageEndLine,
[]byte("notcorrect"),
},
},
function: isNotesStartLine,
},
"notesLine": {
passes: []testCaseLine{
{
NotesStartLine,
[]byte(" Submitted-by: Random J Developer <random@developer.example.org>"),
},
},
fails: []testCaseLine{
{
MessageEndLine,
[]byte(""),
},
{
MessageEndLine,
[]byte("notcorrect"),
},
},
function: isNotesLine,
},
"notesEndLine": {
passes: []testCaseLine{
{
NotesLine,
[]byte("\n"),
},
},
fails: []testCaseLine{
{
MessageEndLine,
[]byte("\n"),
},
{
NotesLine,
[]byte("notcorrect"),
},
},
function: isNotesEndLine,
},
"diffLine": {
passes: []testCaseLine{
{
@ -201,6 +304,10 @@ func TestLineChecksWithStaged(t *testing.T) {
MessageEndLine,
[]byte("diff --git a/ Lunch and Learn - HCDiag.pdf b/ Lunch and Learn - HCDiag.pdf"),
},
{
NotesEndLine,
[]byte("diff --git \"a/one.txt\" \"b/one.txt\""),
},
{
BinaryFileLine,
[]byte("diff --git a/pkg/decoders/utf16_test.go b/pkg/decoders/utf16_test.go"),
@ -209,16 +316,24 @@ func TestLineChecksWithStaged(t *testing.T) {
HunkContentLine,
[]byte("diff --git a/pkg/decoders/utf8.go b/pkg/decoders/utf8.go"),
},
{
ModeLine,
[]byte("diff --git a/pkg/decoders/utf8.go b/pkg/decoders/utf8.go"),
},
},
fails: []testCaseLine{
{
DateLine,
CommitterDateLine,
[]byte(" Make trace error message so newlines aren't escaped (#1396)"),
},
{
MessageLine,
[]byte("notcorrect"),
},
{
NotesLine,
[]byte("diff --git a/pkg/decoders/utf8.go b/pkg/decoders/utf8.go"),
},
},
function: isDiffLine,
},
@ -930,6 +1045,7 @@ func TestCommitParseFailureRecovery(t *testing.T) {
Commit: &Commit{
Hash: "df393b4125c2aa217211b2429b8963d0cefcee27",
Author: "Stephen <stephen@egroat.com>",
Committer: "Stephen <stephen@egroat.com>",
Date: newTime("Wed Dec 06 14:44:41 2017 -0800"),
Message: newStringBuilderValue("Add travis testing\n"),
},
@ -940,6 +1056,7 @@ func TestCommitParseFailureRecovery(t *testing.T) {
Commit: &Commit{
Hash: "3d76a97faad96e0f326afb61c232b9c2a18dca35",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:03:54 2023 -0400"),
Message: strings.Builder{},
},
@ -950,6 +1067,7 @@ func TestCommitParseFailureRecovery(t *testing.T) {
Commit: &Commit{
Hash: "7bd16429f1f708746dabf970e54b05d2b4734997",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:10:49 2023 -0400"),
Message: newStringBuilderValue("Change file\n"),
},
@ -981,6 +1099,7 @@ func TestCommitParseFailureRecoveryBufferedFileWriter(t *testing.T) {
Commit: &Commit{
Hash: "df393b4125c2aa217211b2429b8963d0cefcee27",
Author: "Stephen <stephen@egroat.com>",
Committer: "Stephen <stephen@egroat.com>",
Date: newTime("Wed Dec 06 14:44:41 2017 -0800"),
Message: newStringBuilderValue("Add travis testing\n"),
},
@ -991,6 +1110,7 @@ func TestCommitParseFailureRecoveryBufferedFileWriter(t *testing.T) {
Commit: &Commit{
Hash: "3d76a97faad96e0f326afb61c232b9c2a18dca35",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:03:54 2023 -0400"),
Message: strings.Builder{},
},
@ -1001,6 +1121,7 @@ func TestCommitParseFailureRecoveryBufferedFileWriter(t *testing.T) {
Commit: &Commit{
Hash: "7bd16429f1f708746dabf970e54b05d2b4734997",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:10:49 2023 -0400"),
Message: newStringBuilderValue("Change file\n"),
},
@ -1031,7 +1152,9 @@ func TestCommitParseFailureRecoveryBufferedFileWriter(t *testing.T) {
const recoverableCommits = `commit df393b4125c2aa217211b2429b8963d0cefcee27
Author: Stephen <stephen@egroat.com>
Date: Wed Dec 06 14:44:41 2017 -0800
AuthorDate: Wed Dec 06 14:44:41 2017 -0800
Commit: Stephen <stephen@egroat.com>
CommitDate: Wed Dec 06 14:44:41 2017 -0800
Add travis testing
@ -1071,7 +1194,9 @@ index 00000000..e69de29b
commit 3d76a97faad96e0f326afb61c232b9c2a18dca35 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:03:54 2023 -0400
AuthorDate: Tue Jul 11 18:03:54 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:03:54 2023 -0400
diff --git a/sample.txt b/sample.txt
new file mode 100644
@ -1083,7 +1208,9 @@ index 0000000..af5626b
commit 7bd16429f1f708746dabf970e54b05d2b4734997 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:10:49 2023 -0400
AuthorDate: Tue Jul 11 18:10:49 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:10:49 2023 -0400
Change file
@ -1357,7 +1484,9 @@ func TestMaxCommitSize(t *testing.T) {
const commitLog = `commit e50b135fd29e91b2fbb25923797f5ecffe59f359
Author: lionzxy <nikita@kulikof.ru>
Date: Wed Mar 1 18:20:04 2017 +0300
AuthorDate: Wed Mar 1 18:20:04 2017 +0300
Commit: lionzxy <nikita@kulikof.ru>
CommitDate: Wed Mar 1 18:20:04 2017 +0300
Все работает, но он не принимает :(
@ -1379,10 +1508,15 @@ index 85bfb17..89b08b5 100644
commit fd6e99e7a80199b76a694603be57c5ade1de18e7
Author: Jaliborc <jaliborc@gmail.com>
Date: Mon Apr 25 16:28:06 2011 +0100
AuthorDate: Mon Apr 25 16:28:06 2011 +0100
Commit: Jaliborc <jaliborc@gmail.com>
CommitDate: Mon Apr 25 16:28:06 2011 +0100
Added Unusable coloring
Notes:
Message-Id: <1264640755-22447-1-git-send-email-user@example.de>
diff --git a/components/item.lua b/components/item.lua
index fc74534..f8d7d50 100755
--- a/components/item.lua
@ -1417,17 +1551,23 @@ new file mode 160000
commit 4727ffb7ad6dc5130bf4b4dd166e00705abdd018 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 22:26:11 2023 -0400
AuthorDate: Tue Jul 11 22:26:11 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 22:26:11 2023 -0400
commit c904e0f5cd9f30ae520c66bd5f70806219fe7ca2 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Mon Jul 10 10:17:11 2023 -0400
AuthorDate: Mon Jul 10 10:17:11 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Mon Jul 10 10:17:11 2023 -0400
Empty Commit
commit 3d76a97faad96e0f326afb61c232b9c2a18dca35 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:03:54 2023 -0400
AuthorDate: Tue Jul 11 18:03:54 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:03:54 2023 -0400
diff --git a/sample.txt b/sample.txt
new file mode 100644
@ -1439,7 +1579,9 @@ index 0000000..af5626b
commit df393b4125c2aa217211b2429b8963d0cefcee27
Author: Stephen <stephen@egroat.com>
Date: Wed Dec 06 14:44:41 2017 -0800
AuthorDate: Wed Dec 06 14:44:41 2017 -0800
Commit: Stephen <stephen@egroat.com>
CommitDate: Wed Dec 06 14:44:41 2017 -0800
Add travis testing
@ -1479,7 +1621,9 @@ index 00000000..e69de29b
commit 4218c39d99b5f30153f62471c1be1c1596f0a4d4
Author: Dustin Decker <dustin@trufflesec.com>
Date: Thu Jan 13 12:02:24 2022 -0800
AuthorDate: Thu Jan 13 12:02:24 2022 -0800
Commit: Dustin Decker <dustin@trufflesec.com>
CommitDate: Thu Jan 13 12:02:24 2022 -0800
Initial CLI w/ partially implemented Git source and demo detector (#1)
@ -1535,7 +1679,9 @@ index 00000000..7fb2f73c
commit 934cf5d255fd8e28b33f5a6ba64276caf0b284bf (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:43:22 2023 -0400
AuthorDate: Tue Jul 11 18:43:22 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:43:22 2023 -0400
Test toFile/plusLine parsing
@ -1551,7 +1697,9 @@ index 0000000..451be67
commit 2a5d703b02b52d65c65ee9f7928f158b919ab741
Author: Sergey Beryozkin <sberyozkin@gmail.com>
Date: Fri Jul 7 17:44:26 2023 +0100
AuthorDate: Fri Jul 7 17:44:26 2023 +0100
Commit: Sergey Beryozkin <sberyozkin@gmail.com>
CommitDate: Fri Jul 7 17:44:26 2023 +0100
Do not refresh OIDC session if the user is requesting logout
@ -1659,7 +1807,9 @@ index 51e1b9a932d..472c2743bc4 100644
commit 2a057632d7f5fa3d1c77b9aa037263211c0e0290
Author: rjtmahinay <rjt.mahinay@gmail.com>
Date: Mon Jul 10 01:22:32 2023 +0800
AuthorDate: Mon Jul 10 01:22:32 2023 +0800
Commit: rjtmahinay <rjt.mahinay@gmail.com>
CommitDate: Mon Jul 10 01:22:32 2023 +0800
Add QuarkusApplication javadoc
@ -1677,7 +1827,9 @@ index 350685123d5..87d2220eb98 100644
commit bca2d17491015ea1522f34517223b5a366aea73c (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:12:21 2023 -0400
AuthorDate: Tue Jul 11 18:12:21 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:12:21 2023 -0400
Delete binary file
@ -1688,7 +1840,9 @@ Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ
commit afc6dc5d47f28366638da877ecb6b819c69e659b
Author: John Smith <john.smith@example.com>
Date: Mon Jul 10 12:21:33 2023 -0400
AuthorDate: Mon Jul 10 12:21:33 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Mon Jul 10 12:21:33 2023 -0400
Change binary file
@ -1698,7 +1852,9 @@ Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and b/trufflehog_3.42.0_linu
commit 638595917417c5c8a956937b28c5127719023363
Author: John Smith <john.smith@example.com>
Date: Mon Jul 10 12:20:35 2023 -0400
AuthorDate: Mon Jul 10 12:20:35 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Mon Jul 10 12:20:35 2023 -0400
Add binary file
@ -1709,7 +1865,9 @@ Binary files /dev/null and b/trufflehog_3.42.0_linux_arm64.tar.gz differ
commit ce0f5d1fe0272f180ccb660196f439c0c2f4ec8e (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:08:52 2023 -0400
AuthorDate: Tue Jul 11 18:08:52 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:08:52 2023 -0400
Delete file
@ -1733,7 +1891,9 @@ index 635ef2c..0000000
commit d606a729383371558473b70a6a7b1ca264b0d205
Author: John Smith <john.smith@example.com>
Date: Mon Jul 10 14:17:04 2023 -0400
AuthorDate: Mon Jul 10 14:17:04 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Mon Jul 10 14:17:04 2023 -0400
Rename file
@ -1744,7 +1904,9 @@ rename to tzu.txt
commit 7bd16429f1f708746dabf970e54b05d2b4734997 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Tue Jul 11 18:10:49 2023 -0400
AuthorDate: Tue Jul 11 18:10:49 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Tue Jul 11 18:10:49 2023 -0400
Change file
@ -1761,7 +1923,9 @@ index 5af88a8..c729cdb 100644
commit c7062674c17192caa284615ab2fa9778c6602164 (HEAD -> master)
Author: John Smith <john.smith@example.com>
Date: Mon Jul 10 10:15:18 2023 -0400
AuthorDate: Mon Jul 10 10:15:18 2023 -0400
Commit: John Smith <john.smith@example.com>
CommitDate: Mon Jul 10 10:15:18 2023 -0400
Create files
@ -1823,6 +1987,7 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "e50b135fd29e91b2fbb25923797f5ecffe59f359",
Author: "lionzxy <nikita@kulikof.ru>",
Committer: "lionzxy <nikita@kulikof.ru>",
Date: newTime("Wed Mar 1 18:20:04 2017 +0300"),
Message: newStringBuilderValue("Все работает, но он не принимает :(\n"),
},
@ -1835,8 +2000,9 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "fd6e99e7a80199b76a694603be57c5ade1de18e7",
Author: "Jaliborc <jaliborc@gmail.com>",
Committer: "Jaliborc <jaliborc@gmail.com>",
Date: newTime("Mon Apr 25 16:28:06 2011 +0100"),
Message: newStringBuilderValue("Added Unusable coloring\n"),
Message: newStringBuilderValue("Added Unusable coloring\n\nNotes:\nMessage-Id: <1264640755-22447-1-git-send-email-user@example.de>\n"),
},
contentWriter: newBufferWithContent([]byte("\n\nlocal Unfit = LibStub('Unfit-1.0')\n\n\n")),
IsBinary: false,
@ -1848,8 +2014,9 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "fd6e99e7a80199b76a694603be57c5ade1de18e7",
Author: "Jaliborc <jaliborc@gmail.com>",
Committer: "Jaliborc <jaliborc@gmail.com>",
Date: newTime("Mon Apr 25 16:28:06 2011 +0100"),
Message: newStringBuilderValue("Added Unusable coloring\n"),
Message: newStringBuilderValue("Added Unusable coloring\n\nNotes:\nMessage-Id: <1264640755-22447-1-git-send-email-user@example.de>\n"),
},
IsBinary: false,
},
@ -1860,8 +2027,9 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "fd6e99e7a80199b76a694603be57c5ade1de18e7",
Author: "Jaliborc <jaliborc@gmail.com>",
Committer: "Jaliborc <jaliborc@gmail.com>",
Date: newTime("Mon Apr 25 16:28:06 2011 +0100"),
Message: newStringBuilderValue("Added Unusable coloring\n"),
Message: newStringBuilderValue("Added Unusable coloring\n\nNotes:\nMessage-Id: <1264640755-22447-1-git-send-email-user@example.de>\n"),
},
IsBinary: false,
},
@ -1869,6 +2037,7 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "4727ffb7ad6dc5130bf4b4dd166e00705abdd018",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 22:26:11 2023 -0400"),
Message: strings.Builder{},
},
@ -1877,6 +2046,7 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "c904e0f5cd9f30ae520c66bd5f70806219fe7ca2",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Mon Jul 10 10:17:11 2023 -0400"),
Message: newStringBuilderValue("Empty Commit\n"),
},
@ -1888,6 +2058,7 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "3d76a97faad96e0f326afb61c232b9c2a18dca35",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:03:54 2023 -0400"),
Message: strings.Builder{},
},
@ -1900,6 +2071,7 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "df393b4125c2aa217211b2429b8963d0cefcee27",
Author: "Stephen <stephen@egroat.com>",
Committer: "Stephen <stephen@egroat.com>",
Date: newTime("Wed Dec 06 14:44:41 2017 -0800"),
Message: newStringBuilderValue("Add travis testing\n"),
},
@ -1911,6 +2083,7 @@ func expectedDiffs() []*Diff {
Commit: &Commit{
Hash: "df393b4125c2aa217211b2429b8963d0cefcee27",
Author: "Stephen <stephen@egroat.com>",
Committer: "Stephen <stephen@egroat.com>",
Date: newTime("Wed Dec 06 14:44:41 2017 -0800"),
Message: newStringBuilderValue("Add travis testing\n"),
},
@ -1936,6 +2109,7 @@ python:
Commit: &Commit{
Hash: "4218c39d99b5f30153f62471c1be1c1596f0a4d4",
Author: "Dustin Decker <dustin@trufflesec.com>",
Committer: "Dustin Decker <dustin@trufflesec.com>",
Date: newTime("Thu Jan 13 12:02:24 2022 -0800"),
Message: newStringBuilderValue("Initial CLI w/ partially implemented Git source and demo detector (#1)\n"),
},
@ -1980,6 +2154,7 @@ protos:
Commit: &Commit{
Hash: "934cf5d255fd8e28b33f5a6ba64276caf0b284bf",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:43:22 2023 -0400"),
Message: newStringBuilderValue("Test toFile/plusLine parsing\n"),
},
@ -1992,6 +2167,7 @@ protos:
Commit: &Commit{
Hash: "2a5d703b02b52d65c65ee9f7928f158b919ab741",
Author: "Sergey Beryozkin <sberyozkin@gmail.com>",
Committer: "Sergey Beryozkin <sberyozkin@gmail.com>",
Date: newTime("Fri Jul 7 17:44:26 2023 +0100"),
Message: newStringBuilderValue("Do not refresh OIDC session if the user is requesting logout\n"),
},
@ -2004,6 +2180,7 @@ protos:
Commit: &Commit{
Hash: "2a5d703b02b52d65c65ee9f7928f158b919ab741",
Author: "Sergey Beryozkin <sberyozkin@gmail.com>",
Committer: "Sergey Beryozkin <sberyozkin@gmail.com>",
Date: newTime("Fri Jul 7 17:44:26 2023 +0100"),
Message: newStringBuilderValue("Do not refresh OIDC session if the user is requesting logout\n"),
},
@ -2016,6 +2193,7 @@ protos:
Commit: &Commit{
Hash: "2a5d703b02b52d65c65ee9f7928f158b919ab741",
Author: "Sergey Beryozkin <sberyozkin@gmail.com>",
Committer: "Sergey Beryozkin <sberyozkin@gmail.com>",
Date: newTime("Fri Jul 7 17:44:26 2023 +0100"),
Message: newStringBuilderValue("Do not refresh OIDC session if the user is requesting logout\n"),
},
@ -2028,6 +2206,7 @@ protos:
Commit: &Commit{
Hash: "2a5d703b02b52d65c65ee9f7928f158b919ab741",
Author: "Sergey Beryozkin <sberyozkin@gmail.com>",
Committer: "Sergey Beryozkin <sberyozkin@gmail.com>",
Date: newTime("Fri Jul 7 17:44:26 2023 +0100"),
Message: newStringBuilderValue("Do not refresh OIDC session if the user is requesting logout\n"),
},
@ -2041,6 +2220,7 @@ protos:
Commit: &Commit{
Hash: "2a5d703b02b52d65c65ee9f7928f158b919ab741",
Author: "Sergey Beryozkin <sberyozkin@gmail.com>",
Committer: "Sergey Beryozkin <sberyozkin@gmail.com>",
Date: newTime("Fri Jul 7 17:44:26 2023 +0100"),
Message: newStringBuilderValue("Do not refresh OIDC session if the user is requesting logout\n"),
},
@ -2053,6 +2233,7 @@ protos:
Commit: &Commit{
Hash: "2a5d703b02b52d65c65ee9f7928f158b919ab741",
Author: "Sergey Beryozkin <sberyozkin@gmail.com>",
Committer: "Sergey Beryozkin <sberyozkin@gmail.com>",
Date: newTime("Fri Jul 7 17:44:26 2023 +0100"),
Message: newStringBuilderValue("Do not refresh OIDC session if the user is requesting logout\n"),
},
@ -2065,6 +2246,7 @@ protos:
Commit: &Commit{
Hash: "2a057632d7f5fa3d1c77b9aa037263211c0e0290",
Author: "rjtmahinay <rjt.mahinay@gmail.com>",
Committer: "rjtmahinay <rjt.mahinay@gmail.com>",
Date: newTime("Mon Jul 10 01:22:32 2023 +0800"),
Message: newStringBuilderValue("Add QuarkusApplication javadoc\n\n* Fix #34463\n"),
},
@ -2075,6 +2257,7 @@ protos:
Commit: &Commit{
Hash: "bca2d17491015ea1522f34517223b5a366aea73c",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:12:21 2023 -0400"),
Message: newStringBuilderValue("Delete binary file\n"),
},
@ -2084,6 +2267,7 @@ protos:
Commit: &Commit{
Hash: "afc6dc5d47f28366638da877ecb6b819c69e659b",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Mon Jul 10 12:21:33 2023 -0400"),
Message: newStringBuilderValue("Change binary file\n"),
},
@ -2095,6 +2279,7 @@ protos:
Commit: &Commit{
Hash: "638595917417c5c8a956937b28c5127719023363",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Mon Jul 10 12:20:35 2023 -0400"),
Message: newStringBuilderValue("Add binary file\n"),
},
@ -2105,6 +2290,7 @@ protos:
Commit: &Commit{
Hash: "ce0f5d1fe0272f180ccb660196f439c0c2f4ec8e",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:08:52 2023 -0400"),
Message: newStringBuilderValue("Delete file\n"),
},
@ -2113,6 +2299,7 @@ protos:
Commit: &Commit{
Hash: "d606a729383371558473b70a6a7b1ca264b0d205",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Mon Jul 10 14:17:04 2023 -0400"),
Message: newStringBuilderValue("Rename file\n"),
},
@ -2123,6 +2310,7 @@ protos:
Commit: &Commit{
Hash: "7bd16429f1f708746dabf970e54b05d2b4734997",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Tue Jul 11 18:10:49 2023 -0400"),
Message: newStringBuilderValue("Change file\n"),
},
@ -2135,6 +2323,7 @@ protos:
Commit: &Commit{
Hash: "c7062674c17192caa284615ab2fa9778c6602164",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Mon Jul 10 10:15:18 2023 -0400"),
Message: newStringBuilderValue("Create files\n"),
},
@ -2147,6 +2336,7 @@ protos:
Commit: &Commit{
Hash: "c7062674c17192caa284615ab2fa9778c6602164",
Author: "John Smith <john.smith@example.com>",
Committer: "John Smith <john.smith@example.com>",
Date: newTime("Mon Jul 10 10:15:18 2023 -0400"),
Message: newStringBuilderValue("Create files\n"),
},
@ -2283,7 +2473,9 @@ index 0000000..5af88a8
const singleCommitMultiDiff = `commit 70001020fab32b1fcf2f1f0e5c66424eae649826 (HEAD -> master, origin/master, origin/HEAD)
Author: Dustin Decker <humanatcomputer@gmail.com>
Date: Mon Mar 15 23:27:16 2021 -0700
AuthorDate: Mon Mar 15 23:27:16 2021 -0700
Commit: Dustin Decker <humanatcomputer@gmail.com>
CommitDate: Mon Mar 15 23:27:16 2021 -0700
Update aws
@ -2324,7 +2516,9 @@ index 239b415..2ee133b 100644
const singleCommitSingleDiff = `commit 70001020fab32b1fcf2f1f0e5c66424eae649826 (HEAD -> master, origin/master, origin/HEAD)
Author: Dustin Decker <humanatcomputer@gmail.com>
Date: Mon Mar 15 23:27:16 2021 -0700
AuthorDate: Mon Mar 15 23:27:16 2021 -0700
Commit: Dustin Decker <humanatcomputer@gmail.com>
CommitDate: Mon Mar 15 23:27:16 2021 -0700
Update aws

View file

@ -562,29 +562,55 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
break
}
fullHash := diff.Commit.Hash
commit := diff.Commit
fullHash := commit.Hash
if scanOptions.BaseHash != "" && scanOptions.BaseHash == fullHash {
logger.V(1).Info("reached base commit", "commit", fullHash)
break
}
email := commit.Author
when := commit.Date.UTC().Format("2006-01-02 15:04:05 -0700")
if fullHash != lastCommitHash {
depth++
lastCommitHash = fullHash
atomic.AddUint64(&s.metrics.commitsScanned, 1)
logger.V(5).Info("scanning commit", "commit", fullHash)
}
if !scanOptions.Filter.Pass(diff.PathB) {
continue
// Scan the commit metadata.
// See https://github.com/trufflesecurity/trufflehog/issues/2683
var (
metadata = s.sourceMetadataFunc("", email, fullHash, when, remoteURL, 0)
sb strings.Builder
)
sb.WriteString(email)
sb.WriteString("\n")
sb.WriteString(commit.Committer)
sb.WriteString("\n")
sb.WriteString(commit.Message.String())
chunk := sources.Chunk{
SourceName: s.sourceName,
SourceID: s.sourceID,
JobID: s.jobID,
SourceType: s.sourceType,
SourceMetadata: metadata,
Data: []byte(sb.String()),
Verify: s.verify,
}
if err := reporter.ChunkOk(ctx, chunk); err != nil {
return err
}
}
fileName := diff.PathB
if fileName == "" {
continue
}
email := diff.Commit.Author
when := diff.Commit.Date.UTC().Format("2006-01-02 15:04:05 -0700")
if !scanOptions.Filter.Pass(fileName) {
continue
}
// Handle binary files by reading the entire file rather than using the diff.
if diff.IsBinary {