2022-08-23 20:29:20 +00:00
|
|
|
package gitparse
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
2022-09-02 18:02:38 +00:00
|
|
|
"io"
|
2023-08-03 16:23:41 +00:00
|
|
|
"os"
|
2022-08-23 20:29:20 +00:00
|
|
|
"os/exec"
|
|
|
|
"path/filepath"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2024-01-30 20:30:51 +00:00
|
|
|
"github.com/go-logr/logr"
|
|
|
|
|
2022-08-29 18:45:37 +00:00
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
2024-02-15 19:40:34 +00:00
|
|
|
bufferwriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffer_writer"
|
2024-02-05 18:43:55 +00:00
|
|
|
bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer"
|
2022-08-23 20:29:20 +00:00
|
|
|
)
|
|
|
|
|
2023-01-23 18:14:10 +00:00
|
|
|
// Defaults installed by NewParser before any Option is applied.
const (
	// defaultDateFormat is the Go time layout used to parse the dates printed
	// by git.
	defaultDateFormat = "Mon Jan 2 15:04:05 2006 -0700"

	// defaultMaxDiffSize is the default upper bound, in bytes, on the size of
	// a single diff.
	defaultMaxDiffSize = 2 << 30 // 2GB

	// defaultMaxCommitSize is the default upper bound, in bytes, on the size
	// of a single commit.
	defaultMaxCommitSize = 2 << 30 // 2GB
)
|
|
|
|
|
|
|
|
// contentWriter is the storage abstraction used for diff content. It covers
// writing, reading back, and sizing the content, so that callers do not need
// to know whether the bytes live in an in-memory buffer or in a file.
type contentWriter interface {
	// Writer appends data to the content storage.
	io.Writer

	// ReadCloser provides a reader for accessing stored content.
	ReadCloser() (io.ReadCloser, error)

	// CloseForWriting closes the content storage for writing.
	CloseForWriting() error

	// Len returns the current size of the content.
	Len() int

	// String returns the content as a string or an error if the content
	// cannot be converted to a string.
	String() (string, error)
}
|
|
|
|
|
|
|
|
// Diff contains the information about a file diff in a commit.
// It abstracts the underlying content representation, allowing for flexible handling of diff content.
// The use of contentWriter enables the management of diff data either in memory or on disk,
// based on its size, optimizing resource usage and performance.
type Diff struct {
	// PathB is the file path taken from the diff's "+++ b/..." line or from
	// its "Binary files ... differ" line.
	PathB string
	// LineStart is the starting line number parsed from the hunk header
	// (the number before the comma in the header's third field).
	LineStart int
	// IsBinary is set when the diff was reported by git as a binary file change.
	IsBinary bool

	// Commit is the commit this diff belongs to.
	Commit *Commit

	// contentWriter holds the added lines of the diff. It may be nil on a
	// Diff that was built directly rather than through newDiff.
	contentWriter contentWriter
}
|
|
|
|
|
|
|
|
// diffOption is a functional option applied to a Diff by newDiff.
type diffOption func(*Diff)
|
|
|
|
|
|
|
|
// withPathB sets the PathB option.
|
|
|
|
func withPathB(pathB string) diffOption { return func(d *Diff) { d.PathB = pathB } }
|
|
|
|
|
2024-02-05 18:43:55 +00:00
|
|
|
// withCustomContentWriter sets the useCustomContentWriter option.
|
|
|
|
func withCustomContentWriter(cr contentWriter) diffOption {
|
|
|
|
return func(d *Diff) { d.contentWriter = cr }
|
|
|
|
}
|
|
|
|
|
2024-02-06 18:06:10 +00:00
|
|
|
// newDiff creates a new Diff with a threshold and an associated commit.
|
|
|
|
// All Diffs must have an associated commit.
|
|
|
|
// The contentWriter is used to manage the diff's content, allowing for flexible handling of diff data.
|
|
|
|
// By default, a buffer is used as the contentWriter, but this can be overridden with a custom contentWriter.
|
2024-04-25 15:27:15 +00:00
|
|
|
func newDiff(commit *Commit, opts ...diffOption) *Diff {
|
2024-04-25 15:01:38 +00:00
|
|
|
diff := &Diff{Commit: commit}
|
2024-01-30 20:30:51 +00:00
|
|
|
for _, opt := range opts {
|
|
|
|
opt(diff)
|
|
|
|
}
|
|
|
|
|
2024-04-25 15:01:38 +00:00
|
|
|
if diff.contentWriter == nil {
|
2024-04-25 15:27:15 +00:00
|
|
|
diff.contentWriter = bufferwriter.New()
|
2024-04-25 15:01:38 +00:00
|
|
|
}
|
|
|
|
|
2024-01-30 20:30:51 +00:00
|
|
|
return diff
|
|
|
|
}
|
|
|
|
|
|
|
|
// Len returns the length of the storage.
|
|
|
|
func (d *Diff) Len() int { return d.contentWriter.Len() }
|
|
|
|
|
|
|
|
// ReadCloser returns a ReadCloser for the contentWriter.
|
|
|
|
func (d *Diff) ReadCloser() (io.ReadCloser, error) { return d.contentWriter.ReadCloser() }
|
|
|
|
|
|
|
|
// write delegates to the contentWriter.
|
2024-04-23 15:47:53 +00:00
|
|
|
func (d *Diff) write(p []byte) error {
|
|
|
|
_, err := d.contentWriter.Write(p)
|
2024-01-30 20:30:51 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// finalize ensures proper closure of resources associated with the Diff.
|
|
|
|
// handle the final flush in the finalize method, in case there's data remaining in the buffer.
|
|
|
|
// This method should be called to release resources, especially when writing to a file.
|
2024-04-25 15:27:15 +00:00
|
|
|
func (d *Diff) finalize() error { return d.contentWriter.CloseForWriting() }
|
2024-01-30 20:30:51 +00:00
|
|
|
|
2022-08-23 20:29:20 +00:00
|
|
|
// Commit contains commit header info and diffs.
type Commit struct {
	// Hash is the commit hash taken from the "commit <hash>" line.
	Hash string
	// Author is the trimmed value of the "Author:" header line.
	Author string
	// Committer is the trimmed value of the "Commit:" header line.
	Committer string
	// Date is parsed from the "AuthorDate:" header line.
	Date time.Time
	// Message accumulates the commit message lines and any notes.
	// A strings.Builder must not be copied after first use, so a Commit
	// should be passed by pointer.
	Message strings.Builder
	Size    int // in bytes

	// hasDiffs records that at least one Diff has been emitted for this commit.
	hasDiffs bool
}
|
|
|
|
|
2023-02-04 21:19:23 +00:00
|
|
|
// Parser holds the settings used when parsing git output.
// Construct it with NewParser so the defaults are applied.
type Parser struct {
	// maxDiffSize is the diff size, in bytes, above which FromReader stops parsing.
	maxDiffSize int
	// maxCommitSize is the configured commit size limit, in bytes.
	// NOTE(review): its enforcement is not visible in this part of the file.
	maxCommitSize int
	// dateFormat is the time layout used to parse the AuthorDate header.
	dateFormat string

	// useCustomContentWriter selects bufferedfilewriter instead of
	// bufferwriter as the storage for diff content.
	useCustomContentWriter bool
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// ParseState identifies the kind of line the parser most recently accepted.
// The line classifiers use it to decide which kinds of line may follow.
type ParseState int

const (
	Initial ParseState = iota
	CommitLine
	MergeLine
	AuthorLine
	AuthorDateLine
	CommitterLine
	CommitterDateLine
	MessageStartLine
	MessageLine
	MessageEndLine
	NotesStartLine
	NotesLine
	NotesEndLine
	DiffLine
	ModeLine
	IndexLine
	FromFileLine
	ToFileLine
	BinaryFileLine
	HunkLineNumberLine
	HunkContentLine
	ParseFailure
)

// parseStateNames holds the display name of every ParseState. Entries are
// keyed by constant so the table cannot drift out of order with the const
// block above.
var parseStateNames = [...]string{
	Initial:            "Initial",
	CommitLine:         "CommitLine",
	MergeLine:          "MergeLine",
	AuthorLine:         "AuthorLine",
	AuthorDateLine:     "AuthorDateLine",
	CommitterLine:      "CommitterLine",
	CommitterDateLine:  "CommitterDateLine",
	MessageStartLine:   "MessageStartLine",
	MessageLine:        "MessageLine",
	MessageEndLine:     "MessageEndLine",
	NotesStartLine:     "NotesStartLine",
	NotesLine:          "NotesLine",
	NotesEndLine:       "NotesEndLine",
	DiffLine:           "DiffLine",
	ModeLine:           "ModeLine",
	IndexLine:          "IndexLine",
	FromFileLine:       "FromFileLine",
	ToFileLine:         "ToFileLine",
	BinaryFileLine:     "BinaryFileLine",
	HunkLineNumberLine: "HunkLineNumberLine",
	HunkContentLine:    "HunkContentLine",
	ParseFailure:       "ParseFailure",
}

// String returns the name of the state. A value outside the declared range
// is rendered as "ParseState(<n>)" instead of causing an index-out-of-range
// panic, so String is always safe to call from logging code.
func (state ParseState) String() string {
	if state < 0 || int(state) >= len(parseStateNames) {
		return "ParseState(" + strconv.Itoa(int(state)) + ")"
	}
	return parseStateNames[state]
}
|
|
|
|
|
2024-02-05 18:43:55 +00:00
|
|
|
// UseCustomContentWriter sets useCustomContentWriter option.
|
|
|
|
func UseCustomContentWriter() Option {
|
|
|
|
return func(parser *Parser) { parser.useCustomContentWriter = true }
|
2024-01-30 20:51:58 +00:00
|
|
|
}
|
|
|
|
|
2023-02-04 21:19:23 +00:00
|
|
|
// WithMaxDiffSize sets maxDiffSize option. Diffs larger than maxDiffSize will
|
|
|
|
// be truncated.
|
|
|
|
func WithMaxDiffSize(maxDiffSize int) Option {
|
|
|
|
return func(parser *Parser) {
|
|
|
|
parser.maxDiffSize = maxDiffSize
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// WithMaxCommitSize sets maxCommitSize option. Commits larger than maxCommitSize
|
|
|
|
// will be put in the commit channel and additional diffs will be added to a
|
|
|
|
// new commit.
|
|
|
|
func WithMaxCommitSize(maxCommitSize int) Option {
|
|
|
|
return func(parser *Parser) {
|
|
|
|
parser.maxCommitSize = maxCommitSize
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Option is a functional option that configures a Parser; pass any number of
// them to NewParser.
type Option func(*Parser)
|
|
|
|
|
|
|
|
// NewParser creates a GitParse config from options and sets defaults.
|
|
|
|
func NewParser(options ...Option) *Parser {
|
|
|
|
parser := &Parser{
|
|
|
|
dateFormat: defaultDateFormat,
|
|
|
|
maxDiffSize: defaultMaxDiffSize,
|
|
|
|
maxCommitSize: defaultMaxCommitSize,
|
|
|
|
}
|
|
|
|
for _, option := range options {
|
|
|
|
option(parser)
|
|
|
|
}
|
|
|
|
return parser
|
|
|
|
}
|
|
|
|
|
2022-08-23 20:29:20 +00:00
|
|
|
// RepoPath parses the output of the `git log` command for the `source` path.
|
2024-02-06 18:06:10 +00:00
|
|
|
// The Diff chan will return diffs in the order they are parsed from the log.
|
2024-08-01 00:30:51 +00:00
|
|
|
func (c *Parser) RepoPath(
|
|
|
|
ctx context.Context,
|
|
|
|
source string,
|
|
|
|
head string,
|
|
|
|
abbreviatedLog bool,
|
|
|
|
excludedGlobs []string,
|
|
|
|
isBare bool,
|
|
|
|
) (chan *Diff, error) {
|
2024-04-29 20:58:45 +00:00
|
|
|
args := []string{
|
|
|
|
"-C", source,
|
|
|
|
"log",
|
|
|
|
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
|
|
|
|
"--full-history",
|
|
|
|
"--date=format:%a %b %d %H:%M:%S %Y %z",
|
|
|
|
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
|
|
|
|
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
|
|
|
|
}
|
2023-01-26 17:17:54 +00:00
|
|
|
if abbreviatedLog {
|
|
|
|
args = append(args, "--diff-filter=AM")
|
|
|
|
}
|
2022-08-23 20:29:20 +00:00
|
|
|
if head != "" {
|
|
|
|
args = append(args, head)
|
|
|
|
} else {
|
|
|
|
args = append(args, "--all")
|
|
|
|
}
|
2023-03-28 15:46:03 +00:00
|
|
|
for _, glob := range excludedGlobs {
|
2024-08-01 00:30:51 +00:00
|
|
|
args = append(args, "--", ".", ":(exclude)"+glob)
|
2023-03-28 15:46:03 +00:00
|
|
|
}
|
2022-08-23 20:29:20 +00:00
|
|
|
|
|
|
|
cmd := exec.Command("git", args...)
|
|
|
|
absPath, err := filepath.Abs(source)
|
|
|
|
if err == nil {
|
2023-08-03 16:23:41 +00:00
|
|
|
if !isBare {
|
|
|
|
cmd.Env = append(cmd.Env, "GIT_DIR="+filepath.Join(absPath, ".git"))
|
|
|
|
} else {
|
|
|
|
cmd.Env = append(cmd.Env,
|
|
|
|
"GIT_DIR="+absPath,
|
|
|
|
)
|
|
|
|
// We need those variables to handle incoming commits
|
|
|
|
// while using trufflehog in pre-receive hooks
|
|
|
|
if dir := os.Getenv("GIT_OBJECT_DIRECTORY"); dir != "" {
|
|
|
|
cmd.Env = append(cmd.Env, "GIT_OBJECT_DIRECTORY="+dir)
|
|
|
|
}
|
|
|
|
if dir := os.Getenv("GIT_ALTERNATE_OBJECT_DIRECTORIES"); dir != "" {
|
|
|
|
cmd.Env = append(cmd.Env, "GIT_ALTERNATE_OBJECT_DIRECTORIES="+dir)
|
|
|
|
}
|
|
|
|
}
|
2022-08-23 20:29:20 +00:00
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
return c.executeCommand(ctx, cmd, false)
|
2022-09-04 01:01:36 +00:00
|
|
|
}
|
|
|
|
|
2023-06-29 20:33:30 +00:00
|
|
|
// Staged parses the output of the `git diff` command for the `source` path.
|
2024-02-06 18:06:10 +00:00
|
|
|
func (c *Parser) Staged(ctx context.Context, source string) (chan *Diff, error) {
|
2023-03-01 16:58:36 +00:00
|
|
|
// Provide the --cached flag to diff to get the diff of the staged changes.
|
2023-06-29 20:33:30 +00:00
|
|
|
args := []string{"-C", source, "diff", "-p", "--cached", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z"}
|
2022-09-04 01:01:36 +00:00
|
|
|
|
|
|
|
cmd := exec.Command("git", args...)
|
|
|
|
|
|
|
|
absPath, err := filepath.Abs(source)
|
|
|
|
if err == nil {
|
2024-08-01 00:30:51 +00:00
|
|
|
cmd.Env = append(cmd.Env, "GIT_DIR="+filepath.Join(absPath, ".git"))
|
2022-09-04 01:01:36 +00:00
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
return c.executeCommand(ctx, cmd, true)
|
2022-09-04 01:01:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// executeCommand starts cmd and returns a channel on which the Diffs parsed
// from its stdout are delivered. It does not block on the command: stderr is
// logged and stdout is parsed by FromReader in background goroutines, and
// the command is waited on after parsing has finished.
//
// isStaged is forwarded to FromReader. A non-nil error means a pipe could
// not be created or the command could not be started; in that case nothing
// will ever be sent on, or close, the returned channel.
func (c *Parser) executeCommand(ctx context.Context, cmd *exec.Cmd, isStaged bool) (chan *Diff, error) {
	// Buffered so the parser can run ahead of the consumer.
	diffChan := make(chan *Diff, 64)

	stdOut, err := cmd.StdoutPipe()
	if err != nil {
		return diffChan, err
	}
	stdErr, err := cmd.StderrPipe()
	if err != nil {
		return diffChan, err
	}

	err = cmd.Start()
	if err != nil {
		return diffChan, err
	}

	// Forward everything git writes to stderr to the debug log.
	// NOTE(review): scanner.Err() is not checked, and nothing waits for this
	// goroutine before cmd.Wait is called below.
	go func() {
		scanner := bufio.NewScanner(stdErr)
		for scanner.Scan() {
			ctx.Logger().V(2).Info(scanner.Text())
		}
	}()

	// Parse stdout, then release the pipe and reap the process. FromReader
	// closes diffChan when it returns.
	go func() {
		c.FromReader(ctx, stdOut, diffChan, isStaged)
		if err := stdOut.Close(); err != nil {
			ctx.Logger().V(2).Info("Error closing git stdout pipe.", "error", err)
		}
		if err := cmd.Wait(); err != nil {
			ctx.Logger().V(2).Info("Error waiting for git command to complete.", "error", err)
		}
	}()

	return diffChan, nil
}
|
|
|
|
|
2024-02-06 18:06:10 +00:00
|
|
|
func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan *Diff, isStaged bool) {
|
2022-09-02 18:02:38 +00:00
|
|
|
outReader := bufio.NewReader(stdOut)
|
2023-05-02 23:36:39 +00:00
|
|
|
var (
|
2023-07-25 22:52:34 +00:00
|
|
|
currentCommit *Commit
|
|
|
|
|
|
|
|
totalLogSize int
|
2023-05-02 23:36:39 +00:00
|
|
|
)
|
2023-07-25 22:52:34 +00:00
|
|
|
var latestState = Initial
|
2024-02-05 18:43:55 +00:00
|
|
|
|
2024-02-06 18:06:10 +00:00
|
|
|
diff := func(c *Commit, opts ...diffOption) *Diff {
|
2024-04-25 15:27:15 +00:00
|
|
|
opts = append(opts, withCustomContentWriter(bufferwriter.New()))
|
|
|
|
return newDiff(c, opts...)
|
2024-02-05 18:43:55 +00:00
|
|
|
}
|
|
|
|
if c.useCustomContentWriter {
|
2024-02-06 18:06:10 +00:00
|
|
|
diff = func(c *Commit, opts ...diffOption) *Diff {
|
2024-04-25 15:27:15 +00:00
|
|
|
opts = append(opts, withCustomContentWriter(bufferedfilewriter.New()))
|
|
|
|
return newDiff(c, opts...)
|
2024-02-05 18:43:55 +00:00
|
|
|
}
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
currentDiff := diff(currentCommit)
|
2022-09-02 18:02:38 +00:00
|
|
|
|
2022-09-22 14:01:10 +00:00
|
|
|
defer common.RecoverWithExit(ctx)
|
2024-02-06 18:06:10 +00:00
|
|
|
defer close(diffChan)
|
2022-09-02 18:02:38 +00:00
|
|
|
for {
|
2023-02-07 23:25:00 +00:00
|
|
|
if common.IsDone(ctx) {
|
|
|
|
break
|
|
|
|
}
|
2023-07-25 22:52:34 +00:00
|
|
|
|
2022-09-02 18:02:38 +00:00
|
|
|
line, err := outReader.ReadBytes([]byte("\n")[0])
|
|
|
|
if err != nil && len(line) == 0 {
|
|
|
|
break
|
|
|
|
}
|
2023-07-25 22:52:34 +00:00
|
|
|
|
2022-09-02 18:02:38 +00:00
|
|
|
switch {
|
2023-07-25 22:52:34 +00:00
|
|
|
case isCommitLine(isStaged, latestState, line):
|
|
|
|
latestState = CommitLine
|
|
|
|
|
2022-09-02 18:02:38 +00:00
|
|
|
// If there is a currentDiff, add it to currentCommit.
|
2024-01-30 20:30:51 +00:00
|
|
|
if currentDiff.Len() > 0 || currentDiff.IsBinary {
|
|
|
|
if err := currentDiff.finalize(); err != nil {
|
|
|
|
ctx.Logger().Error(
|
|
|
|
err,
|
|
|
|
"failed to finalize diff",
|
|
|
|
"commit", currentCommit.Hash,
|
|
|
|
"diff", currentDiff.PathB,
|
|
|
|
"size", currentDiff.Len(),
|
|
|
|
"latest_state", latestState.String(),
|
|
|
|
)
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
diffChan <- currentDiff
|
2024-01-30 20:30:51 +00:00
|
|
|
currentCommit.Size += currentDiff.Len()
|
2024-02-13 15:21:22 +00:00
|
|
|
currentCommit.hasDiffs = true
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
|
|
|
// If there is a currentCommit, send it to the channel.
|
|
|
|
if currentCommit != nil {
|
2023-05-02 23:36:39 +00:00
|
|
|
totalLogSize += currentCommit.Size
|
2024-02-13 15:21:22 +00:00
|
|
|
if !currentCommit.hasDiffs {
|
|
|
|
// Initialize an empty Diff instance associated with the given commit.
|
|
|
|
// Since this diff represents "no changes", we only need to set the commit.
|
|
|
|
// This is required to ensure commits that have no diffs are still processed.
|
|
|
|
diffChan <- &Diff{Commit: currentCommit}
|
|
|
|
}
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
2024-02-13 15:21:22 +00:00
|
|
|
|
2022-09-02 18:02:38 +00:00
|
|
|
// Create a new currentDiff and currentCommit
|
2024-01-30 20:30:51 +00:00
|
|
|
currentCommit = &Commit{Message: strings.Builder{}}
|
2024-02-06 18:06:10 +00:00
|
|
|
currentDiff = diff(currentCommit)
|
2022-09-02 18:02:38 +00:00
|
|
|
// Check that the commit line contains a hash and set it.
|
|
|
|
if len(line) >= 47 {
|
|
|
|
currentCommit.Hash = string(line[7:47])
|
|
|
|
}
|
2023-07-27 14:24:49 +00:00
|
|
|
case isMergeLine(isStaged, latestState, line):
|
|
|
|
latestState = MergeLine
|
2023-07-25 22:52:34 +00:00
|
|
|
case isAuthorLine(isStaged, latestState, line):
|
|
|
|
latestState = AuthorLine
|
2024-04-29 20:58:45 +00:00
|
|
|
currentCommit.Author = strings.TrimSpace(string(line[8:]))
|
|
|
|
case isAuthorDateLine(isStaged, latestState, line):
|
|
|
|
latestState = AuthorDateLine
|
2023-07-25 22:52:34 +00:00
|
|
|
|
2024-04-29 20:58:45 +00:00
|
|
|
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[12:])))
|
2022-09-02 18:02:38 +00:00
|
|
|
if err != nil {
|
2024-04-29 20:58:45 +00:00
|
|
|
ctx.Logger().Error(err, "failed to parse commit date", "commit", currentCommit.Hash, "latestState", latestState.String())
|
|
|
|
latestState = ParseFailure
|
|
|
|
continue
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
|
|
|
currentCommit.Date = date
|
2024-04-29 20:58:45 +00:00
|
|
|
case isCommitterLine(isStaged, latestState, line):
|
|
|
|
latestState = CommitterLine
|
|
|
|
currentCommit.Committer = strings.TrimSpace(string(line[8:]))
|
|
|
|
case isCommitterDateLine(isStaged, latestState, line):
|
|
|
|
latestState = CommitterDateLine
|
|
|
|
// NoOp
|
2023-07-25 22:52:34 +00:00
|
|
|
case isMessageStartLine(isStaged, latestState, line):
|
|
|
|
latestState = MessageStartLine
|
|
|
|
// NoOp
|
|
|
|
case isMessageLine(isStaged, latestState, line):
|
|
|
|
latestState = MessageLine
|
2024-02-06 18:06:10 +00:00
|
|
|
currentCommit.Message.Write(line[4:]) // Messages are indented by 4 spaces.
|
2023-07-25 22:52:34 +00:00
|
|
|
|
|
|
|
case isMessageEndLine(isStaged, latestState, line):
|
|
|
|
latestState = MessageEndLine
|
|
|
|
// NoOp
|
2024-04-29 20:58:45 +00:00
|
|
|
case isNotesStartLine(isStaged, latestState, line):
|
|
|
|
latestState = NotesStartLine
|
|
|
|
|
|
|
|
currentCommit.Message.WriteString("\n")
|
|
|
|
currentCommit.Message.Write(line)
|
|
|
|
case isNotesLine(isStaged, latestState, line):
|
|
|
|
latestState = NotesLine
|
|
|
|
currentCommit.Message.Write(line[4:]) // Notes are indented by 4 spaces.
|
|
|
|
case isNotesEndLine(isStaged, latestState, line):
|
|
|
|
latestState = NotesEndLine
|
|
|
|
// NoOp
|
2023-07-25 22:52:34 +00:00
|
|
|
case isDiffLine(isStaged, latestState, line):
|
|
|
|
latestState = DiffLine
|
|
|
|
|
2024-01-30 20:30:51 +00:00
|
|
|
if currentDiff.Len() > 0 || currentDiff.IsBinary {
|
|
|
|
if err := currentDiff.finalize(); err != nil {
|
|
|
|
ctx.Logger().Error(err,
|
|
|
|
"failed to finalize diff",
|
|
|
|
"commit", currentCommit.Hash,
|
|
|
|
"diff", currentDiff.PathB,
|
|
|
|
"size", currentDiff.Len(),
|
|
|
|
"latest_state", latestState.String(),
|
|
|
|
)
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
diffChan <- currentDiff
|
2024-02-13 15:21:22 +00:00
|
|
|
currentCommit.hasDiffs = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// This should never be nil, but check in case the stdin stream is messed up.
|
|
|
|
if currentCommit == nil {
|
|
|
|
currentCommit = &Commit{}
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
currentDiff = diff(currentCommit)
|
|
|
|
case isModeLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState = ModeLine
|
2022-09-02 18:02:38 +00:00
|
|
|
// NoOp
|
2024-02-06 18:06:10 +00:00
|
|
|
case isIndexLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState = IndexLine
|
2022-09-02 18:02:38 +00:00
|
|
|
// NoOp
|
2024-02-06 18:06:10 +00:00
|
|
|
case isBinaryLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState = BinaryFileLine
|
|
|
|
|
2024-03-22 15:35:10 +00:00
|
|
|
path, ok := pathFromBinaryLine(line)
|
2024-02-08 15:25:04 +00:00
|
|
|
if !ok {
|
|
|
|
err = fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line)
|
2024-03-19 15:50:27 +00:00
|
|
|
ctx.Logger().Error(err, "Failed to parse BinaryFileLine")
|
2024-02-08 15:25:04 +00:00
|
|
|
latestState = ParseFailure
|
|
|
|
continue
|
|
|
|
}
|
2023-10-26 17:07:02 +00:00
|
|
|
|
|
|
|
// Don't do anything if the file is deleted. (pathA has file path, pathB is /dev/null)
|
2024-02-08 15:25:04 +00:00
|
|
|
if path != "" {
|
|
|
|
currentDiff.PathB = path
|
2023-10-26 17:07:02 +00:00
|
|
|
currentDiff.IsBinary = true
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
case isFromFileLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState = FromFileLine
|
|
|
|
// NoOp
|
2024-02-06 18:06:10 +00:00
|
|
|
case isToFileLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState = ToFileLine
|
|
|
|
|
2024-03-22 15:35:10 +00:00
|
|
|
path, ok := pathFromToFileLine(line)
|
2024-03-19 15:50:27 +00:00
|
|
|
if !ok {
|
2024-03-22 15:35:10 +00:00
|
|
|
err = fmt.Errorf(`expected line to match format '+++ b/path/to/file.go', got '%s'`, line)
|
2024-03-19 15:50:27 +00:00
|
|
|
ctx.Logger().Error(err, "Failed to parse ToFileLine")
|
|
|
|
latestState = ParseFailure
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
currentDiff.PathB = path
|
2024-02-06 18:06:10 +00:00
|
|
|
case isHunkLineNumberLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState = HunkLineNumberLine
|
|
|
|
|
2024-01-30 20:30:51 +00:00
|
|
|
if currentDiff.Len() > 0 || currentDiff.IsBinary {
|
|
|
|
if err := currentDiff.finalize(); err != nil {
|
|
|
|
ctx.Logger().Error(
|
|
|
|
err,
|
|
|
|
"failed to finalize diff",
|
|
|
|
"commit", currentCommit.Hash,
|
|
|
|
"diff", currentDiff.PathB,
|
|
|
|
"size", currentDiff.Len(),
|
|
|
|
"latest_state", latestState.String(),
|
|
|
|
)
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
diffChan <- currentDiff
|
2022-08-23 20:29:20 +00:00
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
currentDiff = diff(currentCommit, withPathB(currentDiff.PathB))
|
2022-08-23 20:29:20 +00:00
|
|
|
|
2022-09-02 18:02:38 +00:00
|
|
|
words := bytes.Split(line, []byte(" "))
|
|
|
|
if len(words) >= 3 {
|
|
|
|
startSlice := bytes.Split(words[2], []byte(","))
|
|
|
|
lineStart, err := strconv.Atoi(string(startSlice[0]))
|
|
|
|
if err == nil {
|
|
|
|
currentDiff.LineStart = lineStart
|
2022-08-23 20:29:20 +00:00
|
|
|
}
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
case isHunkContextLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
if latestState != HunkContentLine {
|
|
|
|
latestState = HunkContentLine
|
|
|
|
}
|
|
|
|
// TODO: Why do we care about this? It creates empty lines in the diff. If there are no plusLines, it's just newlines.
|
2024-04-23 15:47:53 +00:00
|
|
|
if err := currentDiff.write([]byte("\n")); err != nil {
|
2024-01-30 20:30:51 +00:00
|
|
|
ctx.Logger().Error(err, "failed to write to diff")
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
case isHunkPlusLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
if latestState != HunkContentLine {
|
|
|
|
latestState = HunkContentLine
|
|
|
|
}
|
|
|
|
|
2024-04-23 15:47:53 +00:00
|
|
|
if err := currentDiff.write(line[1:]); err != nil {
|
2024-01-30 20:30:51 +00:00
|
|
|
ctx.Logger().Error(err, "failed to write to diff")
|
2023-07-25 22:52:34 +00:00
|
|
|
}
|
|
|
|
// NoOp. We only care about additions.
|
2024-02-06 18:06:10 +00:00
|
|
|
case isHunkMinusLine(latestState, line),
|
|
|
|
isHunkNewlineWarningLine(latestState, line),
|
|
|
|
isHunkEmptyLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
if latestState != HunkContentLine {
|
|
|
|
latestState = HunkContentLine
|
|
|
|
}
|
|
|
|
// NoOp
|
2024-02-06 18:06:10 +00:00
|
|
|
case isCommitSeparatorLine(latestState, line):
|
2023-07-25 22:52:34 +00:00
|
|
|
// NoOp
|
|
|
|
default:
|
|
|
|
// Skip ahead until we find the next diff or commit.
|
|
|
|
if latestState == ParseFailure {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Here be dragons...
|
|
|
|
// Build an informative error message.
|
2024-01-04 22:53:08 +00:00
|
|
|
err := fmt.Errorf(`invalid line "%s" after state "%s"`, line, latestState)
|
|
|
|
var logger logr.Logger
|
2023-07-25 22:52:34 +00:00
|
|
|
if currentCommit != nil && currentCommit.Hash != "" {
|
2024-01-04 22:53:08 +00:00
|
|
|
logger = ctx.Logger().WithValues("commit", currentCommit.Hash)
|
2023-07-25 22:52:34 +00:00
|
|
|
} else {
|
2024-01-04 22:53:08 +00:00
|
|
|
logger = ctx.Logger()
|
2023-07-25 22:52:34 +00:00
|
|
|
}
|
2024-01-04 22:53:08 +00:00
|
|
|
logger.Error(err, "failed to parse Git input. Recovering at the latest commit or diff...")
|
2023-07-25 22:52:34 +00:00
|
|
|
|
|
|
|
latestState = ParseFailure
|
2022-08-23 20:29:20 +00:00
|
|
|
}
|
2023-07-25 22:52:34 +00:00
|
|
|
|
2024-01-30 20:30:51 +00:00
|
|
|
if currentDiff.Len() > c.maxDiffSize {
|
2023-02-14 23:00:07 +00:00
|
|
|
ctx.Logger().V(2).Info(fmt.Sprintf(
|
|
|
|
"Diff for %s exceeded MaxDiffSize(%d)", currentDiff.PathB, c.maxDiffSize,
|
|
|
|
))
|
2023-01-23 18:14:10 +00:00
|
|
|
break
|
|
|
|
}
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
cleanupParse(ctx, currentCommit, currentDiff, diffChan, &totalLogSize)
|
2023-05-02 23:36:39 +00:00
|
|
|
|
|
|
|
ctx.Logger().V(2).Info("finished parsing git log.", "total_log_size", totalLogSize)
|
2023-02-07 23:25:00 +00:00
|
|
|
}
|
|
|
|
|
2023-07-27 14:24:49 +00:00
|
|
|
func isMergeLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || latestState != CommitLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 6 && bytes.Equal(line[:6], []byte("Merge:")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// commit 7a95bbf0199e280a0e42dbb1d1a3f56cdd0f6e05
|
|
|
|
func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || !(latestState == Initial ||
|
|
|
|
latestState == MessageStartLine ||
|
|
|
|
latestState == MessageEndLine ||
|
|
|
|
latestState == ModeLine ||
|
|
|
|
latestState == IndexLine ||
|
|
|
|
latestState == BinaryFileLine ||
|
|
|
|
latestState == ToFileLine ||
|
|
|
|
latestState == HunkContentLine ||
|
|
|
|
latestState == ParseFailure) {
|
|
|
|
return false
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
2023-07-25 22:52:34 +00:00
|
|
|
|
|
|
|
if len(line) > 7 && bytes.Equal(line[:7], []byte("commit ")) {
|
|
|
|
return true
|
2022-09-02 18:02:38 +00:00
|
|
|
}
|
2023-07-25 22:52:34 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Author: Bill Rich <bill.rich@trufflesec.com>
|
|
|
|
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
|
2023-07-27 14:24:49 +00:00
|
|
|
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {
|
2023-07-25 22:52:34 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 8 && bytes.Equal(line[:7], []byte("Author:")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
2022-08-23 20:29:20 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 20:58:45 +00:00
|
|
|
// AuthorDate: Tue Aug 10 15:20:40 2021 +0100
|
|
|
|
func isAuthorDateLine(isStaged bool, latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if isStaged || latestState != AuthorLine {
|
|
|
|
return false
|
|
|
|
}
|
2024-04-29 20:58:45 +00:00
|
|
|
if len(line) > 10 && bytes.Equal(line[:11], []byte("AuthorDate:")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Commit: Bill Rich <bill.rich@trufflesec.com>
|
|
|
|
func isCommitterLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || latestState != AuthorDateLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 8 && bytes.Equal(line[:7], []byte("Commit:")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-04-29 20:58:45 +00:00
|
|
|
// CommitDate: Wed Apr 17 19:59:28 2024 -0400
|
|
|
|
func isCommitterDateLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || latestState != CommitterLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 10 && bytes.Equal(line[:11], []byte("CommitDate:")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line directly after CommitterDate with only a newline.
|
2023-07-25 22:52:34 +00:00
|
|
|
func isMessageStartLine(isStaged bool, latestState ParseState, line []byte) bool {
|
2024-04-29 20:58:45 +00:00
|
|
|
if isStaged || latestState != CommitterDateLine {
|
2023-07-25 22:52:34 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
// TODO: Improve the implementation of this and isMessageEndLine
|
|
|
|
if len(strings.TrimRight(string(line[:]), "\r\n")) == 0 {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// Line that starts with 4 spaces
|
|
|
|
func isMessageLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || !(latestState == MessageStartLine || latestState == MessageLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 4 && bytes.Equal(line[:4], []byte(" ")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line directly after MessageLine with only a newline.
|
|
|
|
func isMessageEndLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || latestState != MessageLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(strings.TrimRight(string(line[:]), "\r\n")) == 0 {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-04-29 20:58:45 +00:00
|
|
|
// `Notes:` or `Notes (context):`
|
|
|
|
// See https://tylercipriani.com/blog/2022/11/19/git-notes-gits-coolest-most-unloved-feature/
|
|
|
|
func isNotesStartLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || latestState != MessageEndLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 5 && bytes.Equal(line[:5], []byte("Notes")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line after NotesStartLine that starts with 4 spaces
|
|
|
|
func isNotesLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || !(latestState == NotesStartLine || latestState == NotesLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 4 && bytes.Equal(line[:4], []byte(" ")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line directly after NotesLine with only a newline.
|
|
|
|
func isNotesEndLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if isStaged || latestState != NotesLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(strings.TrimRight(string(line[:]), "\r\n")) == 0 {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2022-08-23 20:29:20 +00:00
|
|
|
// diff --git a/internal/addrs/move_endpoint_module.go b/internal/addrs/move_endpoint_module.go
|
2023-07-25 22:52:34 +00:00
|
|
|
func isDiffLine(isStaged bool, latestState ParseState, line []byte) bool {
|
|
|
|
if !(latestState == MessageStartLine || // Empty commit messages can go from MessageStart->Diff
|
|
|
|
latestState == MessageEndLine ||
|
2024-04-29 20:58:45 +00:00
|
|
|
latestState == NotesEndLine ||
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState == BinaryFileLine ||
|
2024-04-29 20:58:45 +00:00
|
|
|
latestState == ModeLine ||
|
2023-07-25 22:52:34 +00:00
|
|
|
latestState == IndexLine ||
|
|
|
|
latestState == HunkContentLine ||
|
|
|
|
latestState == ParseFailure) {
|
2024-04-29 20:58:45 +00:00
|
|
|
if !(isStaged && latestState == Initial) {
|
2023-07-25 22:52:34 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(line) > 11 && bytes.Equal(line[:11], []byte("diff --git ")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// old mode 100644
|
|
|
|
// new mode 100755
|
|
|
|
// new file mode 100644
|
|
|
|
// similarity index 100%
|
|
|
|
// rename from old.txt
|
|
|
|
// rename to new.txt
|
|
|
|
// deleted file mode 100644
|
2024-02-06 18:06:10 +00:00
|
|
|
func isModeLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == DiffLine || latestState == ModeLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
// This could probably be better written.
|
|
|
|
if (len(line) > 17 && bytes.Equal(line[:17], []byte("deleted file mode"))) ||
|
|
|
|
(len(line) > 16 && bytes.Equal(line[:16], []byte("similarity index"))) ||
|
|
|
|
(len(line) > 13 && bytes.Equal(line[:13], []byte("new file mode"))) ||
|
|
|
|
(len(line) > 11 && bytes.Equal(line[:11], []byte("rename from"))) ||
|
|
|
|
(len(line) > 9 && bytes.Equal(line[:9], []byte("rename to"))) ||
|
|
|
|
(len(line) > 8 && bytes.Equal(line[:8], []byte("old mode"))) ||
|
|
|
|
(len(line) > 8 && bytes.Equal(line[:8], []byte("new mode"))) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// index 1ed6fbee1..aea1e643a 100644
|
2023-07-25 22:52:34 +00:00
|
|
|
// index 00000000..e69de29b
|
2024-02-06 18:06:10 +00:00
|
|
|
func isIndexLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == DiffLine || latestState == ModeLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 6 && bytes.Equal(line[:6], []byte("index ")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// Binary files /dev/null and b/plugin.sig differ
|
2024-02-06 18:06:10 +00:00
|
|
|
func isBinaryLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if latestState != IndexLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) > 7 && bytes.Equal(line[:6], []byte("Binary")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// pathFromBinaryLine extracts the b/ file path from a "Binary files ... differ" line.
// The edge case of files having ` and b/` in the name is ignored for simplicity.
//
// It returns:
//   - ("", true) when the line contains "and /dev/null";
//   - (path, true) when a plain or quoted b/ path is found;
//   - ("", false) when the line does not match a known format, does not end in
//     " differ", or a quoted path cannot be unescaped.
func pathFromBinaryLine(line []byte) (string, bool) {
	if bytes.Contains(line, []byte("and /dev/null")) {
		return "", true
	}

	// Strip the line terminator up front so that "\n", "\r\n" and a missing
	// terminator are all handled the same way, instead of assuming a fixed
	// number of trailing bytes.
	line = bytes.TrimRight(line, "\r\n")

	const (
		plainSuffix  = " differ"
		quotedSuffix = `" differ`
	)

	if _, after, ok := bytes.Cut(line, []byte(" and b/")); ok {
		if !bytes.HasSuffix(after, []byte(plainSuffix)) {
			// Truncated or unexpected line; slicing blindly would panic or corrupt the path.
			return "", false
		}
		return string(after[:len(after)-len(plainSuffix)]), true
	}

	if _, after, ok := bytes.Cut(line, []byte(` and "b/`)); ok {
		// Edge case where the path is quoted.
		// https://github.com/trufflesecurity/trufflehog/issues/2384
		if !bytes.HasSuffix(after, []byte(quotedSuffix)) {
			return "", false
		}

		// Drop the `" differ` and handle escaped characters in the path.
		// e.g., "\342\200\224" instead of "—".
		// See https://github.com/trufflesecurity/trufflehog/issues/2418
		path, err := strconv.Unquote(`"` + string(after[:len(after)-len(quotedSuffix)]) + `"`)
		if err != nil {
			return "", false
		}
		return path, true
	}

	// Unknown format.
	return "", false
}
|
|
|
|
|
2022-08-23 20:29:20 +00:00
|
|
|
// --- a/internal/addrs/move_endpoint_module.go
|
2024-01-04 22:53:08 +00:00
|
|
|
// --- /dev/null
|
2024-02-06 18:06:10 +00:00
|
|
|
func isFromFileLine(latestState ParseState, line []byte) bool {
|
2024-01-04 22:53:08 +00:00
|
|
|
if !(latestState == IndexLine || latestState == ModeLine) {
|
2023-07-25 22:52:34 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) >= 6 && bytes.Equal(line[:4], []byte("--- ")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// +++ b/internal/addrs/move_endpoint_module.go
|
2024-02-06 18:06:10 +00:00
|
|
|
func isToFileLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if latestState != FromFileLine {
|
|
|
|
return false
|
|
|
|
}
|
2023-06-08 23:29:11 +00:00
|
|
|
if len(line) >= 6 && bytes.Equal(line[:4], []byte("+++ ")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-03-19 15:50:27 +00:00
|
|
|
// pathFromToFileLine extracts the b/ file path from a "+++ ..." line.
//
// It returns:
//   - ("", true) when the line is "+++ /dev/null" (the file was deleted);
//   - (path, true) when a plain or quoted b/ path is found;
//   - ("", false) when the line does not match a known format, a quoted path
//     has no closing quote, or a quoted path cannot be unescaped.
func pathFromToFileLine(line []byte) (string, bool) {
	// Normalize paths, as they can end in `\n`, `\t\n`, etc.
	// See https://github.com/trufflesecurity/trufflehog/issues/1060
	line = bytes.TrimSpace(line)

	// File was deleted.
	if bytes.Equal(line, []byte("+++ /dev/null")) {
		return "", true
	}

	const (
		plainPrefix  = "+++ b/"
		quotedPrefix = `+++ "b/`
	)

	// Match the markers at the start of the line only, so that a path which
	// itself contains "+++ b/" is not split in the wrong place.
	switch {
	case bytes.HasPrefix(line, []byte(plainPrefix)):
		return string(line[len(plainPrefix):]), true

	case bytes.HasPrefix(line, []byte(quotedPrefix)):
		// Edge case where the path is quoted.
		// e.g., `+++ "b/C++/1 \320\243\321\200\320\276\320\272/B.c"`
		quoted := line[len(quotedPrefix):]
		if !bytes.HasSuffix(quoted, []byte(`"`)) {
			// Missing closing quote; slicing blindly would panic on an empty
			// remainder or silently drop a real path character.
			return "", false
		}

		// Drop the trailing `"` and handle escaped characters in the path
		// e.g., "\342\200\224" instead of "—".
		// See https://github.com/trufflesecurity/trufflehog/issues/2418
		path, err := strconv.Unquote(`"` + string(quoted[:len(quoted)-1]) + `"`)
		if err != nil {
			return "", false
		}
		return path, true

	default:
		// Unknown format.
		return "", false
	}
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// @@ -298 +298 @@ func maxRetryErrorHandler(resp *http.Response, err error, numTries int)
|
2024-02-06 18:06:10 +00:00
|
|
|
func isHunkLineNumberLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == ToFileLine || latestState == HunkContentLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) >= 8 && bytes.Equal(line[:2], []byte("@@")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// fmt.Println("ok")
|
|
|
|
// (There's a space before `fmt` that gets removed by the formatter.)
|
2024-02-06 18:06:10 +00:00
|
|
|
func isHunkContextLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == HunkLineNumberLine || latestState == HunkContentLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) >= 1 && bytes.Equal(line[:1], []byte(" ")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// +fmt.Println("ok")
|
2024-02-06 18:06:10 +00:00
|
|
|
func isHunkPlusLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == HunkLineNumberLine || latestState == HunkContentLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) >= 1 && bytes.Equal(line[:1], []byte("+")) {
|
2022-09-09 22:00:33 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// -fmt.Println("ok")
|
2024-02-06 18:06:10 +00:00
|
|
|
func isHunkMinusLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == HunkLineNumberLine || latestState == HunkContentLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) >= 1 && bytes.Equal(line[:1], []byte("-")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// \ No newline at end of file
|
2024-02-06 18:06:10 +00:00
|
|
|
func isHunkNewlineWarningLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if latestState != HunkContentLine {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if len(line) >= 27 && bytes.Equal(line[:27], []byte("\\ No newline at end of file")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// Newline after hunk, or an empty line, e.g.
|
|
|
|
// +}
|
|
|
|
//
|
|
|
|
// commit 00920984e3435057f09cee5468850f7546dfa637 (tag: v3.42.0)
|
2024-02-06 18:06:10 +00:00
|
|
|
func isHunkEmptyLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if !(latestState == HunkLineNumberLine || latestState == HunkContentLine) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
// TODO: Can this also be `\n\r`?
|
|
|
|
if len(line) == 1 && bytes.Equal(line[:1], []byte("\n")) {
|
2022-08-23 20:29:20 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-02-06 18:06:10 +00:00
|
|
|
func isCommitSeparatorLine(latestState ParseState, line []byte) bool {
|
2023-07-25 22:52:34 +00:00
|
|
|
if (latestState == ModeLine || latestState == IndexLine || latestState == BinaryFileLine || latestState == ToFileLine) &&
|
|
|
|
len(line) == 1 && bytes.Equal(line[:1], []byte("\n")) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-02-06 18:06:10 +00:00
|
|
|
func cleanupParse(ctx context.Context, currentCommit *Commit, currentDiff *Diff, diffChan chan *Diff, totalLogSize *int) {
|
2024-01-30 20:30:51 +00:00
|
|
|
if err := currentDiff.finalize(); err != nil {
|
|
|
|
ctx.Logger().Error(err, "failed to finalize diff")
|
|
|
|
return
|
|
|
|
}
|
2024-02-06 18:06:10 +00:00
|
|
|
|
2023-07-25 22:52:34 +00:00
|
|
|
// Ignore empty or binary diffs (this condition may be redundant).
|
2024-01-30 20:30:51 +00:00
|
|
|
if currentDiff != nil && (currentDiff.Len() > 0 || currentDiff.IsBinary) {
|
2024-02-06 18:06:10 +00:00
|
|
|
currentDiff.Commit = currentCommit
|
|
|
|
diffChan <- currentDiff
|
2023-07-25 22:52:34 +00:00
|
|
|
}
|
|
|
|
if currentCommit != nil {
|
|
|
|
if totalLogSize != nil {
|
|
|
|
*totalLogSize += currentCommit.Size
|
|
|
|
}
|
2022-08-23 20:29:20 +00:00
|
|
|
}
|
|
|
|
}
|