Update GitParse to handle quoted binary filenames (#2391)

* fix(gitparse): quoted binary files

* fix(gitparse): use bytes.Cut instead of regexp

* fix lint warning

---------

Co-authored-by: Zachary Rice <zachary.rice@trufflesec.com>
This commit is contained in:
Richard Gomez 2024-02-08 10:25:04 -05:00 committed by GitHub
parent a00ffe9522
commit 3b40c4fa63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 37 additions and 13 deletions

View file

@ -458,10 +458,17 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
case isBinaryLine(latestState, line):
latestState = BinaryFileLine
currentDiff.PathB = pathFromBinaryLine(line)
path, ok := pathFromBinaryLine(line)
if !ok {
err = fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line)
ctx.Logger().Error(err, "Failed to parse binary file line")
latestState = ParseFailure
continue
}
// Don't do anything if the file is deleted. (pathA has file path, pathB is /dev/null)
if currentDiff.PathB != "" {
if path != "" {
currentDiff.PathB = path
currentDiff.IsBinary = true
}
case isFromFileLine(latestState, line):
@ -708,15 +715,27 @@ func isBinaryLine(latestState ParseState, line []byte) bool {
}
// Get the b/ file path. Ignoring the edge case of files having ` and b/` in the name for simplicity.
func pathFromBinaryLine(line []byte) string {
logger := context.Background().Logger()
sbytes := bytes.Split(line, []byte(" and b/"))
if len(sbytes) != 2 {
logger.V(2).Info("Expected binary line to be in 'Binary files a/fileA and b/fileB differ' format.", "got", line)
return ""
func pathFromBinaryLine(line []byte) (string, bool) {
if bytes.Contains(line, []byte("and /dev/null")) {
return "", true
}
bRaw := sbytes[1]
return string(bRaw[:len(bRaw)-8]) // drop the "b/" and " differ\n"
_, after, ok := bytes.Cut(line, []byte(" and b/"))
if ok {
// drop the " differ\n"
return string(after[:len(after)-8]), true
}
// Edge case where the path is quoted.
// https://github.com/trufflesecurity/trufflehog/issues/2384
_, after, ok = bytes.Cut(line, []byte(` and "b/`))
if ok {
// drop the `" differ\n`
return string(after[:len(after)-9]), true
}
// Unknown format.
return "", false
}
// --- a/internal/addrs/move_endpoint_module.go

View file

@ -591,12 +591,17 @@ func TestLineChecksNoStaged(t *testing.T) {
func TestBinaryPathParse(t *testing.T) {
cases := map[string]string{
"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
"Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ\n": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png",
}
for name, expected := range cases {
filename := pathFromBinaryLine([]byte(name))
filename, ok := pathFromBinaryLine([]byte(name))
if !ok {
t.Errorf("Failed to get path: %s", name)
}
if filename != expected {
t.Errorf("Expected: %s, Got: %s", expected, filename)
}