mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
fix(git): decode unicode paths (#2585)
This commit is contained in:
parent
7e164d4a84
commit
aa862e46bb
2 changed files with 128 additions and 22 deletions
|
@ -424,10 +424,10 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
|
|||
case isBinaryLine(latestState, line):
|
||||
latestState = BinaryFileLine
|
||||
|
||||
path, ok := pathFromBinaryLine(line)
|
||||
path, ok := pathFromBinaryLine(ctx, line)
|
||||
if !ok {
|
||||
err = fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line)
|
||||
ctx.Logger().Error(err, "Failed to parse binary file line")
|
||||
ctx.Logger().Error(err, "Failed to parse BinaryFileLine")
|
||||
latestState = ParseFailure
|
||||
continue
|
||||
}
|
||||
|
@ -443,8 +443,15 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
|
|||
case isToFileLine(latestState, line):
|
||||
latestState = ToFileLine
|
||||
|
||||
// TODO: Is this fix still required?
|
||||
currentDiff.PathB = strings.TrimRight(strings.TrimRight(string(line[6:]), "\n"), "\t") // Trim the newline and tab characters. https://github.com/trufflesecurity/trufflehog/issues/1060
|
||||
path, ok := pathFromToFileLine(ctx, line)
|
||||
if !ok {
|
||||
err = fmt.Errorf(`expected line to match format '+++ b/path/to/file.go', got "%s"`, line)
|
||||
ctx.Logger().Error(err, "Failed to parse ToFileLine")
|
||||
latestState = ParseFailure
|
||||
continue
|
||||
}
|
||||
|
||||
currentDiff.PathB = path
|
||||
case isHunkLineNumberLine(latestState, line):
|
||||
latestState = HunkLineNumberLine
|
||||
|
||||
|
@ -681,27 +688,35 @@ func isBinaryLine(latestState ParseState, line []byte) bool {
|
|||
}
|
||||
|
||||
// Get the b/ file path. Ignoring the edge case of files having `and /b` in the name for simplicity.
|
||||
func pathFromBinaryLine(line []byte) (string, bool) {
|
||||
func pathFromBinaryLine(ctx context.Context, line []byte) (string, bool) {
|
||||
if bytes.Contains(line, []byte("and /dev/null")) {
|
||||
return "", true
|
||||
}
|
||||
|
||||
_, after, ok := bytes.Cut(line, []byte(" and b/"))
|
||||
if ok {
|
||||
var path string
|
||||
if _, after, ok := bytes.Cut(line, []byte(" and b/")); ok {
|
||||
// drop the " differ\n"
|
||||
return string(after[:len(after)-8]), true
|
||||
}
|
||||
|
||||
path = string(after[:len(after)-8])
|
||||
} else if _, after, ok = bytes.Cut(line, []byte(` and "b/`)); ok {
|
||||
// Edge case where the path is quoted.
|
||||
// https://github.com/trufflesecurity/trufflehog/issues/2384
|
||||
_, after, ok = bytes.Cut(line, []byte(` and "b/`))
|
||||
if ok {
|
||||
// drop the `" differ\n`
|
||||
return string(after[:len(after)-9]), true
|
||||
}
|
||||
|
||||
// drop the `" differ\n`
|
||||
path = string(after[:len(after)-9])
|
||||
} else {
|
||||
// Unknown format.
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Handle escaped characters in the path, such as "\342\200\224" instead of "—".
|
||||
// See https://github.com/trufflesecurity/trufflehog/issues/2418
|
||||
unicodePath, err := strconv.Unquote(`"` + path + `"`)
|
||||
if err != nil {
|
||||
ctx.Logger().Error(err, "failed to decode path", "path", path)
|
||||
return path, true
|
||||
}
|
||||
|
||||
return unicodePath, true
|
||||
}
|
||||
|
||||
// --- a/internal/addrs/move_endpoint_module.go
|
||||
|
@ -727,6 +742,42 @@ func isToFileLine(latestState ParseState, line []byte) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Get the b/ file path.
|
||||
func pathFromToFileLine(ctx context.Context, line []byte) (string, bool) {
|
||||
// Normalize paths, as they can end in `\n`, `\t\n`, etc.
|
||||
// See https://github.com/trufflesecurity/trufflehog/issues/1060
|
||||
line = bytes.TrimSpace(line)
|
||||
|
||||
// File was deleted.
|
||||
if bytes.Equal(line, []byte("+++ /dev/null")) {
|
||||
return "", true
|
||||
}
|
||||
|
||||
var path string
|
||||
if _, after, ok := bytes.Cut(line, []byte("+++ b/")); ok {
|
||||
path = string(after)
|
||||
} else if _, after, ok = bytes.Cut(line, []byte(`+++ "b/`)); ok {
|
||||
// Edge case where the path is quoted.
|
||||
// e.g., `+++ "b/C++/1 \320\243\321\200\320\276\320\272/B.c"`
|
||||
|
||||
// drop the trailing `"`
|
||||
path = string(after[:len(after)-1])
|
||||
} else {
|
||||
// Unknown format.
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Handle escaped characters in the path, such as "\342\200\224" instead of "—".
|
||||
// See https://github.com/trufflesecurity/trufflehog/issues/2418
|
||||
unicodePath, err := strconv.Unquote(`"` + path + `"`)
|
||||
if err != nil {
|
||||
ctx.Logger().Error(err, "failed to decode path", "path", path)
|
||||
return path, true
|
||||
}
|
||||
|
||||
return unicodePath, true
|
||||
}
|
||||
|
||||
// @@ -298 +298 @@ func maxRetryErrorHandler(resp *http.Response, err error, numTries int)
|
||||
func isHunkLineNumberLine(latestState ParseState, line []byte) bool {
|
||||
if !(latestState == ToFileLine || latestState == HunkContentLine) {
|
||||
|
|
|
@ -589,15 +589,36 @@ func TestLineChecksNoStaged(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestBinaryPathParse(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cases := map[string]string{
|
||||
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
|
||||
"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
|
||||
"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
|
||||
"Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ\n": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png",
|
||||
"Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ\n": "assets/retailers/ON-ikony-Platforma-ecom — kopia.png",
|
||||
"Binary files /dev/null and \"b/\\346\\267\\261\\345\\272\\246\\345\\255\\246\\344\\271\\240500\\351\\227\\256-Tan-00\\347\\233\\256\\345\\275\\225.docx\" differ\n": "深度学习500问-Tan-00目录.docx",
|
||||
}
|
||||
|
||||
for name, expected := range cases {
|
||||
filename, ok := pathFromBinaryLine([]byte(name))
|
||||
filename, ok := pathFromBinaryLine(ctx, []byte(name))
|
||||
if !ok {
|
||||
t.Errorf("Failed to get path: %s", name)
|
||||
}
|
||||
if filename != expected {
|
||||
t.Errorf("Expected: %s, Got: %s", expected, filename)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestToFileLinePathParse(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cases := map[string]string{
|
||||
"+++ /dev/null\n": "",
|
||||
"+++ b/embeds.xml\t\n": "embeds.xml",
|
||||
"+++ \"b/C++/1 \\320\\243\\321\\200\\320\\276\\320\\272/B.c\"\t\n": "C++/1 Урок/B.c",
|
||||
}
|
||||
|
||||
for name, expected := range cases {
|
||||
filename, ok := pathFromToFileLine(ctx, []byte(name))
|
||||
if !ok {
|
||||
t.Errorf("Failed to get path: %s", name)
|
||||
}
|
||||
|
@ -1336,7 +1357,29 @@ func TestMaxCommitSize(t *testing.T) {
|
|||
|
||||
}
|
||||
|
||||
const commitLog = `commit fd6e99e7a80199b76a694603be57c5ade1de18e7
|
||||
const commitLog = `commit e50b135fd29e91b2fbb25923797f5ecffe59f359
|
||||
Author: lionzxy <nikita@kulikof.ru>
|
||||
Date: Wed Mar 1 18:20:04 2017 +0300
|
||||
|
||||
Все работает, но он не принимает :(
|
||||
|
||||
diff --git "a/C++/1 \320\243\321\200\320\276\320\272/.idea/workspace.xml" "b/C++/1 \320\243\321\200\320\276\320\272/.idea/workspace.xml"
|
||||
index 85bfb17..89b08b5 100644
|
||||
--- "a/C++/1 \320\243\321\200\320\276\320\272/.idea/workspace.xml"
|
||||
+++ "b/C++/1 \320\243\321\200\320\276\320\272/.idea/workspace.xml"
|
||||
@@ -29,8 +29,8 @@
|
||||
<file leaf-file-name="CMakeLists.txt" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/CMakeLists.txt">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
- <state relative-caret-position="0">
|
||||
- <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
+ <state relative-caret-position="72">
|
||||
+ <caret line="4" column="0" lean-forward="false" selection-start-line="4" selection-start-column="0" selection-end-line="4" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
|
||||
commit fd6e99e7a80199b76a694603be57c5ade1de18e7
|
||||
Author: Jaliborc <jaliborc@gmail.com>
|
||||
Date: Mon Apr 25 16:28:06 2011 +0100
|
||||
|
||||
|
@ -1776,6 +1819,18 @@ func newStringBuilderValue(value string) strings.Builder {
|
|||
// This throws a nasty panic if it's a top-level var.
|
||||
func expectedDiffs() []*Diff {
|
||||
return []*Diff{
|
||||
{
|
||||
PathB: "C++/1 \320\243\321\200\320\276\320\272/.idea/workspace.xml",
|
||||
LineStart: 29,
|
||||
Commit: &Commit{
|
||||
Hash: "e50b135fd29e91b2fbb25923797f5ecffe59f359",
|
||||
Author: "lionzxy <nikita@kulikof.ru>",
|
||||
Date: newTime("Wed Mar 1 18:20:04 2017 +0300"),
|
||||
Message: newStringBuilderValue("Все работает, но он не принимает :(\n"),
|
||||
},
|
||||
contentWriter: newBufferWithContent([]byte("\n\n\n <state relative-caret-position=\"72\">\n <caret line=\"4\" column=\"0\" lean-forward=\"false\" selection-start-line=\"4\" selection-start-column=\"0\" selection-end-line=\"4\" selection-end-column=\"0\" />\n\n\n\n")),
|
||||
IsBinary: false,
|
||||
},
|
||||
{
|
||||
PathB: "components/item.lua",
|
||||
LineStart: 9,
|
||||
|
|
Loading…
Reference in a new issue