mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Use heuristic to choose the most likely UTF-16 decoded string (#1381)
* Use heuristic to choose the most likely UTF-16 decoded string
* Assume ASCII and include valid BE and LE bytes
* Remove unused code
* Assume ASCII and return nil when not utf16

---------

Co-authored-by: bill-rich <bill.rich@gmail.com>
This commit is contained in:
parent
3d395497cf
commit
fb76eaf17b
4 changed files with 45 additions and 45 deletions
|
@ -3,7 +3,6 @@ package decoders
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
||||||
|
@ -17,6 +16,9 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *sources.Chunk {
|
||||||
}
|
}
|
||||||
|
|
||||||
if utf16Data, err := utf16ToUTF8(chunk.Data); err == nil {
|
if utf16Data, err := utf16ToUTF8(chunk.Data); err == nil {
|
||||||
|
if len(utf16Data) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
chunk.Data = utf16Data
|
chunk.Data = utf16Data
|
||||||
return chunk
|
return chunk
|
||||||
}
|
}
|
||||||
|
@ -26,43 +28,19 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *sources.Chunk {
|
||||||
|
|
||||||
// utf16ToUTF8 converts a byte slice containing UTF-16 encoded data to a UTF-8 encoded byte slice.
|
// utf16ToUTF8 converts a byte slice containing UTF-16 encoded data to a UTF-8 encoded byte slice.
|
||||||
func utf16ToUTF8(b []byte) ([]byte, error) {
|
func utf16ToUTF8(b []byte) ([]byte, error) {
|
||||||
endianness, err := guessUTF16Endianness(b)
|
var bufBE, bufLE bytes.Buffer
|
||||||
if err != nil {
|
for i := 0; i < len(b)-1; i += 2 {
|
||||||
return nil, err
|
if r := rune(binary.BigEndian.Uint16(b[i:])); b[i] == 0 && utf8.ValidRune(r) {
|
||||||
}
|
if isValidByte(byte(r)) {
|
||||||
|
bufBE.WriteRune(r)
|
||||||
buf := &bytes.Buffer{}
|
}
|
||||||
for i := 0; i < len(b); i += 2 {
|
}
|
||||||
r := rune(endianness.Uint16(b[i:]))
|
if r := rune(binary.LittleEndian.Uint16(b[i:])); b[i+1] == 0 && utf8.ValidRune(r) {
|
||||||
if utf8.ValidRune(r) {
|
if isValidByte(byte(r)) {
|
||||||
buf.WriteRune(r)
|
bufLE.WriteRune(r)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return buf.Bytes(), nil
|
return append(bufLE.Bytes(), bufBE.Bytes()...), nil
|
||||||
}
|
|
||||||
|
|
||||||
func guessUTF16Endianness(b []byte) (binary.ByteOrder, error) {
|
|
||||||
if len(b) < 2 || len(b)%2 != 0 {
|
|
||||||
return nil, fmt.Errorf("input length must be even and at least 2 bytes long")
|
|
||||||
}
|
|
||||||
|
|
||||||
var evenNullBytes, oddNullBytes int
|
|
||||||
|
|
||||||
for i := 0; i < len(b); i += 2 {
|
|
||||||
if b[i] == 0 {
|
|
||||||
oddNullBytes++
|
|
||||||
}
|
|
||||||
if b[i+1] == 0 {
|
|
||||||
evenNullBytes++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if evenNullBytes > oddNullBytes {
|
|
||||||
return binary.LittleEndian, nil
|
|
||||||
}
|
|
||||||
if oddNullBytes > evenNullBytes {
|
|
||||||
return binary.BigEndian, nil
|
|
||||||
}
|
|
||||||
return nil, fmt.Errorf("could not determine endianness")
|
|
||||||
}
|
}
|
||||||
|
|
BIN
pkg/decoders/utf16_test.dll
Normal file
BIN
pkg/decoders/utf16_test.dll
Normal file
Binary file not shown.
|
@ -2,6 +2,7 @@ package decoders
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
||||||
|
@ -35,8 +36,8 @@ func TestUTF16Decoder(t *testing.T) {
|
||||||
{
|
{
|
||||||
name: "Invalid UTF-16 input (odd length)",
|
name: "Invalid UTF-16 input (odd length)",
|
||||||
input: []byte{72, 0, 101, 0, 108, 0, 108, 0, 111, 0, 32, 0, 87, 0, 111, 0, 114, 0, 108, 0, 0},
|
input: []byte{72, 0, 101, 0, 108, 0, 108, 0, 111, 0, 32, 0, 87, 0, 111, 0, 114, 0, 108, 0, 0},
|
||||||
expected: nil,
|
expected: []byte("Hello Worl"),
|
||||||
expectNil: true,
|
expectNil: false,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,12 +58,32 @@ func TestUTF16Decoder(t *testing.T) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if !bytes.Equal(decodedChunk.Data, tc.expected) {
|
if !bytes.Equal(decodedChunk.Data, tc.expected) {
|
||||||
t.Errorf("Expected decoded data: %v, got: %v", tc.expected, decodedChunk.Data)
|
t.Errorf("Expected decoded data: %s, got: %s", tc.expected, decodedChunk.Data)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDLL(t *testing.T) {
|
||||||
|
data, err := os.ReadFile("utf16_test.dll")
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Failed to read test data: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := &sources.Chunk{Data: data}
|
||||||
|
decoder := &UTF16{}
|
||||||
|
decodedChunk := decoder.FromChunk(chunk)
|
||||||
|
if decodedChunk == nil {
|
||||||
|
t.Errorf("Expected chunk with data, got nil")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !bytes.Contains(decodedChunk.Data, []byte("aws_secret_access_key")) {
|
||||||
|
t.Errorf("Expected chunk to have aws_secret_access_key")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkUtf16ToUtf8(b *testing.B) {
|
func BenchmarkUtf16ToUtf8(b *testing.B) {
|
||||||
// Example UTF-16LE encoded data
|
// Example UTF-16LE encoded data
|
||||||
data := []byte{72, 0, 101, 0, 108, 0, 108, 0, 111, 0, 32, 0, 87, 0, 111, 0, 114, 0, 108, 0, 100, 0}
|
data := []byte{72, 0, 101, 0, 108, 0, 108, 0, 111, 0, 32, 0, 87, 0, 111, 0, 114, 0, 108, 0, 100, 0}
|
||||||
|
|
|
@ -26,11 +26,6 @@ func (d *UTF8) FromChunk(chunk *sources.Chunk) *sources.Chunk {
|
||||||
// extracting contiguous portions of printable characters that we care
|
// extracting contiguous portions of printable characters that we care
|
||||||
// about from some bytes
|
// about from some bytes
|
||||||
func extractSubstrings(b []byte) []byte {
|
func extractSubstrings(b []byte) []byte {
|
||||||
isValidByte := func(c byte) bool {
|
|
||||||
// https://www.rapidtables.com/code/text/ascii-table.html
|
|
||||||
// split on anything that is not ascii space through tilde
|
|
||||||
return c > 31 && c < 127
|
|
||||||
}
|
|
||||||
|
|
||||||
field := make([]byte, len(b))
|
field := make([]byte, len(b))
|
||||||
fieldLen := 0
|
fieldLen := 0
|
||||||
|
@ -53,3 +48,9 @@ func extractSubstrings(b []byte) []byte {
|
||||||
|
|
||||||
return buf.Bytes()
|
return buf.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isValidByte(c byte) bool {
|
||||||
|
// https://www.rapidtables.com/code/text/ascii-table.html
|
||||||
|
// split on anything that is not ascii space through tilde
|
||||||
|
return c > 31 && c < 127
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue