Mirror of https://github.com/trufflesecurity/trufflehog.git, synced 2024-11-14 08:57:40 +00:00

Commit a19bcd7813: Merge branch 'main' into test-revert-regex-engine
11 changed files with 29 additions and 213 deletions
@@ -27,6 +27,10 @@ func init() {
 	}
 }
 
+func (d *Base64) Type() detectorspb.DecoderType {
+	return detectorspb.DecoderType_BASE64
+}
+
 func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: d.Type()}
 	encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20, b64CharsetMapping, b64EndChars)
@@ -67,10 +71,6 @@ func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	return nil
 }
 
-func (d *Base64) Type() detectorspb.DecoderType {
-	return detectorspb.DecoderType_BASE64
-}
-
 func isASCII(b []byte) bool {
 	for i := 0; i < len(b); i++ {
 		if b[i] > unicode.MaxASCII {
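Both hunks above are a pure move: the Base64 decoder's Type() accessor now sits above FromChunk instead of below it, with no behavioral change. For orientation, a minimal sketch of the decoder shape these files share; Chunk and DecodableChunk here are simplified stand-ins, not trufflehog's exact definitions:

package main

import (
	"encoding/base64"
	"fmt"
)

// Chunk and DecodableChunk stand in for trufflehog's richer types;
// the names are illustrative, not the library's exact definitions.
type Chunk struct{ Data []byte }

type DecodableChunk struct {
	*Chunk
	DecoderType string
}

type Base64 struct{}

// Type is declared before FromChunk, mirroring the reordering in the diff.
func (d *Base64) Type() string { return "BASE64" }

// FromChunk returns a decoded copy of the chunk, or nil if nothing decodes.
func (d *Base64) FromChunk(chunk *Chunk) *DecodableChunk {
	if chunk == nil || len(chunk.Data) == 0 {
		return nil
	}
	decoded, err := base64.StdEncoding.DecodeString(string(chunk.Data))
	if err != nil {
		return nil
	}
	return &DecodableChunk{Chunk: &Chunk{Data: decoded}, DecoderType: d.Type()}
}

func main() {
	d := &Base64{}
	out := d.FromChunk(&Chunk{Data: []byte("aGVsbG8gd29ybGQ=")})
	fmt.Println(string(out.Data)) // hello world
}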
@@ -24,6 +24,10 @@ var (
 	escapePat = regexp.MustCompile(`(?i:\\{1,2}u)([a-fA-F0-9]{4})`)
 )
 
+func (d *EscapedUnicode) Type() detectorspb.DecoderType {
+	return detectorspb.DecoderType_ESCAPED_UNICODE
+}
+
 func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	if chunk == nil || len(chunk.Data) == 0 {
 		return nil
@@ -94,10 +98,6 @@ func decodeCodePoint(input []byte) []byte {
 	return input
 }
 
-func (d *EscapedUnicode) Type() detectorspb.DecoderType {
-	return detectorspb.DecoderType_ESCAPED_UNICODE
-}
-
 func decodeEscaped(input []byte) []byte {
	// Find all Unicode escape sequences in the input byte slice
 	indices := escapePat.FindAllSubmatchIndex(input, -1)
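escapePat matches a \u (or \\u) prefix followed by four hex digits, and decodeEscaped rewrites each match to the rune it names. A rough sketch of that substitution using the same pattern; the real decoder works on byte offsets via FindAllSubmatchIndex, and this simplified version ignores UTF-16 surrogate pairs:

package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// Same pattern as the diff: one or two backslashes, "u", then 4 hex digits.
var escapePat = regexp.MustCompile(`(?i:\\{1,2}u)([a-fA-F0-9]{4})`)

// decodeEscaped sketches what the decoder does with each match: parse the
// four hex digits as a code point and substitute the corresponding rune.
func decodeEscaped(input string) string {
	return escapePat.ReplaceAllStringFunc(input, func(m string) string {
		hex := escapePat.FindStringSubmatch(m)[1]
		cp, err := strconv.ParseUint(hex, 16, 32)
		if err != nil {
			return m // leave the text untouched if parsing fails
		}
		return string(rune(cp))
	})
}

func main() {
	fmt.Println(decodeEscaped(`\u0073\u0065\u0063\u0072\u0065\u0074`)) // secret
}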
@@ -11,6 +11,10 @@ import (
 
 type UTF16 struct{}
 
+func (d *UTF16) Type() detectorspb.DecoderType {
+	return detectorspb.DecoderType_UTF16
+}
+
 func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	if chunk == nil || len(chunk.Data) == 0 {
 		return nil
@@ -28,10 +32,6 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	return nil
 }
 
-func (d *UTF16) Type() detectorspb.DecoderType {
-	return detectorspb.DecoderType_UTF16
-}
-
 // utf16ToUTF8 converts a byte slice containing UTF-16 encoded data to a UTF-8 encoded byte slice.
 func utf16ToUTF8(b []byte) ([]byte, error) {
 	var bufBE, bufLE bytes.Buffer
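utf16ToUTF8 converts UTF-16 bytes to UTF-8; the bufBE and bufLE declarations suggest the real function tries both byte orders. A sketch of the little-endian half of that conversion (the byte-order assumption and error text are mine, not the library's):

package main

import (
	"encoding/binary"
	"fmt"
	"unicode/utf16"
)

// utf16ToUTF8 sketches the conversion named in the diff, assuming
// little-endian input without a BOM; the real decoder tries both orders.
func utf16ToUTF8(b []byte) ([]byte, error) {
	if len(b)%2 != 0 {
		return nil, fmt.Errorf("odd length for UTF-16 input: %d", len(b))
	}
	u := make([]uint16, 0, len(b)/2)
	for i := 0; i < len(b); i += 2 {
		u = append(u, binary.LittleEndian.Uint16(b[i:i+2]))
	}
	// utf16.Decode handles surrogate pairs; string() re-encodes as UTF-8.
	return []byte(string(utf16.Decode(u))), nil
}

func main() {
	le := []byte{'h', 0, 'i', 0} // "hi" encoded as UTF-16LE
	out, err := utf16ToUTF8(le)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // hi
}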
@@ -10,6 +10,10 @@ import (
 
 type UTF8 struct{}
 
+func (d *UTF8) Type() detectorspb.DecoderType {
+	return detectorspb.DecoderType_PLAIN
+}
+
 func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	if chunk == nil || len(chunk.Data) == 0 {
 		return nil
@@ -25,10 +29,6 @@ func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk {
 	return decodableChunk
 }
 
-func (d *UTF8) Type() detectorspb.DecoderType {
-	return detectorspb.DecoderType_PLAIN
-}
-
 // extractSubstrings performs similarly to the strings binutil,
 // extracting contiguous portions of printable characters that we care
 // about from some bytes
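The trailing comment describes extractSubstrings: like the strings(1) tool from binutils, it keeps contiguous runs of printable bytes. A sketch of that idea; the exact character set and minimum run length of the real function are not shown in this hunk, so both are assumptions here:

package main

import "fmt"

// extractSubstrings sketches the behavior the comment describes: keep
// contiguous runs of printable ASCII of at least minLen bytes. The real
// function's thresholds and character set may differ.
func extractSubstrings(b []byte, minLen int) [][]byte {
	var out [][]byte
	start := -1
	for i := 0; i <= len(b); i++ {
		printable := i < len(b) && b[i] >= 0x20 && b[i] <= 0x7e
		if printable && start < 0 {
			start = i // a printable run begins
		}
		if !printable && start >= 0 {
			if i-start >= minLen {
				out = append(out, b[start:i])
			}
			start = -1 // the run ended
		}
	}
	return out
}

func main() {
	data := []byte("key=abc123\x00\x01\xfftoken\x02x")
	for _, s := range extractSubstrings(data, 4) {
		fmt.Printf("%q\n", s) // "key=abc123", then "token"
	}
}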
@@ -31,7 +31,7 @@ var (
 // Keywords are used for efficiently pre-filtering chunks.
 // Use identifiers in the secret preferably, or the provider name.
 func (s Scanner) Keywords() []string {
-	return []string{"aha"}
+	return []string{"aha.io"}
 }
 
 func (s Scanner) getClient() *http.Client {
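Keywords feed the engine's pre-filter: a detector's regexes only run on chunks that contain one of its keywords, so narrowing "aha" to "aha.io" stops the detector from waking on the common interjection. A sketch of the pre-filter idea; containsKeyword is illustrative, not the engine's actual function:

package main

import (
	"bytes"
	"fmt"
)

// containsKeyword sketches the pre-filter: only run a detector's regexes
// when one of its keywords appears in the chunk (case-insensitive).
func containsKeyword(data []byte, keywords []string) bool {
	lower := bytes.ToLower(data)
	for _, kw := range keywords {
		if bytes.Contains(lower, []byte(kw)) {
			return true
		}
	}
	return false
}

func main() {
	chunk := []byte("Aha! That was unexpected.")
	fmt.Println(containsKeyword(chunk, []string{"aha"}))    // true: noisy match
	fmt.Println(containsKeyword(chunk, []string{"aha.io"})) // false: stricter
}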
@@ -31,7 +31,7 @@ var (
 	// Tokens created after Jan 18 2023 use a variable length
 	tokenPat  = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b([A-Za-z0-9+/=_-]+=[A-Za-z0-9]{8})\b`)
 	domainPat = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b([a-zA-Z-0-9]{5,24}\.[a-zA-Z-0-9]{3,16}\.[a-zA-Z-0-9]{3,16})\b`)
-	emailPat  = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`)
+	emailPat  = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,})\b`)
 )
 
 const (
@@ -54,11 +54,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
 	emails := emailPat.FindAllStringSubmatch(dataStr, -1)
+
 	for _, email := range emails {
-		email = strings.Split(email[0], " ")
 		if len(email) != 2 {
 			continue
 		}
 		resEmail := strings.TrimSpace(email[1])
 
 		for _, token := range tokens {
 			if len(token) != 2 {
 				continue
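Adding a capture group to emailPat is what lets the second hunk drop the strings.Split step: with a group, FindAllStringSubmatch returns the address directly as match[1], and len(match) == 2 becomes the natural validity check. A runnable comparison, with PrefixRegex approximated here as a literal "jira" prefix:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	input := "jira user@example.com"

	// Without a capture group, FindAllStringSubmatch yields only the full
	// match (including the prefix), which the old code had to split apart.
	oldPat := regexp.MustCompile(`(?i)jira\s+[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}`)
	fmt.Printf("%q\n", oldPat.FindAllStringSubmatch(input, -1)) // [["jira user@example.com"]]

	// With a capture group, the email is available directly as match[1],
	// so each match has length 2 and no string splitting is needed.
	newPat := regexp.MustCompile(`(?i)jira\s+([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})`)
	for _, m := range newPat.FindAllStringSubmatch(input, -1) {
		fmt.Println(len(m), m[1]) // 2 user@example.com
	}
}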
@@ -771,7 +771,7 @@ func (e *Engine) scannerWorker(ctx context.Context) {
 			decodeLatency.WithLabelValues(decoder.Type().String(), chunk.SourceName).Observe(float64(decodeTime))
 
 			if decoded == nil {
-				ctx.Logger().V(4).Info("no decoder found for chunk", "chunk", chunk)
+				ctx.Logger().V(4).Info("decoder not applicable for chunk", "decoder", decoder.Type().String(), "chunk", chunk)
 				continue
 			}
 
@@ -797,7 +797,6 @@ func (e *Engine) scannerWorker(ctx context.Context) {
 					wgDoneFn: wgDetect.Done,
 				}
 			}
-			continue
 		}
 
 		dataSize := float64(len(chunk.Data))
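The reworded log line reflects what the loop actually does: it iterates over every decoder, and a nil result from FromChunk means that one decoder did not apply, not that no decoder was found. The second hunk appears, from the reconstruction above, to drop a continue that was the last statement before the loop's closing brace, where it had no effect. A toy version of the loop, with the decoder shape simplified:

package main

import "fmt"

// decode returns nil when the decoder does not apply to the chunk,
// mirroring the FromChunk contract in the decoder diffs above.
type decoder struct {
	name   string
	decode func([]byte) []byte
}

func main() {
	decoders := []decoder{
		{"UTF8", func(b []byte) []byte { return b }},
		{"BASE64", func(b []byte) []byte { return nil }}, // not applicable here
	}
	chunk := []byte("hello")

	for _, d := range decoders {
		decoded := d.decode(chunk)
		if decoded == nil {
			// A nil result means this one decoder was not applicable,
			// which is what the renamed log message now says.
			fmt.Printf("V(4): decoder not applicable for chunk decoder=%s\n", d.name)
			continue
		}
		fmt.Printf("scanning decoded chunk from %s: %q\n", d.name, decoded)
	}
}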
@@ -5,4 +5,5 @@ import "sync/atomic"
 var (
 	ForceSkipBinaries  = atomic.Bool{}
 	ForceSkipArchives  = atomic.Bool{}
+	SkipAdditionalRefs = atomic.Bool{}
 )
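SkipAdditionalRefs follows the existing pattern in this package: a package-level sync/atomic.Bool that is typically set once at startup and then read concurrently. Minimal usage:

package main

import (
	"fmt"
	"sync/atomic"
)

// A package-level atomic.Bool flag, as in the feature package diff: safe to
// flip from one goroutine (e.g. flag parsing) and read from many workers.
var SkipAdditionalRefs = atomic.Bool{}

func main() {
	fmt.Println(SkipAdditionalRefs.Load()) // false by default

	SkipAdditionalRefs.Store(true)
	if SkipAdditionalRefs.Load() {
		fmt.Println("skipping additional refs")
	}
}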
@@ -422,6 +422,11 @@ func executeClone(ctx context.Context, params cloneParams) (*git.Repository, err
 		params.clonePath,
 		"--quiet", // https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--quietcode
 	}
+	if !feature.SkipAdditionalRefs.Load() {
+		gitArgs = append(gitArgs,
+			"-c",
+			"remote.origin.fetch=+refs/*:refs/remotes/origin/*")
+	}
 	gitArgs = append(gitArgs, params.args...)
 	cloneCmd := exec.Command("git", gitArgs...)
 
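When the new flag is unset (the default), the clone gains a -c remote.origin.fetch=+refs/*:refs/remotes/origin/* config setting, so the clone brings every ref (not just branch heads) into refs/remotes/origin/*; setting SkipAdditionalRefs restores the stock refspec. A standalone sketch of the argument assembly, with placeholder URL and path:

package main

import (
	"fmt"
	"os/exec"
)

// Sketch of the argument assembly in executeClone. The URL and destination
// path are placeholders; only the flag and refspec mirror the diff.
func main() {
	skipAdditionalRefs := false

	gitArgs := []string{
		"clone",
		"https://example.com/repo.git",
		"/tmp/clone-dest",
		"--quiet",
	}
	if !skipAdditionalRefs {
		// Ask git to fetch all refs, not just branches.
		gitArgs = append(gitArgs,
			"-c",
			"remote.origin.fetch=+refs/*:refs/remotes/origin/*")
	}

	cloneCmd := exec.Command("git", gitArgs...)
	fmt.Println(cloneCmd.String()) // print the command instead of running it
}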
@@ -118,17 +118,6 @@ func New(opts ...Option) *BufferedFileWriter {
 	return w
 }
 
-// NewFromReader creates a new instance of BufferedFileWriter and writes the content from the provided reader to the writer.
-func NewFromReader(r io.Reader, opts ...Option) (*BufferedFileWriter, error) {
-	opts = append(opts, WithBufferSize(Large))
-	writer := New(opts...)
-	if _, err := io.Copy(writer, r); err != nil && !errors.Is(err, io.EOF) {
-		return nil, fmt.Errorf("error writing to buffered file writer: %w", err)
-	}
-
-	return writer, nil
-}
-
 // Len returns the number of bytes written to the buffer or file.
 func (w *BufferedFileWriter) Len() int { return int(w.size) }
 
@@ -291,14 +280,7 @@ func (w *BufferedFileWriter) CloseForWriting() error {
 // If the content is stored in memory, it returns a custom reader that handles returning the buffer to the pool.
 // The caller should call Close() on the returned io.Reader when done to ensure resources are properly released.
 // This method can only be used when the BufferedFileWriter is in read-only mode.
-func (w *BufferedFileWriter) ReadCloser() (io.ReadCloser, error) { return w.ReadSeekCloser() }
-
-// ReadSeekCloser returns an io.ReadSeekCloser to read the written content.
-// If the content is stored in a file, it opens the file and returns a file reader.
-// If the content is stored in memory, it returns a custom reader that allows seeking and handles returning
-// the buffer to the pool.
-// This method can only be used when the BufferedFileWriter is in read-only mode.
-func (w *BufferedFileWriter) ReadSeekCloser() (io.ReadSeekCloser, error) {
+func (w *BufferedFileWriter) ReadCloser() (io.ReadCloser, error) {
 	if w.state != readOnly {
 		return nil, fmt.Errorf("BufferedFileWriter must be in read-only mode to read")
 	}
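NewFromReader and the separate ReadSeekCloser accessor are removed, leaving ReadCloser as the single read path; the underlying design is unchanged: writes accumulate in a pooled memory buffer until a threshold, then spill to a temp file. A self-contained sketch of that spill-to-disk pattern (the type name and threshold policy here are illustrative, not the library's):

package main

import (
	"bytes"
	"fmt"
	"os"
)

// spillWriter sketches the BufferedFileWriter idea: buffer in memory until
// a threshold, then spill everything to a temp file. The real type adds
// buffer pooling, write/read states, and ReadCloser plumbing.
type spillWriter struct {
	threshold int
	buf       bytes.Buffer
	file      *os.File
	size      int
}

func (w *spillWriter) Write(p []byte) (int, error) {
	w.size += len(p)
	if w.file == nil && w.buf.Len()+len(p) <= w.threshold {
		return w.buf.Write(p) // still under the threshold: stay in memory
	}
	if w.file == nil { // crossing the threshold: move buffered bytes to disk
		f, err := os.CreateTemp("", "spill")
		if err != nil {
			return 0, err
		}
		w.file = f
		if _, err := w.file.Write(w.buf.Bytes()); err != nil {
			return 0, err
		}
		w.buf.Reset()
	}
	return w.file.Write(p)
}

func main() {
	w := &spillWriter{threshold: 8}
	w.Write([]byte("hello "))
	w.Write([]byte("world, this spills"))
	if w.file != nil {
		defer w.file.Close()
		defer os.Remove(w.file.Name())
	}
	fmt.Println("bytes written:", w.size, "spilled:", w.file != nil)
}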
@@ -2,11 +2,8 @@ package bufferedfilewriter
 
 import (
 	"bytes"
-	"crypto/rand"
-	"fmt"
 	"io"
 	"os"
-	"strings"
 	"testing"
 	"time"
 
@@ -498,103 +495,6 @@ func BenchmarkBufferedFileWriterWriteSmall(b *testing.B) {
 	}
 }
 
-// Create a custom reader that can simulate errors.
-type errorReader struct{}
-
-func (errorReader) Read([]byte) (n int, err error) { return 0, fmt.Errorf("error reading") }
-
-func TestNewFromReader(t *testing.T) {
-	t.Parallel()
-
-	testCases := []struct {
-		name     string
-		reader   io.Reader
-		wantErr  bool
-		wantData string
-	}{
-		{
-			name:     "Success case",
-			reader:   strings.NewReader("hello world"),
-			wantData: "hello world",
-		},
-		{
-			name:   "Empty reader",
-			reader: strings.NewReader(""),
-		},
-		{
-			name:    "Error reader",
-			reader:  errorReader{},
-			wantErr: true,
-		},
-	}
-
-	for _, tc := range testCases {
-		tc := tc
-		t.Run(tc.name, func(t *testing.T) {
-			t.Parallel()
-			bufWriter, err := NewFromReader(tc.reader)
-			if err != nil && tc.wantErr {
-				return
-			}
-
-			assert.NoError(t, err)
-			assert.NotNil(t, bufWriter)
-
-			err = bufWriter.CloseForWriting()
-			assert.NoError(t, err)
-
-			b := new(bytes.Buffer)
-			rdr, err := bufWriter.ReadCloser()
-			if err != nil && tc.wantErr {
-				return
-			}
-			assert.NoError(t, err)
-
-			if rdr == nil {
-				return
-			}
-			defer rdr.Close()
-
-			_, err = b.ReadFrom(rdr)
-			assert.NoError(t, err)
-			assert.Equal(t, tc.wantData, b.String())
-		})
-	}
-}
-
-func TestNewFromReaderThresholdExceeded(t *testing.T) {
-	t.Parallel()
-
-	// Create a large data buffer that exceeds the threshold.
-	largeData := make([]byte, 1024*1024) // 1 MB
-	_, err := rand.Read(largeData)
-	assert.NoError(t, err)
-
-	// Create a BufferedFileWriter with a smaller threshold.
-	threshold := uint64(1024) // 1 KB
-	bufWriter, err := NewFromReader(bytes.NewReader(largeData), WithThreshold(threshold))
-	assert.NoError(t, err)
-
-	err = bufWriter.CloseForWriting()
-	assert.NoError(t, err)
-
-	rdr, err := bufWriter.ReadCloser()
-	assert.NoError(t, err)
-	defer rdr.Close()
-
-	// Verify that the data was written to a file.
-	assert.NotEmpty(t, bufWriter.filename)
-	assert.NotNil(t, bufWriter.file)
-
-	// Read the data from the BufferedFileWriter.
-	readData, err := io.ReadAll(rdr)
-	assert.NoError(t, err)
-	assert.Equal(t, largeData, readData)
-
-	// Verify the size of the data written.
-	assert.Equal(t, uint64(len(largeData)), bufWriter.size)
-}
-
 func TestBufferWriterCloseForWritingWithFile(t *testing.T) {
 	bufPool := pool.NewBufferPool(defaultBufferSize)
 
@@ -700,74 +600,3 @@ func TestBufferedFileWriter_ReadFrom(t *testing.T) {
 		})
 	}
 }
-
-// simpleReader wraps a string, allowing it to be read as an io.Reader without implementing io.WriterTo.
-type simpleReader struct {
-	data   []byte
-	offset int
-}
-
-func newSimpleReader(s string) *simpleReader { return &simpleReader{data: []byte(s)} }
-
-// Read implements the io.Reader interface.
-func (sr *simpleReader) Read(p []byte) (n int, err error) {
-	if sr.offset >= len(sr.data) {
-		return 0, io.EOF // no more data to read
-	}
-	n = copy(p, sr.data[sr.offset:]) // copy data to p
-	sr.offset += n                   // move offset for next read
-	return
-}
-
-func TestNewFromReaderThresholdExceededSimpleReader(t *testing.T) {
-	t.Parallel()
-
-	// Create a large data buffer that exceeds the threshold.
-	largeData := strings.Repeat("a", 1024*1024) // 1 MB
-
-	// Create a BufferedFileWriter with a smaller threshold.
-	threshold := uint64(1024) // 1 KB
-	bufWriter, err := NewFromReader(newSimpleReader(largeData), WithThreshold(threshold))
-	assert.NoError(t, err)
-
-	err = bufWriter.CloseForWriting()
-	assert.NoError(t, err)
-
-	rdr, err := bufWriter.ReadCloser()
-	assert.NoError(t, err)
-	defer rdr.Close()
-
-	// Verify that the data was written to a file.
-	assert.NotEmpty(t, bufWriter.filename)
-	assert.NotNil(t, bufWriter.file)
-
-	// Read the data from the BufferedFileWriter.
-	readData, err := io.ReadAll(rdr)
-	assert.NoError(t, err)
-	assert.Equal(t, largeData, string(readData))
-
-	// Verify the size of the data written.
-	assert.Equal(t, uint64(len(largeData)), bufWriter.size)
-}
-
-func BenchmarkNewFromReader(b *testing.B) {
-	largeData := strings.Repeat("a", 1024*1024) // 1 MB
-
-	b.ResetTimer()
-
-	for i := 0; i < b.N; i++ {
-		reader := newSimpleReader(largeData)
-
-		b.StartTimer()
-		bufWriter, err := NewFromReader(reader)
-		assert.NoError(b, err)
-		b.StopTimer()
-
-		err = bufWriter.CloseForWriting()
-		assert.NoError(b, err)
-
-		rdr, err := bufWriter.ReadCloser()
-		assert.NoError(b, err)
-		rdr.Close()
-	}
-}