From 0feca62469fb9fe3cc55c0b67f34b5d3d33bc428 Mon Sep 17 00:00:00 2001 From: Ankush Goel Date: Tue, 10 Sep 2024 22:57:55 +0530 Subject: [PATCH 1/5] Jira Email fix (#3061) --- pkg/detectors/jiratoken/v2/jiratoken_v2.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/detectors/jiratoken/v2/jiratoken_v2.go b/pkg/detectors/jiratoken/v2/jiratoken_v2.go index cac0a5d3a..d12cba1e4 100644 --- a/pkg/detectors/jiratoken/v2/jiratoken_v2.go +++ b/pkg/detectors/jiratoken/v2/jiratoken_v2.go @@ -31,7 +31,7 @@ var ( // Tokens created after Jan 18 2023 use a variable length tokenPat = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b([A-Za-z0-9+/=_-]+=[A-Za-z0-9]{8})\b`) domainPat = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b([a-zA-Z-0-9]{5,24}\.[a-zA-Z-0-9]{3,16}\.[a-zA-Z-0-9]{3,16})\b`) - emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`) + emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"jira"}) + `\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,})\b`) ) const ( @@ -54,11 +54,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result emails := emailPat.FindAllStringSubmatch(dataStr, -1) for _, email := range emails { - email = strings.Split(email[0], " ") if len(email) != 2 { continue } resEmail := strings.TrimSpace(email[1]) + for _, token := range tokens { if len(token) != 2 { continue From 5dad5a738bec56df5623cf1edc05843c7bbb4d72 Mon Sep 17 00:00:00 2001 From: ahrav Date: Tue, 10 Sep 2024 10:35:49 -0700 Subject: [PATCH 2/5] [chore] - remove unused method and function (#3089) * remove unused method * delete more unused stuff --- .../bufferedfilewriter.go | 20 +- .../bufferedfilewriter_test.go | 171 ------------------ 2 files changed, 1 insertion(+), 190 deletions(-) diff --git a/pkg/writers/buffered_file_writer/bufferedfilewriter.go b/pkg/writers/buffered_file_writer/bufferedfilewriter.go index 8fa93fe16..49c3afa9a 100644 --- a/pkg/writers/buffered_file_writer/bufferedfilewriter.go +++ b/pkg/writers/buffered_file_writer/bufferedfilewriter.go @@ -118,17 +118,6 @@ func New(opts ...Option) *BufferedFileWriter { return w } -// NewFromReader creates a new instance of BufferedFileWriter and writes the content from the provided reader to the writer. -func NewFromReader(r io.Reader, opts ...Option) (*BufferedFileWriter, error) { - opts = append(opts, WithBufferSize(Large)) - writer := New(opts...) - if _, err := io.Copy(writer, r); err != nil && !errors.Is(err, io.EOF) { - return nil, fmt.Errorf("error writing to buffered file writer: %w", err) - } - - return writer, nil -} - // Len returns the number of bytes written to the buffer or file. func (w *BufferedFileWriter) Len() int { return int(w.size) } @@ -291,14 +280,7 @@ func (w *BufferedFileWriter) CloseForWriting() error { // If the content is stored in memory, it returns a custom reader that handles returning the buffer to the pool. // The caller should call Close() on the returned io.Reader when done to ensure resources are properly released. // This method can only be used when the BufferedFileWriter is in read-only mode. -func (w *BufferedFileWriter) ReadCloser() (io.ReadCloser, error) { return w.ReadSeekCloser() } - -// ReadSeekCloser returns an io.ReadSeekCloser to read the written content. -// If the content is stored in a file, it opens the file and returns a file reader. 
-// If the content is stored in memory, it returns a custom reader that allows seeking and handles returning -// the buffer to the pool. -// This method can only be used when the BufferedFileWriter is in read-only mode. -func (w *BufferedFileWriter) ReadSeekCloser() (io.ReadSeekCloser, error) { +func (w *BufferedFileWriter) ReadCloser() (io.ReadCloser, error) { if w.state != readOnly { return nil, fmt.Errorf("BufferedFileWriter must be in read-only mode to read") } diff --git a/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go b/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go index f250f9420..3f4fad534 100644 --- a/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go +++ b/pkg/writers/buffered_file_writer/bufferedfilewriter_test.go @@ -2,11 +2,8 @@ package bufferedfilewriter import ( "bytes" - "crypto/rand" - "fmt" "io" "os" - "strings" "testing" "time" @@ -498,103 +495,6 @@ func BenchmarkBufferedFileWriterWriteSmall(b *testing.B) { } } -// Create a custom reader that can simulate errors. -type errorReader struct{} - -func (errorReader) Read([]byte) (n int, err error) { return 0, fmt.Errorf("error reading") } - -func TestNewFromReader(t *testing.T) { - t.Parallel() - - testCases := []struct { - name string - reader io.Reader - wantErr bool - wantData string - }{ - { - name: "Success case", - reader: strings.NewReader("hello world"), - wantData: "hello world", - }, - { - name: "Empty reader", - reader: strings.NewReader(""), - }, - { - name: "Error reader", - reader: errorReader{}, - wantErr: true, - }, - } - - for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - bufWriter, err := NewFromReader(tc.reader) - if err != nil && tc.wantErr { - return - } - - assert.NoError(t, err) - assert.NotNil(t, bufWriter) - - err = bufWriter.CloseForWriting() - assert.NoError(t, err) - - b := new(bytes.Buffer) - rdr, err := bufWriter.ReadCloser() - if err != nil && tc.wantErr { - return - } - assert.NoError(t, err) - - if rdr == nil { - return - } - defer rdr.Close() - - _, err = b.ReadFrom(rdr) - assert.NoError(t, err) - assert.Equal(t, tc.wantData, b.String()) - }) - } -} - -func TestNewFromReaderThresholdExceeded(t *testing.T) { - t.Parallel() - - // Create a large data buffer that exceeds the threshold. - largeData := make([]byte, 1024*1024) // 1 MB - _, err := rand.Read(largeData) - assert.NoError(t, err) - - // Create a BufferedFileWriter with a smaller threshold. - threshold := uint64(1024) // 1 KB - bufWriter, err := NewFromReader(bytes.NewReader(largeData), WithThreshold(threshold)) - assert.NoError(t, err) - - err = bufWriter.CloseForWriting() - assert.NoError(t, err) - - rdr, err := bufWriter.ReadCloser() - assert.NoError(t, err) - defer rdr.Close() - - // Verify that the data was written to a file. - assert.NotEmpty(t, bufWriter.filename) - assert.NotNil(t, bufWriter.file) - - // Read the data from the BufferedFileWriter. - readData, err := io.ReadAll(rdr) - assert.NoError(t, err) - assert.Equal(t, largeData, readData) - - // Verify the size of the data written. - assert.Equal(t, uint64(len(largeData)), bufWriter.size) -} - func TestBufferWriterCloseForWritingWithFile(t *testing.T) { bufPool := pool.NewBufferPool(defaultBufferSize) @@ -700,74 +600,3 @@ func TestBufferedFileWriter_ReadFrom(t *testing.T) { }) } } - -// simpleReader wraps a string, allowing it to be read as an io.Reader without implementing io.WriterTo. 
-type simpleReader struct { - data []byte - offset int -} - -func newSimpleReader(s string) *simpleReader { return &simpleReader{data: []byte(s)} } - -// Read implements the io.Reader interface. -func (sr *simpleReader) Read(p []byte) (n int, err error) { - if sr.offset >= len(sr.data) { - return 0, io.EOF // no more data to read - } - n = copy(p, sr.data[sr.offset:]) // copy data to p - sr.offset += n // move offset for next read - return -} - -func TestNewFromReaderThresholdExceededSimpleReader(t *testing.T) { - t.Parallel() - - // Create a large data buffer that exceeds the threshold. - largeData := strings.Repeat("a", 1024*1024) // 1 MB - - // Create a BufferedFileWriter with a smaller threshold. - threshold := uint64(1024) // 1 KB - bufWriter, err := NewFromReader(newSimpleReader(largeData), WithThreshold(threshold)) - assert.NoError(t, err) - - err = bufWriter.CloseForWriting() - assert.NoError(t, err) - - rdr, err := bufWriter.ReadCloser() - assert.NoError(t, err) - defer rdr.Close() - - // Verify that the data was written to a file. - assert.NotEmpty(t, bufWriter.filename) - assert.NotNil(t, bufWriter.file) - - // Read the data from the BufferedFileWriter. - readData, err := io.ReadAll(rdr) - assert.NoError(t, err) - assert.Equal(t, largeData, string(readData)) - - // Verify the size of the data written. - assert.Equal(t, uint64(len(largeData)), bufWriter.size) -} - -func BenchmarkNewFromReader(b *testing.B) { - largeData := strings.Repeat("a", 1024*1024) // 1 MB - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - reader := newSimpleReader(largeData) - - b.StartTimer() - bufWriter, err := NewFromReader(reader) - assert.NoError(b, err) - b.StopTimer() - - err = bufWriter.CloseForWriting() - assert.NoError(b, err) - - rdr, err := bufWriter.ReadCloser() - assert.NoError(b, err) - rdr.Close() - } -} From 2fb90295ced9355721ba342f181b4a0792efe6c6 Mon Sep 17 00:00:00 2001 From: Zachary Rice Date: Tue, 10 Sep 2024 13:07:27 -0500 Subject: [PATCH 3/5] update aha keyword (#3281) --- pkg/detectors/aha/aha.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/detectors/aha/aha.go b/pkg/detectors/aha/aha.go index 07d99671e..96db04190 100644 --- a/pkg/detectors/aha/aha.go +++ b/pkg/detectors/aha/aha.go @@ -31,7 +31,7 @@ var ( // Keywords are used for efficiently pre-filtering chunks. // Use identifiers in the secret preferably, or the provider name. 
func (s Scanner) Keywords() []string { - return []string{"aha"} + return []string{"aha.io"} } func (s Scanner) getClient() *http.Client { From b7411d29223192fe446514c24d3e05e3a6bef7c4 Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:58:40 -0400 Subject: [PATCH 4/5] Clarify "no decoder found for chunk" log message (#3001) * chore(engine): clarify trace log message * chore(engine): fix merge conflicts --- pkg/decoders/base64.go | 8 ++++---- pkg/decoders/escaped_unicode.go | 8 ++++---- pkg/decoders/utf16.go | 8 ++++---- pkg/decoders/utf8.go | 8 ++++---- pkg/engine/engine.go | 3 +-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/pkg/decoders/base64.go b/pkg/decoders/base64.go index 8d19e7eb5..dcf78cc8a 100644 --- a/pkg/decoders/base64.go +++ b/pkg/decoders/base64.go @@ -27,6 +27,10 @@ func init() { } } +func (d *Base64) Type() detectorspb.DecoderType { + return detectorspb.DecoderType_BASE64 +} + func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk { decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: d.Type()} encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20, b64CharsetMapping, b64EndChars) @@ -67,10 +71,6 @@ func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk { return nil } -func (d *Base64) Type() detectorspb.DecoderType { - return detectorspb.DecoderType_BASE64 -} - func isASCII(b []byte) bool { for i := 0; i < len(b); i++ { if b[i] > unicode.MaxASCII { diff --git a/pkg/decoders/escaped_unicode.go b/pkg/decoders/escaped_unicode.go index b47d9383f..f1a523f23 100644 --- a/pkg/decoders/escaped_unicode.go +++ b/pkg/decoders/escaped_unicode.go @@ -24,6 +24,10 @@ var ( escapePat = regexp.MustCompile(`(?i:\\{1,2}u)([a-fA-F0-9]{4})`) ) +func (d *EscapedUnicode) Type() detectorspb.DecoderType { + return detectorspb.DecoderType_ESCAPED_UNICODE +} + func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk { if chunk == nil || len(chunk.Data) == 0 { return nil @@ -94,10 +98,6 @@ func decodeCodePoint(input []byte) []byte { return input } -func (d *EscapedUnicode) Type() detectorspb.DecoderType { - return detectorspb.DecoderType_ESCAPED_UNICODE -} - func decodeEscaped(input []byte) []byte { // Find all Unicode escape sequences in the input byte slice indices := escapePat.FindAllSubmatchIndex(input, -1) diff --git a/pkg/decoders/utf16.go b/pkg/decoders/utf16.go index 7f0616d25..5949196e4 100644 --- a/pkg/decoders/utf16.go +++ b/pkg/decoders/utf16.go @@ -11,6 +11,10 @@ import ( type UTF16 struct{} +func (d *UTF16) Type() detectorspb.DecoderType { + return detectorspb.DecoderType_UTF16 +} + func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk { if chunk == nil || len(chunk.Data) == 0 { return nil @@ -28,10 +32,6 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk { return nil } -func (d *UTF16) Type() detectorspb.DecoderType { - return detectorspb.DecoderType_UTF16 -} - // utf16ToUTF8 converts a byte slice containing UTF-16 encoded data to a UTF-8 encoded byte slice. 
func utf16ToUTF8(b []byte) ([]byte, error) { var bufBE, bufLE bytes.Buffer diff --git a/pkg/decoders/utf8.go b/pkg/decoders/utf8.go index 6672aade3..49cfea451 100644 --- a/pkg/decoders/utf8.go +++ b/pkg/decoders/utf8.go @@ -10,6 +10,10 @@ import ( type UTF8 struct{} +func (d *UTF8) Type() detectorspb.DecoderType { + return detectorspb.DecoderType_PLAIN +} + func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk { if chunk == nil || len(chunk.Data) == 0 { return nil @@ -25,10 +29,6 @@ func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk { return decodableChunk } -func (d *UTF8) Type() detectorspb.DecoderType { - return detectorspb.DecoderType_PLAIN -} - // extractSubstrings performs similarly to the strings binutil, // extacting contigous portions of printable characters that we care // about from some bytes diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 909dfa9fd..d2ca05a88 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -771,7 +771,7 @@ func (e *Engine) scannerWorker(ctx context.Context) { decodeLatency.WithLabelValues(decoder.Type().String(), chunk.SourceName).Observe(float64(decodeTime)) if decoded == nil { - ctx.Logger().V(4).Info("no decoder found for chunk", "chunk", chunk) + ctx.Logger().V(4).Info("decoder not applicable for chunk", "decoder", decoder.Type().String(), "chunk", chunk) continue } @@ -797,7 +797,6 @@ func (e *Engine) scannerWorker(ctx context.Context) { wgDoneFn: wgDetect.Done, } } - continue } dataSize := float64(len(chunk.Data)) From 70c6bb56345b8bdfbd181442d16a6ae68de1cdb3 Mon Sep 17 00:00:00 2001 From: Dustin Decker Date: Tue, 10 Sep 2024 15:51:41 -0700 Subject: [PATCH 5/5] feature flag additional refs (#3282) --- pkg/feature/feature.go | 5 +++-- pkg/sources/git/git.go | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/feature/feature.go b/pkg/feature/feature.go index 59ec82403..cd4669247 100644 --- a/pkg/feature/feature.go +++ b/pkg/feature/feature.go @@ -3,6 +3,7 @@ package feature import "sync/atomic" var ( - ForceSkipBinaries = atomic.Bool{} - ForceSkipArchives = atomic.Bool{} + ForceSkipBinaries = atomic.Bool{} + ForceSkipArchives = atomic.Bool{} + SkipAdditionalRefs = atomic.Bool{} ) diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index b98cadbee..23a69f6cf 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -420,10 +420,13 @@ func executeClone(ctx context.Context, params cloneParams) (*git.Repository, err "clone", cloneURL.String(), params.clonePath, - "-c", - "remote.origin.fetch=+refs/*:refs/remotes/origin/*", "--quiet", // https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--quietcode } + if !feature.SkipAdditionalRefs.Load() { + gitArgs = append(gitArgs, + "-c", + "remote.origin.fetch=+refs/*:refs/remotes/origin/*") + } gitArgs = append(gitArgs, params.args...) cloneCmd := exec.Command("git", gitArgs...)
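
A note on the "Jira Email fix" in PATCH 1/5: wrapping the address portion of
emailPat in parentheses makes it submatch 1 of FindAllStringSubmatch, so the
removed strings.Split workaround (which only worked when exactly one space
separated the keyword from the address inside the full match) is no longer
needed. The sketch below is a standalone illustration, not trufflehog code;
the `(?i:jira)(?:.|[\n\r]){0,40}?` prefix is a hypothetical stand-in for
detectors.PrefixRegex([]string{"jira"}).

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        // Hypothetical stand-in for detectors.PrefixRegex([]string{"jira"}):
        // the keyword followed lazily by up to 40 arbitrary characters.
        prefix := `(?i:jira)(?:.|[\n\r]){0,40}?`

        // Old pattern: no capture group, so every match slice has length 1
        // and the "len(email) != 2" guard skipped every hit.
        oldPat := regexp.MustCompile(prefix + `\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`)

        // New pattern from the patch: the bare address is submatch 1.
        newPat := regexp.MustCompile(prefix + `\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,})\b`)

        data := "jira account email: alice@example.com"

        oldMatch := oldPat.FindAllStringSubmatch(data, -1)[0]
        fmt.Println(len(oldMatch)) // 1: the full match only

        newMatch := newPat.FindAllStringSubmatch(data, -1)[0]
        fmt.Println(newMatch[1]) // alice@example.com
    }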
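
After PATCH 2/5, BufferedFileWriter keeps a single accessor, ReadCloser, and
it is gated on the writer being in read-only mode. A minimal sketch of the
lifecycle the remaining API implies (write, CloseForWriting, then ReadCloser),
using only exported names visible in the diff; the import path is assumed
from the file path in the patch, and New() is assumed to pick sane defaults:

    package main

    import (
        "fmt"
        "io"

        bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer"
    )

    func main() {
        w := bufferedfilewriter.New()

        // Writes are buffered in memory until the configured threshold,
        // then spilled to a temp file.
        if _, err := w.Write([]byte("hello world")); err != nil {
            panic(err)
        }

        // Asking for a reader while still writable fails, per the state
        // check shown in the diff.
        if _, err := w.ReadCloser(); err != nil {
            fmt.Println("expected:", err)
        }

        if err := w.CloseForWriting(); err != nil {
            panic(err)
        }

        rc, err := w.ReadCloser()
        if err != nil {
            panic(err)
        }
        defer rc.Close() // releases the pooled buffer or closes the spill file

        data, err := io.ReadAll(rc)
        if err != nil {
            panic(err)
        }
        fmt.Println(string(data)) // hello world
    }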
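
On the engine.go log reword in PATCH 4/5: a nil return from FromChunk means
that one decoder declined the chunk, not that the engine has no decoders at
all, which is what "no decoder found for chunk" suggested. A compressed
sketch of the loop shape, with simplified stand-ins for sources.Chunk and
the decoder interface (the real signatures live in pkg/decoders and differ):

    package main

    import "fmt"

    type chunk struct{ data []byte }

    // fromChunk returns nil to signal "this decoder does not apply", the
    // same convention the engine relies on.
    type decoder interface {
        typeName() string
        fromChunk(*chunk) *chunk
    }

    type utf16 struct{}

    func (utf16) typeName() string { return "UTF16" }
    func (utf16) fromChunk(c *chunk) *chunk {
        if c == nil || len(c.data)%2 != 0 {
            return nil // an odd byte count cannot be UTF-16, so bow out
        }
        return c
    }

    func main() {
        decoders := []decoder{utf16{}}
        c := &chunk{data: []byte("odd")}

        for _, d := range decoders {
            decoded := d.fromChunk(c)
            if decoded == nil {
                // Naming the decoder makes it clear only this one was
                // skipped, not that zero decoders exist.
                fmt.Printf("decoder not applicable for chunk decoder=%s\n", d.typeName())
                continue
            }
            _ = decoded // would be handed off to the detection workers
        }
    }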
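
PATCH 5/5 turns previously unconditional behavior into an opt-out: the
widened refspec remote.origin.fetch=+refs/*:refs/remotes/origin/* tells git
to fetch all refs (including, for example, GitHub's refs/pull/*/head) rather
than only branches, and setting SkipAdditionalRefs restores a plain clone. A
sketch of the same atomic.Bool flag pattern, with cloneParams and the
surrounding plumbing omitted:

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // A package-level atomic.Bool in the style of pkg/feature: safe to
    // flip from one goroutine and read from many without a mutex.
    var SkipAdditionalRefs atomic.Bool

    func cloneArgs(cloneURL, clonePath string) []string {
        args := []string{"clone", cloneURL, clonePath, "--quiet"}
        // Default: widen the fetch refspec so every remote ref lands
        // under refs/remotes/origin/*, mirroring the patched executeClone.
        if !SkipAdditionalRefs.Load() {
            args = append(args, "-c", "remote.origin.fetch=+refs/*:refs/remotes/origin/*")
        }
        return args
    }

    func main() {
        fmt.Println(cloneArgs("https://example.com/repo.git", "/tmp/repo"))

        SkipAdditionalRefs.Store(true) // e.g. set from a CLI flag at startup
        fmt.Println(cloneArgs("https://example.com/repo.git", "/tmp/repo"))
    }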