Clarify "no decoder found for chunk" log message (#3001)

* chore(engine): clarify trace log message

* chore(engine): fix merge conflicts
This commit is contained in:
Richard Gomez 2024-09-10 14:58:40 -04:00 committed by GitHub
parent 2fb90295ce
commit b7411d2922
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 17 additions and 18 deletions

View file

@ -27,6 +27,10 @@ func init() {
}
}
// Type returns the decoder type reported by the Base64 decoder,
// detectorspb.DecoderType_BASE64.
func (d *Base64) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_BASE64
}
func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: d.Type()}
encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20, b64CharsetMapping, b64EndChars)
@ -67,10 +71,6 @@ func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return nil
}
// Type returns the decoder type reported by the Base64 decoder,
// detectorspb.DecoderType_BASE64.
func (d *Base64) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_BASE64
}
func isASCII(b []byte) bool {
for i := 0; i < len(b); i++ {
if b[i] > unicode.MaxASCII {

View file

@ -24,6 +24,10 @@ var (
escapePat = regexp.MustCompile(`(?i:\\{1,2}u)([a-fA-F0-9]{4})`)
)
// Type returns the decoder type reported by the EscapedUnicode decoder,
// detectorspb.DecoderType_ESCAPED_UNICODE.
func (d *EscapedUnicode) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_ESCAPED_UNICODE
}
func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk {
if chunk == nil || len(chunk.Data) == 0 {
return nil
@ -94,10 +98,6 @@ func decodeCodePoint(input []byte) []byte {
return input
}
// Type returns the decoder type reported by the EscapedUnicode decoder,
// detectorspb.DecoderType_ESCAPED_UNICODE.
func (d *EscapedUnicode) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_ESCAPED_UNICODE
}
func decodeEscaped(input []byte) []byte {
// Find all Unicode escape sequences in the input byte slice
indices := escapePat.FindAllSubmatchIndex(input, -1)

View file

@ -11,6 +11,10 @@ import (
type UTF16 struct{}
// Type returns the decoder type reported by the UTF16 decoder,
// detectorspb.DecoderType_UTF16.
func (d *UTF16) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_UTF16
}
func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk {
if chunk == nil || len(chunk.Data) == 0 {
return nil
@ -28,10 +32,6 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return nil
}
// Type returns the decoder type reported by the UTF16 decoder,
// detectorspb.DecoderType_UTF16.
func (d *UTF16) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_UTF16
}
// utf16ToUTF8 converts a byte slice containing UTF-16 encoded data to a UTF-8 encoded byte slice.
func utf16ToUTF8(b []byte) ([]byte, error) {
var bufBE, bufLE bytes.Buffer

View file

@ -10,6 +10,10 @@ import (
type UTF8 struct{}
// Type returns the decoder type reported by the UTF8 decoder. Note that this
// is detectorspb.DecoderType_PLAIN rather than a UTF8-named enum value.
func (d *UTF8) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_PLAIN
}
func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk {
if chunk == nil || len(chunk.Data) == 0 {
return nil
@ -25,10 +29,6 @@ func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return decodableChunk
}
// Type returns the decoder type reported by the UTF8 decoder. Note that this
// is detectorspb.DecoderType_PLAIN rather than a UTF8-named enum value.
func (d *UTF8) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_PLAIN
}
// extractSubstrings performs similarly to the strings binutil,
// extracting contiguous portions of printable characters that we care
// about from some bytes

View file

@ -771,7 +771,7 @@ func (e *Engine) scannerWorker(ctx context.Context) {
decodeLatency.WithLabelValues(decoder.Type().String(), chunk.SourceName).Observe(float64(decodeTime))
if decoded == nil {
ctx.Logger().V(4).Info("no decoder found for chunk", "chunk", chunk)
ctx.Logger().V(4).Info("decoder not applicable for chunk", "decoder", decoder.Type().String(), "chunk", chunk)
continue
}
@ -797,7 +797,6 @@ func (e *Engine) scannerWorker(ctx context.Context) {
wgDoneFn: wgDetect.Done,
}
}
continue
}
dataSize := float64(len(chunk.Data))