Capture decoding time metric (#3209)

We're trying to track down some slowness.
This commit is contained in:
Cody Rose 2024-08-09 15:19:16 -04:00 committed by GitHub
parent f2c7bb93be
commit 9718ec6a51
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 36 additions and 4 deletions

View file

@ -28,7 +28,7 @@ func init() {
}
func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: detectorspb.DecoderType_BASE64}
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: d.Type()}
encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20, b64CharsetMapping, b64EndChars)
decodedSubstrings := make(map[string][]byte)
@ -67,6 +67,10 @@ func (d *Base64) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return nil
}
// Type reports the decoder type for the Base64 decoder, which is always
// detectorspb.DecoderType_BASE64.
func (d *Base64) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_BASE64
}
func isASCII(b []byte) bool {
for i := 0; i < len(b); i++ {
if b[i] > unicode.MaxASCII {

View file

@ -24,6 +24,7 @@ type DecodableChunk struct {
// Decoder is the interface implemented by each chunk decoder.
type Decoder interface {
// FromChunk attempts to decode the given chunk and returns the result as a
// DecodableChunk. A nil return value means the decoder produced nothing
// for this chunk.
FromChunk(chunk *sources.Chunk) *DecodableChunk
// Type returns the detectorspb.DecoderType value that identifies the
// decoder.
Type() detectorspb.DecoderType
}
// Fuzz is an entrypoint for go-fuzz, which is an AFL-style fuzzing tool.

View file

@ -41,7 +41,7 @@ func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk {
if matched {
decodableChunk := &DecodableChunk{
DecoderType: detectorspb.DecoderType_ESCAPED_UNICODE,
DecoderType: d.Type(),
Chunk: chunk,
}
return decodableChunk
@ -94,6 +94,10 @@ func decodeCodePoint(input []byte) []byte {
return input
}
// Type reports the decoder type for the EscapedUnicode decoder, which is
// always detectorspb.DecoderType_ESCAPED_UNICODE.
func (d *EscapedUnicode) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_ESCAPED_UNICODE
}
func decodeEscaped(input []byte) []byte {
// Find all Unicode escape sequences in the input byte slice
indices := escapePat.FindAllSubmatchIndex(input, -1)

View file

@ -16,7 +16,7 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return nil
}
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: detectorspb.DecoderType_UTF16}
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: d.Type()}
if utf16Data, err := utf16ToUTF8(chunk.Data); err == nil {
if len(utf16Data) == 0 {
return nil
@ -28,6 +28,10 @@ func (d *UTF16) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return nil
}
// Type reports the decoder type for the UTF16 decoder, which is always
// detectorspb.DecoderType_UTF16.
func (d *UTF16) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_UTF16
}
// utf16ToUTF8 converts a byte slice containing UTF-16 encoded data to a UTF-8 encoded byte slice.
func utf16ToUTF8(b []byte) ([]byte, error) {
var bufBE, bufLE bytes.Buffer

View file

@ -15,7 +15,7 @@ func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return nil
}
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: detectorspb.DecoderType_PLAIN}
decodableChunk := &DecodableChunk{Chunk: chunk, DecoderType: d.Type()}
if !utf8.Valid(chunk.Data) {
chunk.Data = extractSubstrings(chunk.Data)
@ -25,6 +25,10 @@ func (d *UTF8) FromChunk(chunk *sources.Chunk) *DecodableChunk {
return decodableChunk
}
// Type reports the decoder type for the UTF8 decoder. Note that this decoder
// identifies itself as detectorspb.DecoderType_PLAIN rather than with a
// UTF8-specific value.
func (d *UTF8) Type() detectorspb.DecoderType {
return detectorspb.DecoderType_PLAIN
}
// extractSubstrings performs similarly to the strings binutil,
// extracting contiguous portions of printable characters that we care
// about from some bytes

View file

@ -733,7 +733,11 @@ func (e *Engine) scannerWorker(ctx context.Context) {
startTime := time.Now()
sourceVerify := chunk.Verify
for _, decoder := range e.decoders {
decodeStart := time.Now()
decoded := decoder.FromChunk(chunk)
decodeTime := time.Since(decodeStart).Microseconds()
decodeLatency.WithLabelValues(decoder.Type().String(), chunk.SourceName).Observe(float64(decodeTime))
if decoded == nil {
ctx.Logger().V(4).Info("no decoder found for chunk", "chunk", chunk)
continue

View file

@ -8,6 +8,17 @@ import (
)
var (
// decodeLatency is a histogram of the time spent decoding a single chunk,
// recorded in microseconds and labeled by decoder type and source name.
// The buckets are exponential: 20 buckets whose upper bounds start at 50
// and double each step (50, 100, 200, ...).
// NOTE(review): the metric name carries no unit suffix; the unit is only
// stated in the Help text — confirm dashboards account for microseconds.
decodeLatency = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: common.MetricsNamespace,
Subsystem: common.MetricsSubsystem,
Name: "decode_latency",
Help: "Time spent decoding a chunk in microseconds",
Buckets: prometheus.ExponentialBuckets(50, 2, 20),
},
[]string{"decoder_type", "source_name"},
)
// Detector metrics.
detectorExecutionCount = promauto.NewCounterVec(
prometheus.CounterOpts{