trufflehog/pkg/sources/chunker.go
Bill Rich 958266ea84
Run chunker in pipeline (#859)
* Run chunker in pipeline

* Move ChunkSize and PeekSize to source package.

* Use new Chunk and Peek size location
2022-10-24 13:57:27 -07:00

47 lines
1 KiB
Go

package sources
import (
"bufio"
"bytes"
"errors"
"io"
"github.com/sirupsen/logrus"
)
const (
// ChunkSize is the maximum size of a chunk.
ChunkSize = 10 * 1024
// PeekSize is the size of the peek into the previous chunk.
PeekSize = 3 * 1024
)
// Chunker takes a chunk and splits it into chunks of ChunkSize.
func Chunker(originalChunk *Chunk) chan *Chunk {
chunkChan := make(chan *Chunk)
go func() {
defer close(chunkChan)
if len(originalChunk.Data) <= ChunkSize+PeekSize {
chunkChan <- originalChunk
return
}
r := bytes.NewReader(originalChunk.Data)
reader := bufio.NewReaderSize(bufio.NewReader(r), ChunkSize)
for {
chunkBytes := make([]byte, ChunkSize)
chunk := *originalChunk
n, err := reader.Read(chunkBytes)
if err != nil && !errors.Is(err, io.EOF) {
logrus.WithError(err).Error("Error chunking reader.")
break
}
peekData, _ := reader.Peek(PeekSize)
chunk.Data = append(chunkBytes[:n], peekData...)
chunkChan <- &chunk
if errors.Is(err, io.EOF) {
break
}
}
}()
return chunkChan
}