[chore] - optimize chunker (#1535)

* Use a chunkBytes buffer that already includes the size of the peek.

* Fix linter warnings.

* Continue when no bytes are read.

* Add TotalChunkSize const.
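
The gist of the optimization: instead of allocating a ChunkSize buffer and then appending the peeked bytes (which forces a second allocation and copy once the combined length exceeds ChunkSize), allocate one TotalChunkSize buffer up front, read into its first ChunkSize bytes, and copy the peek into the remaining capacity. The stand-alone sketch below contrasts the two shapes; it is illustrative only, not the repository code, and buildChunkBefore/buildChunkAfter plus the lowercase constants are invented for the example.

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
)

const (
	chunkSize      = 10 * 1024
	peekSize       = 3 * 1024
	totalChunkSize = chunkSize + peekSize
)

// buildChunkBefore mirrors the old shape: read into a chunkSize buffer, then
// append the peek, which reallocates whenever n+len(peek) exceeds chunkSize.
func buildChunkBefore(r *bufio.Reader) []byte {
	buf := make([]byte, chunkSize)
	n, _ := r.Read(buf)
	peek, _ := r.Peek(peekSize)
	return append(buf[:n], peek...)
}

// buildChunkAfter mirrors the new shape: one totalChunkSize allocation, read
// into the first chunkSize bytes, copy the peek into the tail, and re-slice
// without any further allocation.
func buildChunkAfter(r *bufio.Reader) []byte {
	buf := make([]byte, totalChunkSize)
	n, _ := r.Read(buf[:chunkSize])
	peek, _ := r.Peek(peekSize)
	copy(buf[n:], peek)
	return buf[:n+len(peek)]
}

func main() {
	data := bytes.Repeat([]byte("a"), chunkSize*3)
	before := buildChunkBefore(bufio.NewReaderSize(bytes.NewReader(data), chunkSize))
	after := buildChunkAfter(bufio.NewReaderSize(bytes.NewReader(data), chunkSize))
	fmt.Println(len(before), len(after), bytes.Equal(before, after)) // 13312 13312 true
}
```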
ahrav 2023-07-24 19:30:29 -07:00 committed by GitHub
parent 85f363f093
commit b5b01d3eba
2 changed files with 27 additions and 9 deletions


@@ -12,6 +12,8 @@ const (
 	ChunkSize = 10 * 1024
 	// PeekSize is the size of the peek into the previous chunk.
 	PeekSize = 3 * 1024
+	// TotalChunkSize is the total size of a chunk with peek data.
+	TotalChunkSize = ChunkSize + PeekSize
 )
 
 // Chunker takes a chunk and splits it into chunks of ChunkSize.
@@ -19,27 +21,31 @@ func Chunker(originalChunk *Chunk) chan *Chunk {
 	chunkChan := make(chan *Chunk)
 	go func() {
 		defer close(chunkChan)
-		if len(originalChunk.Data) <= ChunkSize+PeekSize {
+		if len(originalChunk.Data) <= TotalChunkSize {
 			chunkChan <- originalChunk
 			return
 		}
 		r := bytes.NewReader(originalChunk.Data)
 		reader := bufio.NewReaderSize(bufio.NewReader(r), ChunkSize)
 		for {
-			chunkBytes := make([]byte, ChunkSize)
+			chunkBytes := make([]byte, TotalChunkSize)
 			chunk := *originalChunk
+			chunkBytes = chunkBytes[:ChunkSize]
 			n, err := reader.Read(chunkBytes)
 			if err != nil && !errors.Is(err, io.EOF) {
 				break
 			}
+			if n == 0 {
+				if errors.Is(err, io.EOF) {
+					break
+				}
+				continue
+			}
 			peekData, _ := reader.Peek(PeekSize)
-			chunk.Data = append(chunkBytes[:n], peekData...)
-			if n > 0 {
-				chunkChan <- &chunk
-			}
+			copy(chunkBytes[n:], peekData)
+			chunk.Data = chunkBytes[:n+len(peekData)]
+			chunkChan <- &chunk
 			if errors.Is(err, io.EOF) {
 				break
 			}
 		}
 	}()
 	return chunkChan

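The calling convention is unchanged by this commit: Chunker still returns a channel that the goroutine closes after emitting the last piece, so callers simply range over it. A minimal consumer sketch, assuming the Chunk.Data field shown in the diff (countChunks itself is invented for illustration):

```go
// Illustrative only, not part of the commit: drain the channel returned by
// Chunker; it is closed once the goroutine has emitted every piece.
func countChunks(c *Chunk) (pieces, total int) {
	for sub := range Chunker(c) {
		pieces++
		total += len(sub.Data) // each piece holds at most TotalChunkSize bytes
	}
	return pieces, total
}
```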

@@ -72,3 +72,15 @@ func TestChunker(t *testing.T) {
 	}
 }
+
+func BenchmarkChunker(b *testing.B) {
+	data := bytes.Repeat([]byte("a"), ChunkSize*100)
+	chunk := &Chunk{
+		Data: data,
+	}
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		for range Chunker(chunk) {
+		}
+	}
+}
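
Since the change is about avoiding per-iteration allocations, a variant of the benchmark that also reports allocation counts makes the effect visible in the output. This is a sketch that is not part of the commit; it assumes it lives in the same test file and reuses the Chunk and Chunker identifiers above.

```go
// Illustrative variant, not part of the commit: identical workload, but
// ReportAllocs surfaces allocs/op and B/op next to ns/op.
func BenchmarkChunkerAllocs(b *testing.B) {
	data := bytes.Repeat([]byte("a"), ChunkSize*100)
	chunk := &Chunk{Data: data}

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for range Chunker(chunk) {
		}
	}
}
```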