2023-07-26 05:48:28 +00:00
|
|
|
package sources
|
|
|
|
|
|
|
|
import (
|
2023-08-25 17:43:33 +00:00
|
|
|
"errors"
|
2023-07-26 05:48:28 +00:00
|
|
|
"fmt"
|
|
|
|
"testing"
|
|
|
|
|
2023-08-03 18:36:30 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2023-09-07 16:03:37 +00:00
|
|
|
"google.golang.org/protobuf/types/known/anypb"
|
|
|
|
|
2023-08-03 18:36:30 +00:00
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
2023-07-26 05:48:28 +00:00
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
|
|
|
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
|
|
|
|
)
|
|
|
|
|
|
|
|
// DummySource implements Source and is used for testing a SourceManager.
|
|
|
|
type DummySource struct {
|
|
|
|
sourceID int64
|
|
|
|
jobID int64
|
|
|
|
chunker
|
|
|
|
}
|
|
|
|
|
|
|
|
// Type returns a fixed placeholder source type for the dummy source.
func (d *DummySource) Type() sourcespb.SourceType {
	const dummyType sourcespb.SourceType = 1337
	return dummyType
}
|
|
|
|
// SourceID returns the source ID that was recorded by Init.
func (d *DummySource) SourceID() int64 {
	return d.sourceID
}
|
|
|
|
// JobID returns the job ID that was recorded by Init.
func (d *DummySource) JobID() int64 {
	return d.jobID
}
|
|
|
|
func (d *DummySource) Init(_ context.Context, _ string, jobID, sourceID int64, _ bool, _ *anypb.Any, _ int) error {
|
|
|
|
d.sourceID = sourceID
|
|
|
|
d.jobID = jobID
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// GetProgress always returns nil; the dummy source tracks no progress.
func (d *DummySource) GetProgress() *Progress {
	return nil
}
|
|
|
|
|
|
|
|
// Interface to easily test different chunking methods.
|
|
|
|
type chunker interface {
|
2023-09-07 16:03:37 +00:00
|
|
|
Chunks(context.Context, chan *Chunk, ...ChunkingTarget) error
|
2023-07-27 15:49:56 +00:00
|
|
|
ChunkUnit(ctx context.Context, unit SourceUnit, reporter ChunkReporter) error
|
|
|
|
Enumerate(ctx context.Context, reporter UnitReporter) error
|
2023-07-26 05:48:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// counterChunker is a chunker whose Chunks method writes count single-byte
// chunks to the channel before returning.
type counterChunker struct {
	// chunkCounter is the byte carried by the next chunk Chunks emits; it is
	// incremented after every successful send.
	chunkCounter byte
	// count is how many chunks Chunks emits per call and how many units
	// Enumerate reports.
	count int
}
|
|
|
|
|
2023-09-07 16:03:37 +00:00
|
|
|
func (c *counterChunker) Chunks(ctx context.Context, ch chan *Chunk, _ ...ChunkingTarget) error {
|
2023-07-26 05:48:28 +00:00
|
|
|
for i := 0; i < c.count; i++ {
|
2023-08-03 18:36:30 +00:00
|
|
|
select {
|
|
|
|
case ch <- &Chunk{Data: []byte{c.chunkCounter}}:
|
|
|
|
c.chunkCounter++
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2023-07-26 05:48:28 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-07-27 15:49:56 +00:00
|
|
|
// countChunk implements SourceUnit. Its underlying byte is the value that
// counterChunker assigns to the unit.
type countChunk byte

// SourceUnitID returns an identifier of the form "countChunk(N)", where N is
// the decimal value of the underlying byte.
func (c countChunk) SourceUnitID() string {
	value := byte(c)
	return fmt.Sprintf("countChunk(%d)", value)
}
|
|
|
|
|
|
|
|
func (c *counterChunker) Enumerate(ctx context.Context, reporter UnitReporter) error {
|
|
|
|
for i := 0; i < c.count; i++ {
|
|
|
|
if err := reporter.UnitOk(ctx, countChunk(byte(i))); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *counterChunker) ChunkUnit(ctx context.Context, unit SourceUnit, reporter ChunkReporter) error {
|
|
|
|
return reporter.ChunkOk(ctx, Chunk{Data: []byte{byte(unit.(countChunk))}})
|
|
|
|
}
|
|
|
|
|
|
|
|
// errorChunker is a chunker whose methods always return an error. It embeds
// an error value, so the chunker itself satisfies the error interface.
type errorChunker struct {
	error
}
|
|
|
|
|
2023-09-07 16:03:37 +00:00
|
|
|
// Chunks ignores its arguments and returns the chunker itself as the error.
func (c errorChunker) Chunks(_ context.Context, _ chan *Chunk, _ ...ChunkingTarget) error {
	return c
}
|
|
|
|
// Enumerate ignores its arguments and returns the chunker itself as the error.
func (c errorChunker) Enumerate(_ context.Context, _ UnitReporter) error {
	return c
}
|
|
|
|
// ChunkUnit ignores its arguments and returns the chunker itself as the error.
func (c errorChunker) ChunkUnit(_ context.Context, _ SourceUnit, _ ChunkReporter) error {
	return c
}
|
2023-07-27 15:49:56 +00:00
|
|
|
|
2023-07-26 05:48:28 +00:00
|
|
|
// enrollDummy is a helper function to enroll a DummySource with a SourceManager.
|
|
|
|
func enrollDummy(mgr *SourceManager, chunkMethod chunker) (handle, error) {
|
|
|
|
return mgr.Enroll(context.Background(), "dummy", 1337,
|
|
|
|
func(ctx context.Context, jobID, sourceID int64) (Source, error) {
|
|
|
|
source := &DummySource{chunker: chunkMethod}
|
|
|
|
if err := source.Init(ctx, "dummy", jobID, sourceID, true, nil, 42); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return source, nil
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// tryRead is a helper function that will try to read from a channel and return
|
|
|
|
// an error if it cannot.
|
|
|
|
func tryRead(ch <-chan *Chunk) (*Chunk, error) {
|
|
|
|
select {
|
|
|
|
case chunk := <-ch:
|
|
|
|
return chunk, nil
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("no chunk available")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSourceManagerRun(t *testing.T) {
|
|
|
|
mgr := NewManager(WithBufferedOutput(8))
|
|
|
|
handle, err := enrollDummy(mgr, &counterChunker{count: 1})
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
2023-07-26 05:48:28 +00:00
|
|
|
for i := 0; i < 3; i++ {
|
2023-08-03 18:36:30 +00:00
|
|
|
_, err = mgr.Run(context.Background(), handle)
|
|
|
|
assert.NoError(t, err)
|
2023-07-26 05:48:28 +00:00
|
|
|
chunk, err := tryRead(mgr.Chunks())
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
|
|
|
assert.Equal(t, []byte{byte(i)}, chunk.Data)
|
2023-07-26 05:48:28 +00:00
|
|
|
// The Chunks channel should be empty now.
|
2023-08-03 18:36:30 +00:00
|
|
|
_, err = tryRead(mgr.Chunks())
|
|
|
|
assert.Error(t, err)
|
2023-07-26 05:48:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSourceManagerWait(t *testing.T) {
|
|
|
|
mgr := NewManager()
|
|
|
|
handle, err := enrollDummy(mgr, &counterChunker{count: 1})
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
2023-07-26 05:48:28 +00:00
|
|
|
// Asynchronously run the source.
|
2023-08-03 18:36:30 +00:00
|
|
|
_, err = mgr.ScheduleRun(context.Background(), handle)
|
|
|
|
assert.NoError(t, err)
|
2023-07-26 05:48:28 +00:00
|
|
|
// Read the 1 chunk we're expecting so Waiting completes.
|
|
|
|
<-mgr.Chunks()
|
|
|
|
// Wait for all resources to complete.
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, mgr.Wait())
|
2023-07-26 05:48:28 +00:00
|
|
|
// Enroll and run should return an error now.
|
2023-08-03 18:36:30 +00:00
|
|
|
_, err = enrollDummy(mgr, &counterChunker{count: 1})
|
|
|
|
assert.Error(t, err)
|
|
|
|
_, err = mgr.ScheduleRun(context.Background(), handle)
|
|
|
|
assert.Error(t, err)
|
2023-07-26 05:48:28 +00:00
|
|
|
}
|
2023-07-27 15:49:56 +00:00
|
|
|
|
|
|
|
func TestSourceManagerError(t *testing.T) {
|
|
|
|
mgr := NewManager()
|
|
|
|
handle, err := enrollDummy(mgr, errorChunker{fmt.Errorf("oops")})
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
2023-07-27 15:49:56 +00:00
|
|
|
// A synchronous run should fail.
|
2023-08-03 18:36:30 +00:00
|
|
|
_, err = mgr.Run(context.Background(), handle)
|
|
|
|
assert.Error(t, err)
|
2023-07-27 15:49:56 +00:00
|
|
|
// Scheduling a run should not fail, but the error should surface in
|
|
|
|
// Wait().
|
2023-07-31 16:28:30 +00:00
|
|
|
ref, err := mgr.ScheduleRun(context.Background(), handle)
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
|
|
|
assert.Error(t, mgr.Wait())
|
|
|
|
assert.Error(t, ref.Snapshot().FatalError())
|
2023-07-27 15:49:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestSourceManagerReport(t *testing.T) {
|
|
|
|
for _, opts := range [][]func(*SourceManager){
|
|
|
|
{WithBufferedOutput(8)},
|
|
|
|
{WithBufferedOutput(8), WithSourceUnits()},
|
|
|
|
{WithBufferedOutput(8), WithSourceUnits(), WithConcurrentUnits(1)},
|
|
|
|
} {
|
|
|
|
mgr := NewManager(opts...)
|
|
|
|
handle, err := enrollDummy(mgr, &counterChunker{count: 4})
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
2023-07-27 15:49:56 +00:00
|
|
|
// Synchronously run the source.
|
2023-07-31 16:28:30 +00:00
|
|
|
ref, err := mgr.Run(context.Background(), handle)
|
2023-08-03 18:36:30 +00:00
|
|
|
assert.NoError(t, err)
|
|
|
|
assert.Equal(t, 0, len(ref.Snapshot().Errors))
|
|
|
|
assert.Equal(t, uint64(4), ref.Snapshot().TotalChunks)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// unitChunk describes one step of a unitChunker: unit is the source unit ID,
// output is the chunk data produced for that unit, and a non-empty err makes
// the step report an error instead of a chunk.
type unitChunk struct {
	unit, output, err string
}
|
|
|
|
|
|
|
|
// unitChunker is a chunker driven by a fixed list of steps, each describing
// one unit and either the chunk output or the error it produces.
type unitChunker struct {
	steps []unitChunk
}
|
|
|
|
|
2023-09-07 16:03:37 +00:00
|
|
|
func (c *unitChunker) Chunks(ctx context.Context, ch chan *Chunk, _ ...ChunkingTarget) error {
|
2023-08-03 18:36:30 +00:00
|
|
|
for _, step := range c.steps {
|
|
|
|
if step.err != "" {
|
|
|
|
continue
|
2023-07-27 15:49:56 +00:00
|
|
|
}
|
2023-08-03 18:36:30 +00:00
|
|
|
if err := common.CancellableWrite(ctx, ch, &Chunk{Data: []byte(step.output)}); err != nil {
|
|
|
|
return err
|
2023-07-27 15:49:56 +00:00
|
|
|
}
|
2023-08-03 18:36:30 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
func (c *unitChunker) Enumerate(ctx context.Context, rep UnitReporter) error {
|
|
|
|
for _, step := range c.steps {
|
|
|
|
if err := rep.UnitOk(ctx, CommonSourceUnit{step.unit}); err != nil {
|
|
|
|
return err
|
2023-07-27 15:49:56 +00:00
|
|
|
}
|
|
|
|
}
|
2023-08-03 18:36:30 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
func (c *unitChunker) ChunkUnit(ctx context.Context, unit SourceUnit, rep ChunkReporter) error {
|
|
|
|
for _, step := range c.steps {
|
|
|
|
if unit.SourceUnitID() != step.unit {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if step.err != "" {
|
|
|
|
if err := rep.ChunkErr(ctx, fmt.Errorf(step.err)); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := rep.ChunkOk(ctx, Chunk{Data: []byte(step.output)}); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSourceManagerNonFatalError(t *testing.T) {
|
|
|
|
input := []unitChunk{
|
|
|
|
{unit: "one", output: "bar"},
|
|
|
|
{unit: "two", err: "oh no"},
|
|
|
|
{unit: "three", err: "not again"},
|
|
|
|
}
|
|
|
|
mgr := NewManager(WithBufferedOutput(8), WithSourceUnits())
|
|
|
|
handle, err := enrollDummy(mgr, &unitChunker{input})
|
|
|
|
assert.NoError(t, err)
|
|
|
|
ref, err := mgr.Run(context.Background(), handle)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
report := ref.Snapshot()
|
|
|
|
assert.Equal(t, len(input), int(report.TotalUnits))
|
|
|
|
assert.Equal(t, len(input), int(report.FinishedUnits))
|
|
|
|
assert.Equal(t, 1, int(report.TotalChunks))
|
|
|
|
assert.Equal(t, 2, len(report.Errors))
|
|
|
|
assert.True(t, report.DoneEnumerating)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSourceManagerContextCancelled(t *testing.T) {
|
|
|
|
mgr := NewManager(WithBufferedOutput(8))
|
|
|
|
handle, err := enrollDummy(mgr, &counterChunker{count: 100})
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
ref, err := mgr.ScheduleRun(ctx, handle)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
cancel()
|
|
|
|
<-ref.Done()
|
|
|
|
report := ref.Snapshot()
|
|
|
|
assert.Error(t, report.FatalError())
|
2023-07-27 15:49:56 +00:00
|
|
|
}
|
2023-08-22 14:55:56 +00:00
|
|
|
|
|
|
|
type DummyAPI struct {
|
|
|
|
registerSource func(context.Context, string, sourcespb.SourceType) (int64, error)
|
|
|
|
getJobID func(context.Context, int64) (int64, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (api DummyAPI) RegisterSource(ctx context.Context, name string, kind sourcespb.SourceType) (int64, error) {
|
|
|
|
return api.registerSource(ctx, name, kind)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (api DummyAPI) GetJobID(ctx context.Context, id int64) (int64, error) {
|
|
|
|
return api.getJobID(ctx, id)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSourceManagerJobAndSourceIDs(t *testing.T) {
|
|
|
|
mgr := NewManager(WithAPI(DummyAPI{
|
|
|
|
registerSource: func(context.Context, string, sourcespb.SourceType) (int64, error) {
|
|
|
|
return 1337, nil
|
|
|
|
},
|
|
|
|
getJobID: func(context.Context, int64) (int64, error) {
|
|
|
|
return 9001, nil
|
|
|
|
},
|
|
|
|
}))
|
|
|
|
var (
|
|
|
|
initializedJobID int64
|
|
|
|
initializedSourceID int64
|
|
|
|
)
|
|
|
|
handle, err := mgr.Enroll(context.Background(), "dummy", 1337,
|
|
|
|
func(ctx context.Context, jobID, sourceID int64) (Source, error) {
|
|
|
|
initializedJobID = jobID
|
|
|
|
initializedSourceID = sourceID
|
|
|
|
return nil, fmt.Errorf("ignore")
|
|
|
|
})
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
2023-08-25 14:48:25 +00:00
|
|
|
ref, _ := mgr.Run(context.Background(), handle)
|
2023-08-22 14:55:56 +00:00
|
|
|
assert.Equal(t, int64(1337), initializedSourceID)
|
2023-08-25 14:48:25 +00:00
|
|
|
assert.Equal(t, int64(1337), ref.SourceID)
|
2023-08-22 14:55:56 +00:00
|
|
|
assert.Equal(t, int64(9001), initializedJobID)
|
2023-08-25 14:48:25 +00:00
|
|
|
assert.Equal(t, int64(9001), ref.JobID)
|
|
|
|
assert.Equal(t, "dummy", ref.SourceName)
|
2023-08-22 14:55:56 +00:00
|
|
|
}
|
2023-08-25 17:43:33 +00:00
|
|
|
|
|
|
|
// Chunk method that has a custom callback for the Chunks method.
|
|
|
|
type callbackChunker struct {
|
|
|
|
cb func(context.Context, chan *Chunk) error
|
|
|
|
}
|
|
|
|
|
2023-09-07 16:03:37 +00:00
|
|
|
func (c callbackChunker) Chunks(ctx context.Context, ch chan *Chunk, _ ...ChunkingTarget) error {
|
|
|
|
return c.cb(ctx, ch)
|
|
|
|
}
|
2023-08-25 17:43:33 +00:00
|
|
|
// Enumerate is a no-op that always returns nil.
func (c callbackChunker) Enumerate(_ context.Context, _ UnitReporter) error {
	return nil
}
|
|
|
|
// ChunkUnit is a no-op that always returns nil.
func (c callbackChunker) ChunkUnit(_ context.Context, _ SourceUnit, _ ChunkReporter) error {
	return nil
}
|
|
|
|
|
|
|
|
func TestSourceManagerCancelRun(t *testing.T) {
|
|
|
|
mgr := NewManager(WithBufferedOutput(8))
|
|
|
|
var returnedErr error
|
|
|
|
handle, err := enrollDummy(mgr, callbackChunker{func(ctx context.Context, _ chan *Chunk) error {
|
|
|
|
// The context passed to Chunks should get cancelled when ref.CancelRun() is called.
|
|
|
|
<-ctx.Done()
|
|
|
|
returnedErr = fmt.Errorf("oh no: %w", ctx.Err())
|
|
|
|
return returnedErr
|
|
|
|
}})
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
ref, err := mgr.ScheduleRun(context.Background(), handle)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
2023-08-30 19:00:44 +00:00
|
|
|
cancelErr := fmt.Errorf("abort! abort!")
|
|
|
|
ref.CancelRun(cancelErr)
|
2023-08-25 17:43:33 +00:00
|
|
|
<-ref.Done()
|
|
|
|
assert.Error(t, ref.Snapshot().FatalError())
|
|
|
|
assert.True(t, errors.Is(ref.Snapshot().FatalError(), returnedErr))
|
2023-08-30 19:00:44 +00:00
|
|
|
assert.True(t, errors.Is(ref.Snapshot().FatalErrors(), cancelErr))
|
2023-08-25 17:43:33 +00:00
|
|
|
}
|
2023-08-29 19:36:44 +00:00
|
|
|
|
|
|
|
func TestSourceManagerAvailableCapacity(t *testing.T) {
|
|
|
|
mgr := NewManager(WithConcurrentSources(1337))
|
|
|
|
start, end := make(chan struct{}), make(chan struct{})
|
|
|
|
handle, err := enrollDummy(mgr, callbackChunker{func(context.Context, chan *Chunk) error {
|
|
|
|
start <- struct{}{} // Send start signal.
|
|
|
|
<-end // Wait for end signal.
|
|
|
|
return nil
|
|
|
|
}})
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
assert.Equal(t, 1337, mgr.AvailableCapacity())
|
|
|
|
ref, err := mgr.ScheduleRun(context.Background(), handle)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
<-start // Wait for start signal.
|
|
|
|
assert.Equal(t, 1336, mgr.AvailableCapacity())
|
|
|
|
end <- struct{}{} // Send end signal.
|
|
|
|
<-ref.Done() // Wait for the job to finish.
|
|
|
|
assert.Equal(t, 1337, mgr.AvailableCapacity())
|
|
|
|
}
|