Mirror of https://github.com/trufflesecurity/trufflehog.git (synced 2024-11-14 00:47:21 +00:00)

Commit f623c0686f: Merge branch 'main' into docker-tar-identification-from-filesystem-2

26 changed files with 502 additions and 129 deletions
@@ -32,7 +32,7 @@ Have questions? Feedback? Jump in slack or discord and hang out with us
 Join our [Slack Community](https://join.slack.com/t/trufflehog-community/shared_invite/zt-pw2qbi43-Aa86hkiimstfdKH9UCpPzQ)
 
-Join the [Secret Scanning Discord](https://discord.gg/sydS6AHTUP)
+Join the [Secret Scanning Discord](https://discord.gg/8Hzbrnkr7E)
 
 # :tv: Demo

@@ -26,9 +26,10 @@ type Context interface {
 	Logger() logr.Logger
 }
 
-// CancelFunc is a type alias to context.CancelFunc to allow use as if they are
-// the same types.
+// CancelFunc and CancelCauseFunc are type aliases to allow use as if they are
+// the same types as the standard library variants.
 type CancelFunc = context.CancelFunc
+type CancelCauseFunc = context.CancelCauseFunc
 
 // logCtx implements Context.
 type logCtx struct {
@@ -68,6 +69,16 @@ func WithCancel(parent Context) (Context, context.CancelFunc) {
 	return lCtx, cancel
 }
 
+// WithCancelCause returns context.WithCancelCause with the log object propagated.
+func WithCancelCause(parent Context) (Context, context.CancelCauseFunc) {
+	ctx, cancel := context.WithCancelCause(parent)
+	lCtx := logCtx{
+		log:     parent.Logger(),
+		Context: ctx,
+	}
+	return lCtx, cancel
+}
+
 // WithDeadline returns context.WithDeadline with the log object propagated and
 // the deadline added to the structured log values.
 func WithDeadline(parent Context, d time.Time) (Context, context.CancelFunc) {
@@ -79,6 +90,17 @@ func WithDeadline(parent Context, d time.Time) (Context, context.CancelFunc) {
 	return lCtx, cancel
 }
 
+// WithDeadlineCause returns context.WithDeadlineCause with the log object
+// propagated and the deadline added to the structured log values.
+func WithDeadlineCause(parent Context, d time.Time, cause error) (Context, context.CancelFunc) {
+	ctx, cancel := context.WithDeadlineCause(parent, d, cause)
+	lCtx := logCtx{
+		log:     parent.Logger().WithValues("deadline", d),
+		Context: ctx,
+	}
+	return lCtx, cancel
+}
+
 // WithTimeout returns context.WithTimeout with the log object propagated and
 // the timeout added to the structured log values.
 func WithTimeout(parent Context, timeout time.Duration) (Context, context.CancelFunc) {
@@ -90,6 +112,22 @@ func WithTimeout(parent Context, timeout time.Duration) (Context, context.CancelFunc) {
 	return lCtx, cancel
 }
 
+// WithTimeoutCause returns context.WithTimeoutCause with the log object
+// propagated and the timeout added to the structured log values.
+func WithTimeoutCause(parent Context, timeout time.Duration, cause error) (Context, context.CancelFunc) {
+	ctx, cancel := context.WithTimeoutCause(parent, timeout, cause)
+	lCtx := logCtx{
+		log:     parent.Logger().WithValues("timeout", timeout),
+		Context: ctx,
+	}
+	return lCtx, cancel
+}
+
+// Cause returns the context.Cause of the context.
+func Cause(ctx context.Context) error {
+	return context.Cause(ctx)
+}
+
 // WithValue returns context.WithValue with the log object propagated and
 // the value added to the structured log values (if the key is a string).
 func WithValue(parent Context, key, val any) Context {

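A note on the cause-aware additions above: they are thin wrappers over the standard library's cause-aware context functions (context.WithCancelCause landed in Go 1.20; the deadline and timeout variants in Go 1.21), so whatever cause is supplied at cancellation becomes retrievable through Cause once the context is done. A minimal usage sketch, assuming the package's exported Background as used in its tests:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
)

func main() {
	ctx, cancel := context.WithCancelCause(context.Background())

	// Cancel with a descriptive cause rather than the bare context.Canceled.
	cancel(errors.New("upstream source went away"))

	<-ctx.Done()
	fmt.Println(ctx.Err())          // context canceled
	fmt.Println(context.Cause(ctx)) // upstream source went away
}
```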
@@ -3,6 +3,7 @@ package context
 import (
 	"bytes"
 	"context"
+	"fmt"
 	"strings"
 	"testing"
 	"time"
@@ -177,3 +178,10 @@ func TestRace(t *testing.T) {
 	cancel()
 	_ = ctx.Err()
 }
+
+func TestCause(t *testing.T) {
+	ctx, cancel := WithCancelCause(Background())
+	err := fmt.Errorf("oh no")
+	cancel(err)
+	assert.Equal(t, err, Cause(ctx))
+}

@@ -43,8 +43,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
 	instanceMatches := instancePat.FindAllStringSubmatch(dataStr, -1)
 	tokenMatches := accessTokenPat.FindAllStringSubmatch(dataStr, -1)
 
-	fmt.Printf("instanceMatches: %v\n", instanceMatches)
-
 	for _, instance := range instanceMatches {
 		if len(instance) != 1 {
 			continue

@@ -2,6 +2,10 @@ package engine
 
 import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/envoyapikey"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/huggingface"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/salesforce"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/snowflake"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/trufflehogenterprise"
 
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abbysale"
@@ -287,6 +291,7 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/getresponse"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/getsandbox"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/github"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/github_oauth2"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/github_old"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/githubapp"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/gitlab"
@@ -556,7 +561,6 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/satismeterwritekey"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/saucelabs"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scalewaykey"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/github_oauth2"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scalr"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapeowl"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scraperapi"
@@ -1532,6 +1536,10 @@ func DefaultDetectors() []detectors.Detector {
 		couchbase.Scanner{},
 		envoyapikey.Scanner{},
 		github_oauth2.Scanner{},
+		snowflake.Scanner{},
+		huggingface.Scanner{},
+		trufflehogenterprise.Scanner{},
+		salesforce.Scanner{},
 	}
 
 }

@@ -2,6 +2,7 @@ package giturl
 
 import (
 	"net/url"
+	"strconv"
 	"strings"
 
 	"github.com/pkg/errors"
@@ -13,8 +14,29 @@ const (
 	providerGithub    provider = "Github"
 	providerGitlab    provider = "Gitlab"
 	providerBitbucket provider = "Bitbucket"
+	providerAzure     provider = "Azure"
+
+	urlGithub    = "github.com/"
+	urlGitlab    = "gitlab.com/"
+	urlBitbucket = "bitbucket.org/"
+	urlAzure     = "dev.azure.com/"
 )
 
+func determineProvider(repo string) provider {
+	switch {
+	case strings.Contains(repo, urlGithub):
+		return providerGithub
+	case strings.Contains(repo, urlGitlab):
+		return providerGitlab
+	case strings.Contains(repo, urlBitbucket):
+		return providerBitbucket
+	case strings.Contains(repo, urlAzure):
+		return providerAzure
+	default:
+		return ""
+	}
+}
+
 func NormalizeBitbucketRepo(repoURL string) (string, error) {
 	if !strings.HasPrefix(repoURL, "https") {
 		return "", errors.New("Bitbucket requires https repo urls: e.g. https://bitbucket.org/org/repo.git")
@@ -88,3 +110,35 @@ func NormalizeOrgRepoURL(provider provider, repoURL string) (string, error) {
 	parsed.Path += ".git"
 	return parsed.String(), nil
 }
+
+// GenerateLink crafts a link to the specific file from a commit.
+// Supports GitHub, GitLab, Bitbucket, and Azure Repos.
+// If the provider supports hyperlinks to specific lines, the line number will be included.
+func GenerateLink(repo, commit, file string, line int64) string {
+	switch determineProvider(repo) {
+	case providerBitbucket:
+		return repo[:len(repo)-4] + "/commits/" + commit
+
+	case providerGithub, providerGitlab:
+		var baseLink string
+		if file == "" {
+			baseLink = repo[:len(repo)-4] + "/commit/" + commit
+		} else {
+			baseLink = repo[:len(repo)-4] + "/blob/" + commit + "/" + file
+			if line > 0 {
+				baseLink += "#L" + strconv.FormatInt(line, 10)
+			}
+		}
+		return baseLink
+
+	case providerAzure:
+		baseLink := repo + "?path=" + file + "&version=GB" + commit
+		if line > 0 {
+			baseLink += "&line=" + strconv.FormatInt(line, 10)
+		}
+		return baseLink
+
+	default:
+		return ""
+	}
+}

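The provider switch makes the three link shapes explicit. A short sketch of the expected outputs (repository values here are made up; the function is the exported giturl.GenerateLink from this diff):

```go
package main

import (
	"fmt"

	"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
)

func main() {
	// GitHub/GitLab: strip the ".git" suffix, link the blob at the commit, append #L<line>.
	fmt.Println(giturl.GenerateLink("https://github.com/org/repo.git", "deadbeef", "main.go", 7))
	// https://github.com/org/repo/blob/deadbeef/main.go#L7

	// Bitbucket: only a commit-level link; file and line are not used.
	fmt.Println(giturl.GenerateLink("https://bitbucket.org/org/repo.git", "deadbeef", "main.go", 7))
	// https://bitbucket.org/org/repo/commits/deadbeef

	// Azure Repos: query parameters instead of path segments.
	fmt.Println(giturl.GenerateLink("https://dev.azure.com/org/project/_git/repo", "deadbeef", "main.go", 7))
	// https://dev.azure.com/org/project/_git/repo?path=main.go&version=GBdeadbeef&line=7
}
```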
@@ -7,6 +7,8 @@ import (
 )
 
 func Test_NormalizeOrgRepoURL(t *testing.T) {
+	t.Parallel()
+
 	tests := map[string]struct {
 		Provider provider
 		Repo     string
@@ -43,6 +45,8 @@ func Test_NormalizeOrgRepoURL(t *testing.T) {
 }
 
 func Test_NormalizeBitbucketRepo(t *testing.T) {
+	t.Parallel()
+
 	tests := map[string]struct {
 		Repo string
 		Out  string
@@ -69,6 +73,8 @@ func Test_NormalizeBitbucketRepo(t *testing.T) {
 }
 
 func Test_NormalizeGitlabRepo(t *testing.T) {
+	t.Parallel()
+
 	tests := map[string]struct {
 		Repo string
 		Out  string
@@ -93,3 +99,73 @@ func Test_NormalizeGitlabRepo(t *testing.T) {
 		}
 	}
 }
+
+func TestGenerateLink(t *testing.T) {
+	t.Parallel()
+
+	type args struct {
+		repo   string
+		commit string
+		file   string
+		line   int64
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "github link gen",
+			args: args{
+				repo:   "https://github.com/trufflesec-julian/confluence-go-api.git",
+				commit: "047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
+				file:   ".gitignore",
+			},
+			want: "https://github.com/trufflesec-julian/confluence-go-api/blob/047b4a2ba42fc5b6c0bd535c5307434a666db5ec/.gitignore",
+		},
+		{
+			name: "github link gen with line",
+			args: args{
+				repo:   "https://github.com/trufflesec-julian/confluence-go-api.git",
+				commit: "047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
+				file:   ".gitignore",
+				line:   int64(4),
+			},
+			want: "https://github.com/trufflesec-julian/confluence-go-api/blob/047b4a2ba42fc5b6c0bd535c5307434a666db5ec/.gitignore#L4",
+		},
+		{
+			name: "github link gen - no file",
+			args: args{
+				repo:   "https://github.com/trufflesec-julian/confluence-go-api.git",
+				commit: "047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
+			},
+			want: "https://github.com/trufflesec-julian/confluence-go-api/commit/047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
+		},
+		{
+			name: "Azure link gen",
+			args: args{
+				repo:   "https://dev.azure.com/org/project/_git/repo",
+				commit: "abcdef",
+				file:   "main.go",
+			},
+			want: "https://dev.azure.com/org/project/_git/repo?path=main.go&version=GBabcdef",
+		},
+		{
+			name: "Azure link gen with line",
+			args: args{
+				repo:   "https://dev.azure.com/org/project/_git/repo",
+				commit: "abcdef",
+				file:   "main.go",
+				line:   int64(20),
+			},
+			want: "https://dev.azure.com/org/project/_git/repo?path=main.go&version=GBabcdef&line=20",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := GenerateLink(tt.args.repo, tt.args.commit, tt.args.file, tt.args.line); got != tt.want {
+				t.Errorf("generateLink() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}

@@ -31,6 +31,8 @@ var (
 	maxDepth   = 5
 	maxSize    = 250 * 1024 * 1024 // 20MB
 	maxTimeout = time.Duration(30) * time.Second
+
+	defaultBufferSize = 512
 )
 
 // Ensure the Archive satisfies the interfaces at compile time.
@@ -85,7 +87,7 @@ func SetArchiveMaxTimeout(timeout time.Duration) {
 
 // FromFile extracts the files from an archive.
 func (a *Archive) FromFile(originalCtx context.Context, data io.Reader) chan []byte {
-	archiveChan := make(chan []byte, 512)
+	archiveChan := make(chan []byte, defaultBufferSize)
 	go func() {
 		ctx, cancel := context.WithTimeout(originalCtx, maxTimeout)
 		logger := logContext.AddLogger(ctx).Logger()
@@ -206,29 +208,28 @@ func (a *Archive) ReadToMax(ctx context.Context, reader io.Reader) (data []byte, err error) {
 			logger.Error(err, "Panic occurred when reading archive")
 		}
 	}()
-	fileContent := bytes.Buffer{}
-	logger.V(5).Info("Remaining buffer capacity", "bytes", maxSize-a.size)
-	for i := 0; i <= maxSize/512; i++ {
-		if common.IsDone(ctx) {
-			return nil, ctx.Err()
-		}
-		fileChunk := make([]byte, 512)
-		bRead, err := reader.Read(fileChunk)
-		if err != nil && !errors.Is(err, io.EOF) {
-			return []byte{}, err
-		}
-		a.size += bRead
-		if len(fileChunk) > 0 {
-			fileContent.Write(fileChunk[0:bRead])
-		}
-		if bRead < 512 {
-			return fileContent.Bytes(), nil
-		}
-		if a.size >= maxSize && bRead == 512 {
-			logger.V(2).Info("Max archive size reached.")
-			return fileContent.Bytes(), nil
-		}
+
+	if common.IsDone(ctx) {
+		return nil, ctx.Err()
+	}
+
+	var fileContent bytes.Buffer
+	// Create a limited reader to ensure we don't read more than the max size.
+	lr := io.LimitReader(reader, int64(maxSize))
+
+	// Using io.CopyBuffer for performance advantages. Though buf is mandatory
+	// for the method, due to the internal implementation of io.CopyBuffer, when
+	// *bytes.Buffer implements io.WriterTo or io.ReaderFrom, the provided buf
+	// is simply ignored. Thus, we can pass nil for the buf parameter.
+	_, err = io.CopyBuffer(&fileContent, lr, nil)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return nil, err
+	}
+
+	if fileContent.Len() == maxSize {
+		logger.V(2).Info("Max archive size reached.")
 	}
 
 	return fileContent.Bytes(), nil
 }

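The ReadToMax rewrite swaps a hand-rolled 512-byte read loop for io.LimitReader feeding io.CopyBuffer. The same pattern in isolation, with a deliberately tiny cap (names here are illustrative, not the repo's):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

const maxSize = 16 // tiny cap for demonstration

func readToMax(r io.Reader) ([]byte, error) {
	var buf bytes.Buffer
	// LimitReader returns EOF after maxSize bytes, capping memory use.
	lr := io.LimitReader(r, int64(maxSize))
	// *bytes.Buffer implements io.ReaderFrom, so io.CopyBuffer ignores the
	// scratch-buffer argument entirely; nil is safe here.
	if _, err := io.CopyBuffer(&buf, lr, nil); err != nil && err != io.EOF {
		return nil, err
	}
	if buf.Len() == maxSize {
		fmt.Println("max size reached; input may be truncated")
	}
	return buf.Bytes(), nil
}

func main() {
	out, _ := readToMax(strings.NewReader("this input is longer than sixteen bytes"))
	fmt.Printf("%q (%d bytes)\n", out, len(out)) // "this input is lo" (16 bytes)
}
```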
@@ -1,6 +1,7 @@
 package handlers
 
 import (
 	"bytes"
+	"context"
 	"io"
 	"net/http"
@@ -127,6 +128,58 @@ func TestHandleFile(t *testing.T) {
 	assert.Equal(t, 1, len(ch))
 }
 
+func TestReadToMax(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    []byte
+		expected []byte
+	}{
+		{
+			name:     "read full content within maxSize",
+			input:    []byte("abcdefg"),
+			expected: []byte("abcdefg"),
+		},
+		{
+			name:     "read content larger than maxSize",
+			input:    make([]byte, maxSize+10), // this creates a byte slice 10 bytes larger than maxSize
+			expected: make([]byte, maxSize),
+		},
+		{
+			name:     "empty input",
+			input:    []byte(""),
+			expected: []byte(""),
+		},
+	}
+
+	a := &Archive{}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reader := bytes.NewReader(tt.input)
+			output, err := a.ReadToMax(context.Background(), reader)
+			assert.Nil(t, err)
+
+			assert.Equal(t, tt.expected, output)
+		})
+	}
+}
+
+func BenchmarkReadToMax(b *testing.B) {
+	data := bytes.Repeat([]byte("a"), 1024*1000) // 1MB of data.
+	reader := bytes.NewReader(data)
+	a := &Archive{}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		b.StartTimer()
+		_, _ = a.ReadToMax(context.Background(), reader)
+		b.StopTimer()
+
+		_, _ = reader.Seek(0, 0) // Reset the reader position.
+		a.size = 0               // Reset archive size.
+	}
+}
+
 func TestExtractDebContent(t *testing.T) {
 	// Open the sample .deb file from the testdata folder.
 	file, err := os.Open("testdata/test.deb")

@@ -133,7 +133,7 @@ func BranchHeads(repo *gogit.Repository) (map[string]*object.Commit, error) {
 		}
 		headCommit, err := repo.CommitObject(*headHash)
 		if err != nil {
-			logger.Error(err, "unable to get commit", "commit", headCommit.String())
+			logger.Error(err, "unable to get commit", "head_hash", headHash.String())
 			return nil
 		}
 		branches[branchName] = headCommit

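The logging fix above matters because headCommit is nil whenever CommitObject returns an error, so formatting it would panic inside the error path; the hash, which the caller already holds, is safe to log. The same defensive pattern reduced to a runnable sketch (the lookup helper is hypothetical):

```go
package main

import (
	"errors"
	"fmt"
)

type commit struct{ hash string }

func (c *commit) String() string { return c.hash }

// lookup stands in for repo.CommitObject: on failure the returned *commit is nil.
func lookup(hash string) (*commit, error) {
	return nil, errors.New("object not found")
}

func main() {
	hash := "047b4a2ba42fc5b6c0bd535c5307434a666db5ec"
	c, err := lookup(hash)
	if err != nil {
		// Log the identifier we held before the call; c.String() would panic here.
		fmt.Printf("unable to get commit: %v head_hash=%s\n", err, hash)
		return
	}
	fmt.Println(c.String())
}
```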
@@ -20,7 +20,7 @@ const (
 
 // Chunker takes a chunk and splits it into chunks of ChunkSize.
 func Chunker(originalChunk *Chunk) chan *Chunk {
-	chunkChan := make(chan *Chunk)
+	chunkChan := make(chan *Chunk, 1)
 	go func() {
 		defer close(chunkChan)
 		if len(originalChunk.Data) <= TotalChunkSize {

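Buffering the channel by one element lets the producing goroutine hand off the common single-chunk result and exit without waiting for a receiver. A reduced sketch of the pattern (illustrative types, not the repo's):

```go
package main

import "fmt"

// chunker mimics the change above: with a buffer of 1, the goroutine can emit
// the common single-chunk result and return immediately, instead of parking
// until a receiver arrives as it would on an unbuffered channel.
func chunker(data []byte) chan []byte {
	out := make(chan []byte, 1)
	go func() {
		defer close(out)
		out <- data
	}()
	return out
}

func main() {
	for chunk := range chunker([]byte("hello")) {
		fmt.Printf("%s\n", chunk)
	}
}
```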
@@ -238,6 +238,7 @@ func (s *Source) chunkAction(ctx context.Context, proj project, bld build, act a
 			SourceType: s.Type(),
 			SourceName: s.name,
 			SourceID:   s.SourceID(),
+			JobID:      s.JobID(),
 			Data:       removeCircleSha1Line(data.Bytes()),
 			SourceMetadata: &source_metadatapb.MetaData{
 				Data: &source_metadatapb.MetaData_Circleci{

@@ -119,6 +119,7 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err
 			SourceType: s.Type(),
 			SourceName: s.name,
 			SourceID:   s.SourceID(),
+			JobID:      s.JobID(),
 			SourceMetadata: &source_metadatapb.MetaData{
 				Data: &source_metadatapb.MetaData_Docker{
 					Docker: &source_metadatapb.Docker{

@@ -161,6 +161,7 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou
 		SourceType: s.Type(),
 		SourceName: s.name,
 		SourceID:   s.SourceID(),
+		JobID:      s.JobID(),
 		SourceMetadata: &source_metadatapb.MetaData{
 			Data: &source_metadatapb.MetaData_Filesystem{
 				Filesystem: &source_metadatapb.Filesystem{
@@ -191,6 +192,7 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou
 		SourceType: s.Type(),
 		SourceName: s.name,
 		SourceID:   s.SourceID(),
+		JobID:      s.JobID(),
 		Data:       data.Bytes(),
 		SourceMetadata: &source_metadatapb.MetaData{
 			Data: &source_metadatapb.MetaData_Filesystem{

@@ -324,6 +324,7 @@ func (s *Source) processObject(ctx context.Context, o object) error {
 	chunkSkel := &sources.Chunk{
 		SourceName: s.name,
 		SourceType: s.Type(),
+		JobID:      s.JobID(),
 		SourceID:   s.sourceId,
 		Verify:     s.verify,
 		SourceMetadata: &source_metadatapb.MetaData{

@@ -9,7 +9,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
-	"strconv"
 	"strings"
 	"sync/atomic"
 	"time"
@@ -447,6 +446,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
 				chunkSkel := &sources.Chunk{
 					SourceName:     s.sourceName,
 					SourceID:       s.sourceID,
+					JobID:          s.jobID,
 					SourceType:     s.sourceType,
 					SourceMetadata: metadata,
 					Verify:         s.verify,
@@ -465,6 +465,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
 				chunksChan <- &sources.Chunk{
 					SourceName:     s.sourceName,
 					SourceID:       s.sourceID,
+					JobID:          s.jobID,
 					SourceType:     s.sourceType,
 					SourceMetadata: metadata,
 					Data:           diff.Content.Bytes(),
@@ -491,6 +492,7 @@ func (s *Git) gitChunk(ctx context.Context, diff gitparse.Diff, fileName, email,
 			chunksChan <- &sources.Chunk{
 				SourceName:     s.sourceName,
 				SourceID:       s.sourceID,
+				JobID:          s.jobID,
 				SourceType:     s.sourceType,
 				SourceMetadata: metadata,
 				Data:           append([]byte{}, newChunkBuffer.Bytes()...),
@@ -505,6 +507,7 @@ func (s *Git) gitChunk(ctx context.Context, diff gitparse.Diff, fileName, email,
 			chunksChan <- &sources.Chunk{
 				SourceName:     s.sourceName,
 				SourceID:       s.sourceID,
+				JobID:          s.jobID,
 				SourceType:     s.sourceType,
 				SourceMetadata: metadata,
 				Data:           line,
@@ -524,6 +527,7 @@ func (s *Git) gitChunk(ctx context.Context, diff gitparse.Diff, fileName, email,
 		chunksChan <- &sources.Chunk{
 			SourceName:     s.sourceName,
 			SourceID:       s.sourceID,
+			JobID:          s.jobID,
 			SourceType:     s.sourceType,
 			SourceMetadata: metadata,
 			Data:           append([]byte{}, newChunkBuffer.Bytes()...),
@@ -589,6 +593,7 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
 				chunkSkel := &sources.Chunk{
 					SourceName:     s.sourceName,
 					SourceID:       s.sourceID,
+					JobID:          s.jobID,
 					SourceType:     s.sourceType,
 					SourceMetadata: metadata,
 					Verify:         s.verify,
@@ -603,6 +608,7 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
 				chunksChan <- &sources.Chunk{
 					SourceName:     s.sourceName,
 					SourceID:       s.sourceID,
+					JobID:          s.jobID,
 					SourceType:     s.sourceType,
 					SourceMetadata: metadata,
 					Data:           diff.Content.Bytes(),
@@ -693,28 +699,6 @@ func normalizeConfig(scanOptions *ScanOptions, repo *git.Repository) (err error)
 	return nil
 }
 
-// GenerateLink crafts a link to the specific file from a commit. This works in most major git providers (Github/Gitlab)
-func GenerateLink(repo, commit, file string, line int64) string {
-	// bitbucket links are commits not commit...
-	if strings.Contains(repo, "bitbucket.org/") {
-		return repo[:len(repo)-4] + "/commits/" + commit
-	}
-	var link string
-	if file == "" {
-		link = repo[:len(repo)-4] + "/commit/" + commit
-	} else {
-		link = repo[:len(repo)-4] + "/blob/" + commit + "/" + file
-
-		// Both GitHub and Gitlab support hyperlinking to a specific line with #L<number>, e.g.:
-		// https://github.com/trufflesecurity/trufflehog/blob/e856a6890d0da5a218f4f9283500b80043884641/go.mod#L169
-		// https://gitlab.com/pdftk-java/pdftk/-/blob/88559a08f34175b6fae76c40a88f0377f64a12d7/java/com/gitlab/pdftk_java/report.java#L893
-		if line > 0 && (strings.Contains(repo, "github") || strings.Contains(repo, "gitlab")) {
-			link += "#L" + strconv.FormatInt(line, 10)
-		}
-	}
-	return link
-}
-
 func stripPassword(u string) (string, error) {
 	if strings.HasPrefix(u, "git@") {
 		return u, nil

@@ -151,55 +151,6 @@ func TestSource_Scan(t *testing.T) {
 	}
 }
 
-func Test_generateLink(t *testing.T) {
-	type args struct {
-		repo   string
-		commit string
-		file   string
-		line   int64
-	}
-	tests := []struct {
-		name string
-		args args
-		want string
-	}{
-		{
-			name: "test link gen",
-			args: args{
-				repo:   "https://github.com/trufflesec-julian/confluence-go-api.git",
-				commit: "047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
-				file:   ".gitignore",
-			},
-			want: "https://github.com/trufflesec-julian/confluence-go-api/blob/047b4a2ba42fc5b6c0bd535c5307434a666db5ec/.gitignore",
-		},
-		{
-			name: "test link gen",
-			args: args{
-				repo:   "https://github.com/trufflesec-julian/confluence-go-api.git",
-				commit: "047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
-				file:   ".gitignore",
-				line:   int64(4),
-			},
-			want: "https://github.com/trufflesec-julian/confluence-go-api/blob/047b4a2ba42fc5b6c0bd535c5307434a666db5ec/.gitignore#L4",
-		},
-		{
-			name: "test link gen - no file",
-			args: args{
-				repo:   "https://github.com/trufflesec-julian/confluence-go-api.git",
-				commit: "047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
-			},
-			want: "https://github.com/trufflesec-julian/confluence-go-api/commit/047b4a2ba42fc5b6c0bd535c5307434a666db5ec",
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := GenerateLink(tt.args.repo, tt.args.commit, tt.args.file, tt.args.line); got != tt.want {
-				t.Errorf("generateLink() = %v, want %v", got, tt.want)
-			}
-		})
-	}
-}
-
 // We ran into an issue where upgrading a dependency caused the git patch chunking to break
 // So this test exists to make sure that when something changes, we know about it.
 func TestSource_Chunks_Integration(t *testing.T) {

@@ -29,6 +29,7 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
@@ -148,14 +149,16 @@ func (s *Source) newFilteredRepoCache(c cache.Cache, include, exclude []string)
 	for _, ig := range include {
 		g, err := glob.Compile(ig)
 		if err != nil {
-			s.log.V(1).Info("invalid include glob", "glob", g, "err", err)
+			s.log.V(1).Info("invalid include glob", "include_value", ig, "err", err)
+			continue
 		}
 		includeGlobs = append(includeGlobs, g)
 	}
 	for _, eg := range exclude {
 		g, err := glob.Compile(eg)
 		if err != nil {
-			s.log.V(1).Info("invalid exclude glob", "glob", g, "err", err)
+			s.log.V(1).Info("invalid exclude glob", "exclude_value", eg, "err", err)
+			continue
 		}
 		excludeGlobs = append(excludeGlobs, g)
 	}
@@ -261,7 +264,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobID, sourceID int64,
 			File:       sanitizer.UTF8(file),
 			Email:      sanitizer.UTF8(email),
 			Repository: sanitizer.UTF8(repository),
-			Link:       git.GenerateLink(repository, commit, file, line),
+			Link:       giturl.GenerateLink(repository, commit, file, line),
 			Timestamp:  sanitizer.UTF8(timestamp),
 			Line:       line,
 			Visibility: s.visibilityOf(aCtx, repository),
@@ -1222,6 +1225,7 @@ func (s *Source) chunkIssueComments(ctx context.Context, repo, repoPath string,
 			chunk := &sources.Chunk{
 				SourceName: s.name,
 				SourceID:   s.SourceID(),
+				JobID:      s.JobID(),
 				SourceType: s.Type(),
 				SourceMetadata: &source_metadatapb.MetaData{
 					Data: &source_metadatapb.MetaData_Github{
@@ -1255,6 +1259,7 @@ func (s *Source) chunkPullRequestComments(ctx context.Context, repo string, comm
 			SourceName: s.name,
 			SourceID:   s.SourceID(),
 			SourceType: s.Type(),
+			JobID:      s.JobID(),
 			SourceMetadata: &source_metadatapb.MetaData{
 				Data: &source_metadatapb.MetaData_Github{
 					Github: &source_metadatapb.Github{
@@ -1286,6 +1291,7 @@ func (s *Source) chunkGistComments(ctx context.Context, gistUrl string, comments
 			SourceName: s.name,
 			SourceID:   s.SourceID(),
 			SourceType: s.Type(),
+			JobID:      s.JobID(),
 			SourceMetadata: &source_metadatapb.MetaData{
 				Data: &source_metadatapb.MetaData_Github{
 					Github: &source_metadatapb.Github{

@@ -127,7 +127,7 @@ func (s *Source) Init(_ context.Context, name string, jobId, sourceId int64, ver
 			File:       sanitizer.UTF8(file),
 			Email:      sanitizer.UTF8(email),
 			Repository: sanitizer.UTF8(repository),
-			Link:       git.GenerateLink(repository, commit, file, line),
+			Link:       giturl.GenerateLink(repository, commit, file, line),
 			Timestamp:  sanitizer.UTF8(timestamp),
 			Line:       line,
 		},

@@ -40,9 +40,12 @@ type JobProgressHook interface {
 }
 
 // JobProgressRef is a wrapper of a JobProgress for read-only access to its state.
+// If the job supports it, the reference can also be used to cancel running via
+// CancelRun.
 type JobProgressRef struct {
-	SourceID    int64
 	JobID       int64
+	SourceID    int64
+	SourceName  string
 	jobProgress *JobProgress
 }
 
@@ -65,6 +68,16 @@ func (r *JobProgressRef) Done() <-chan struct{} {
 	return r.jobProgress.Done()
 }
 
+// CancelRun requests that the job this is referencing is cancelled and stops
+// running. This method will have no effect if the job does not allow
+// cancellation.
+func (r *JobProgressRef) CancelRun(cause error) {
+	if r.jobProgress == nil || r.jobProgress.jobCancel == nil {
+		return
+	}
+	r.jobProgress.jobCancel(cause)
+}
+
 // Fatal is a wrapper around error to differentiate non-fatal errors from fatal
 // ones. A fatal error is typically from a finished context or any error
 // returned from a source's Init, Chunks, Enumerate, or ChunkUnit methods.
@@ -88,14 +101,19 @@ func (f ChunkError) Unwrap() error { return f.err }
 // JobProgress aggregates information about a run of a Source.
 type JobProgress struct {
 	// Unique identifiers for this job.
-	SourceID int64
 	JobID    int64
+	SourceID int64
+	SourceName string
 	// Tracks whether the job is finished or not.
 	ctx    context.Context
 	cancel context.CancelFunc
+	// Requests to cancel the job.
+	jobCancel context.CancelCauseFunc
 	// Metrics.
 	metrics     JobProgressMetrics
 	metricsLock sync.Mutex
+	// Progress reported by the source.
+	progress *Progress
 	// Coarse grained hooks for adding extra functionality when events trigger.
 	hooks []JobProgressHook
 }
@@ -110,9 +128,20 @@ type JobProgressMetrics struct {
 	FinishedUnits uint64
 	// Total number of chunks produced. This metric updates before the
 	// chunk is sent on the output channel.
-	TotalChunks uint64
-	Errors      []error
+	TotalChunks uint64
+	// All errors encountered.
+	Errors []error
+	// Set to true if the source supports enumeration and has finished
+	// enumerating. If the source does not support enumeration, this field
+	// is always false.
+	DoneEnumerating bool
+
+	// Progress information reported by the source.
+	SourcePercent           int64
+	SourceMessage           string
+	SourceEncodedResumeInfo string
+	SourceSectionsCompleted int32
+	SourceSectionsRemaining int32
 }
 
 // WithHooks adds hooks to be called when an event triggers.
@@ -120,14 +149,20 @@ func WithHooks(hooks ...JobProgressHook) func(*JobProgress) {
 	return func(jp *JobProgress) { jp.hooks = append(jp.hooks, hooks...) }
 }
 
+// WithCancel allows cancelling the job by the JobProgressRef.
+func WithCancel(cancel context.CancelCauseFunc) func(*JobProgress) {
+	return func(jp *JobProgress) { jp.jobCancel = cancel }
+}
+
 // NewJobProgress creates a new job report for the given source and job ID.
-func NewJobProgress(sourceID, jobID int64, opts ...func(*JobProgress)) *JobProgress {
+func NewJobProgress(jobID, sourceID int64, sourceName string, opts ...func(*JobProgress)) *JobProgress {
 	ctx, cancel := context.WithCancel(context.Background())
 	jp := &JobProgress{
-		SourceID: sourceID,
-		JobID:    jobID,
-		ctx:      ctx,
-		cancel:   cancel,
+		JobID:      jobID,
+		SourceID:   sourceID,
+		SourceName: sourceName,
+		ctx:        ctx,
+		cancel:     cancel,
 	}
 	for _, opt := range opts {
 		opt(jp)
@@ -135,6 +170,12 @@ func NewJobProgress(sourceID, jobID int64, opts ...func(*JobProgress)) *JobProgress {
 	return jp
 }
 
+// TrackProgress informs the JobProgress of a Progress object and safely
+// exposes its information in the Snapshots.
+func (jp *JobProgress) TrackProgress(progress *Progress) {
+	jp.progress = progress
+}
+
 // executeHooks is a helper method to execute all the hooks for the given
 // closure.
 func (jp *JobProgress) executeHooks(todo func(hook JobProgressHook)) {
@@ -210,6 +251,16 @@ func (jp *JobProgress) Snapshot() JobProgressMetrics {
 	metrics.Errors = make([]error, len(metrics.Errors))
 	copy(metrics.Errors, jp.metrics.Errors)
 
+	if jp.progress != nil {
+		jp.progress.mut.Lock()
+		defer jp.progress.mut.Unlock()
+		metrics.SourcePercent = jp.progress.PercentComplete
+		metrics.SourceMessage = jp.progress.Message
+		metrics.SourceEncodedResumeInfo = jp.progress.EncodedResumeInfo
+		metrics.SourceSectionsCompleted = jp.progress.SectionsCompleted
+		metrics.SourceSectionsRemaining = jp.progress.SectionsRemaining
+	}
+
 	return metrics
 }
 
@@ -231,6 +282,7 @@ func (jp *JobProgress) Ref() JobProgressRef {
 	return JobProgressRef{
 		SourceID:    jp.SourceID,
 		JobID:       jp.JobID,
+		SourceName:  jp.SourceName,
 		jobProgress: jp,
 	}
 }
@@ -280,7 +332,22 @@ func (m JobProgressMetrics) PercentComplete() int {
 	num := m.FinishedUnits
 	den := m.TotalUnits
 	if num == 0 && den == 0 {
-		return 0
+		// Fallback to the source's self-reported percent complete if
+		// the unit information isn't available.
+		return int(m.SourcePercent)
 	}
 	return int(num * 100 / den)
 }
+
+// ElapsedTime is a convenience method that provides the elapsed time the job
+// has been running. If it hasn't started yet, 0 is returned. If it has
+// finished, the total time is returned.
+func (m JobProgressMetrics) ElapsedTime() time.Duration {
+	if m.StartTime.IsZero() {
+		return 0
+	}
+	if m.EndTime.IsZero() {
+		return time.Since(m.StartTime)
+	}
+	return m.EndTime.Sub(m.StartTime)
+}

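Putting the new job-progress pieces together: WithCancel stores a CancelCauseFunc on the JobProgress, a JobProgressRef can then stop the run with a descriptive cause, and Snapshot folds in the source's self-reported progress. A hedged sketch of the wiring, using only APIs visible in this diff (requires Go 1.20+ for context.WithCancelCause):

```go
package main

import (
	"context"
	"errors"
	"fmt"

	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

func main() {
	// The manager derives a cancellable context for the job and hands the
	// cancel function to the JobProgress via WithCancel.
	ctx, cancel := context.WithCancelCause(context.Background())
	jp := sources.NewJobProgress(9001, 1337, "my-source", sources.WithCancel(cancel))

	// Anyone holding the read-only ref can now request cancellation.
	ref := jp.Ref()
	ref.CancelRun(errors.New("operator requested shutdown"))

	// The job's worker would observe ctx.Done() and wind down.
	<-ctx.Done()
	fmt.Println(context.Cause(ctx)) // operator requested shutdown
}
```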
@@ -36,10 +36,10 @@ func TestJobProgressFatalErrors(t *testing.T) {
 }
 
 func TestJobProgressRef(t *testing.T) {
-	jp := NewJobProgress(123, 456)
+	jp := NewJobProgress(123, 456, "source name")
 	ref := jp.Ref()
-	assert.Equal(t, int64(123), ref.SourceID)
-	assert.Equal(t, int64(456), ref.JobID)
+	assert.Equal(t, int64(123), ref.JobID)
+	assert.Equal(t, int64(456), ref.SourceID)
 
 	// Test Done() blocks until Finish() is called.
 	select {
@@ -61,7 +61,7 @@ func TestJobProgressHook(t *testing.T) {
 	defer ctrl.Finish()
 
 	hook := NewMockJobProgressHook(ctrl)
-	jp := NewJobProgress(123, 456, WithHooks(hook))
+	jp := NewJobProgress(123, 456, "source name", WithHooks(hook))
 
 	// Start(JobProgressRef, time.Time)
 	// End(JobProgressRef, time.Time)
@@ -115,3 +115,14 @@ func TestJobProgressDone(t *testing.T) {
 		assert.FailNow(t, "done should not block for a nil job")
 	}
 }
+
+func TestJobProgressElapsedTime(t *testing.T) {
+	metrics := JobProgressMetrics{}
+	assert.Equal(t, time.Duration(0), metrics.ElapsedTime())
+
+	metrics.StartTime = time.Now()
+	assert.Greater(t, metrics.ElapsedTime(), time.Duration(0))
+
+	metrics.EndTime = metrics.StartTime.Add(1 * time.Hour)
+	assert.Equal(t, metrics.ElapsedTime(), 1*time.Hour)
+}

@@ -376,6 +376,7 @@ func (s *Source) pageChunker(ctx context.Context, client *s3.S3, chunksChan chan
 				SourceType: s.Type(),
 				SourceName: s.name,
 				SourceID:   s.SourceID(),
+				JobID:      s.JobID(),
 				SourceMetadata: &source_metadatapb.MetaData{
 					Data: &source_metadatapb.MetaData_S3{
 						S3: &source_metadatapb.S3{

@@ -10,13 +10,12 @@ import (
 
 	"github.com/kylelemons/godebug/pretty"
 	"github.com/stretchr/testify/assert"
+	"google.golang.org/protobuf/types/known/anypb"
 
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
-
-	"google.golang.org/protobuf/types/known/anypb"
 )
 
 func TestSource_Chunks(t *testing.T) {
@@ -35,6 +34,7 @@ func TestSource_Chunks(t *testing.T) {
 		name       string
 		verify     bool
 		connection *sourcespb.S3
+		setEnv     map[string]string
 	}
 	tests := []struct {
 		name string
@@ -58,6 +58,23 @@ func TestSource_Chunks(t *testing.T) {
 			wantErr:       false,
 			wantChunkData: `W2RlZmF1bHRdCmF3c19hY2Nlc3Nfa2V5X2lkID0gQUtJQTM1T0hYMkRTT1pHNjQ3TkgKYXdzX3NlY3JldF9hY2Nlc3Nfa2V5ID0gUXk5OVMrWkIvQ1dsRk50eFBBaWQ3Z0d6dnNyWGhCQjd1ckFDQUxwWgpvdXRwdXQgPSBqc29uCnJlZ2lvbiA9IHVzLWVhc3QtMg==`,
 		},
+		{
+			name: "gets chunks after assuming role",
+			// This test will attempt to scan every bucket in the account, but the role policy blocks access to every
+			// bucket except the one we want. This (expected behavior) causes errors in the test log output, but these
+			// errors shouldn't actually cause test failures.
+			init: init{
+				connection: &sourcespb.S3{
+					Roles: []string{"arn:aws:iam::619888638459:role/s3-test-assume-role"},
+				},
+				setEnv: map[string]string{
+					"AWS_ACCESS_KEY_ID":     s3key,
+					"AWS_SECRET_ACCESS_KEY": s3secret,
+				},
+			},
+			wantErr:       false,
+			wantChunkData: `W2RlZmF1bHRdCmF3c19zZWNyZXRfYWNjZXNzX2tleSA9IFF5OTlTK1pCL0NXbEZOdHhQQWlkN2dHenZzclhoQkI3dXJBQ0FMcFoKYXdzX2FjY2Vzc19rZXlfaWQgPSBBS0lBMzVPSFgyRFNPWkc2NDdOSApvdXRwdXQgPSBqc29uCnJlZ2lvbiA9IHVzLWVhc3QtMg==`,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -65,6 +82,10 @@ func TestSource_Chunks(t *testing.T) {
 			var cancelOnce sync.Once
 			defer cancelOnce.Do(cancel)
 
+			for k, v := range tt.init.setEnv {
+				t.Setenv(k, v)
+			}
+
 			s := Source{}
 			conn, err := anypb.New(tt.init.connection)
 			if err != nil {

@@ -36,7 +36,10 @@ type SourceManager struct {
 	handles     map[handle]sourceInfo
 	handlesLock sync.Mutex
 	// Pool limiting the amount of concurrent sources running.
-	pool errgroup.Group
+	pool                errgroup.Group
+	poolLimit           int
+	currentRunningCount int32
 	// Max number of units to scan concurrently per source.
 	concurrentUnits int
 	// Run the sources using source unit enumeration / chunking if available.
@@ -68,7 +71,10 @@ func WithReportHook(hook JobProgressHook) func(*SourceManager) {
 
 // WithConcurrentSources limits the concurrent number of sources a manager can run.
 func WithConcurrentSources(concurrency int) func(*SourceManager) {
-	return func(mgr *SourceManager) { mgr.pool.SetLimit(concurrency) }
+	return func(mgr *SourceManager) {
+		mgr.pool.SetLimit(concurrency)
+		mgr.poolLimit = concurrency
+	}
 }
 
 // WithBufferedOutput sets the size of the buffer used for the Chunks() channel.
@@ -151,20 +157,30 @@ func (s *SourceManager) asyncRun(ctx context.Context, handle handle) (JobProgressRef, error) {
 	if err := s.preflightChecks(ctx, handle); err != nil {
 		return JobProgressRef{}, err
 	}
+	// Get the name. Should never fail due to preflight checks.
+	sourceInfo, ok := s.getSourceInfo(handle)
+	if !ok {
+		return JobProgressRef{SourceID: int64(handle)}, fmt.Errorf("unrecognized handle")
+	}
+	sourceName := sourceInfo.name
 	// Get a Job ID.
 	ctx = context.WithValue(ctx, "source_id", int64(handle))
 	jobID, err := s.api.GetJobID(ctx, int64(handle))
 	if err != nil {
-		return JobProgressRef{SourceID: int64(handle)}, err
+		return JobProgressRef{SourceID: int64(handle), SourceName: sourceName}, err
 	}
 	// Create a JobProgress object for tracking progress.
-	progress := NewJobProgress(int64(handle), jobID, WithHooks(s.hooks...))
+	ctx, cancel := context.WithCancelCause(ctx)
+	progress := NewJobProgress(jobID, int64(handle), sourceName, WithHooks(s.hooks...), WithCancel(cancel))
 	s.pool.Go(func() error {
+		atomic.AddInt32(&s.currentRunningCount, 1)
+		defer atomic.AddInt32(&s.currentRunningCount, -1)
 		ctx := context.WithValues(ctx,
 			"job_id", jobID,
 			"source_manager_worker_id", common.RandomID(5),
 		)
 		defer common.Recover(ctx)
+		defer cancel(nil)
 		return s.run(ctx, handle, jobID, progress)
 	})
 	return progress.Ref(), nil
@@ -199,6 +215,16 @@ func (s *SourceManager) ScanChunk(chunk *Chunk) {
 	s.outputChunks <- chunk
 }
 
+// AvailableCapacity returns the number of concurrent jobs the manager can
+// accommodate at this time. If there is no limit, -1 is returned.
+func (s *SourceManager) AvailableCapacity() int {
+	if s.poolLimit == 0 {
+		return -1
+	}
+	runCount := atomic.LoadInt32(&s.currentRunningCount)
+	return s.poolLimit - int(runCount)
+}
+
 // preflightChecks is a helper method to check the Manager or the context isn't
 // done and that the handle is valid.
 func (s *SourceManager) preflightChecks(ctx context.Context, handle handle) error {
@@ -221,6 +247,12 @@ func (s *SourceManager) run(ctx context.Context, handle handle, jobID int64, rep
 	report.Start(time.Now())
 	defer func() { report.End(time.Now()) }()
 
+	defer func() {
+		if err := context.Cause(ctx); err != nil {
+			report.ReportError(Fatal{err})
+		}
+	}()
+
 	// Initialize the source.
 	sourceInfo, ok := s.getSourceInfo(handle)
 	if !ok {
@@ -234,6 +266,7 @@ func (s *SourceManager) run(ctx context.Context, handle handle, jobID int64, rep
 		report.ReportError(Fatal{err})
 		return Fatal{err}
 	}
+	report.TrackProgress(source.GetProgress())
 	ctx = context.WithValues(ctx,
 		"source_type", source.Type().String(),
 		"source_name", sourceInfo.name,

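AvailableCapacity is a limit-minus-running computation over an atomic counter that each pooled goroutine increments on entry and decrements on exit. The same bookkeeping in isolation (illustrative types, not the repo's):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type pool struct {
	limit   int
	running int32
}

// available mirrors AvailableCapacity: a zero limit means "unlimited", reported as -1.
func (p *pool) available() int {
	if p.limit == 0 {
		return -1
	}
	return p.limit - int(atomic.LoadInt32(&p.running))
}

// run brackets the work with the running counter, as the pooled closure does.
func (p *pool) run(work func()) {
	atomic.AddInt32(&p.running, 1)
	defer atomic.AddInt32(&p.running, -1)
	work()
}

func main() {
	p := &pool{limit: 2}
	fmt.Println(p.available())                   // 2
	p.run(func() { fmt.Println(p.available()) }) // 1 while work is running
	fmt.Println(p.available())                   // 2 again
}
```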
@@ -1,6 +1,7 @@
 package sources
 
 import (
+	"errors"
 	"fmt"
 	"testing"
 
@@ -280,7 +281,62 @@ func TestSourceManagerJobAndSourceIDs(t *testing.T) {
 	})
 	assert.NoError(t, err)
 
-	_, _ = mgr.Run(context.Background(), handle)
+	ref, _ := mgr.Run(context.Background(), handle)
 	assert.Equal(t, int64(1337), initializedSourceID)
+	assert.Equal(t, int64(1337), ref.SourceID)
 	assert.Equal(t, int64(9001), initializedJobID)
+	assert.Equal(t, int64(9001), ref.JobID)
+	assert.Equal(t, "dummy", ref.SourceName)
 }
+
+// Chunk method that has a custom callback for the Chunks method.
+type callbackChunker struct {
+	cb func(context.Context, chan *Chunk) error
+}
+
+func (c callbackChunker) Chunks(ctx context.Context, ch chan *Chunk) error           { return c.cb(ctx, ch) }
+func (c callbackChunker) Enumerate(context.Context, UnitReporter) error              { return nil }
+func (c callbackChunker) ChunkUnit(context.Context, SourceUnit, ChunkReporter) error { return nil }
+
+func TestSourceManagerCancelRun(t *testing.T) {
+	mgr := NewManager(WithBufferedOutput(8))
+	var returnedErr error
+	handle, err := enrollDummy(mgr, callbackChunker{func(ctx context.Context, _ chan *Chunk) error {
+		// The context passed to Chunks should get cancelled when ref.CancelRun() is called.
+		<-ctx.Done()
+		returnedErr = fmt.Errorf("oh no: %w", ctx.Err())
+		return returnedErr
+	}})
+	assert.NoError(t, err)
+
+	ref, err := mgr.ScheduleRun(context.Background(), handle)
+	assert.NoError(t, err)
+
+	cancelErr := fmt.Errorf("abort! abort!")
+	ref.CancelRun(cancelErr)
+	<-ref.Done()
+	assert.Error(t, ref.Snapshot().FatalError())
+	assert.True(t, errors.Is(ref.Snapshot().FatalError(), returnedErr))
+	assert.True(t, errors.Is(ref.Snapshot().FatalErrors(), cancelErr))
+}
+
+func TestSourceManagerAvailableCapacity(t *testing.T) {
+	mgr := NewManager(WithConcurrentSources(1337))
+	start, end := make(chan struct{}), make(chan struct{})
+	handle, err := enrollDummy(mgr, callbackChunker{func(context.Context, chan *Chunk) error {
+		start <- struct{}{} // Send start signal.
+		<-end               // Wait for end signal.
+		return nil
+	}})
+	assert.NoError(t, err)
+
+	assert.Equal(t, 1337, mgr.AvailableCapacity())
+	ref, err := mgr.ScheduleRun(context.Background(), handle)
+	assert.NoError(t, err)
+
+	<-start // Wait for start signal.
+	assert.Equal(t, 1336, mgr.AvailableCapacity())
+	end <- struct{}{} // Send end signal.
+	<-ref.Done()      // Wait for the job to finish.
+	assert.Equal(t, 1337, mgr.AvailableCapacity())
+}

@@ -272,6 +272,7 @@ func (s *Source) monitorConnection(ctx context.Context, conn net.Conn, chunksCha
 			SourceName:     s.syslog.sourceName,
 			SourceID:       s.syslog.sourceID,
 			SourceType:     s.syslog.sourceType,
+			JobID:          s.JobID(),
 			SourceMetadata: metadata,
 			Data:           input,
 			Verify:         s.verify,
@@ -313,6 +314,7 @@ func (s *Source) acceptUDPConnections(ctx context.Context, netListener net.Packe
 		chunksChan <- &sources.Chunk{
 			SourceName:     s.syslog.sourceName,
 			SourceID:       s.syslog.sourceID,
+			JobID:          s.JobID(),
 			SourceType:     s.syslog.sourceType,
 			SourceMetadata: metadata,
 			Data:           input,