Modularize scanning engine (#2887)

* POC: Modularize scanning engine.
* fix typo
* update interface name
* fix tests
* update test
* fix more tests
* fix bug
* fixes.
* fix merge
* add detector verification overrides
* handle --no-verification flag
* support fp
* add test
* update name
* filter
* update test
* explicit use of detector
* updates
2024-11-10 07:04:24 +00:00 · 2024-06-13 13:47:09 -07:00 · 2024-06-13 13:47:09 -07:00 · cb072603dc
commit cb072603dc
parent 4addd81e29
6 changed files with 765 additions and 555 deletions
--- a/main.go
+++ b/main.go
@ -1,6 +1,7 @@
 package main

 import (
+	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@ -24,8 +25,6 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/config"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/log"
@ -352,88 +351,6 @@ func run(state overseer.State) {
 		handlers.SetArchiveMaxTimeout(*archiveTimeout)
 	}

-	// Build include and exclude detector sets for filtering on engine initialization.
-	// Exit if there was an error to inform the user of the misconfiguration.
-	var includeDetectorSet, excludeDetectorSet map[config.DetectorID]struct{}
-	var detectorsWithCustomVerifierEndpoints map[config.DetectorID][]string
-	{
-		includeList, err := config.ParseDetectors(*includeDetectors)
-		if err != nil {
-			logFatal(err, "invalid include list detector configuration")
-		}
-		excludeList, err := config.ParseDetectors(*excludeDetectors)
-		if err != nil {
-			logFatal(err, "invalid exclude list detector configuration")
-		}
-		detectorsWithCustomVerifierEndpoints, err = config.ParseVerifierEndpoints(*verifiers)
-		if err != nil {
-			logFatal(err, "invalid verifier detector configuration")
-		}
-		includeDetectorSet = detectorTypeToSet(includeList)
-		excludeDetectorSet = detectorTypeToSet(excludeList)
-	}
-
-	// Verify that all the user-provided detectors support the optional
-	// detector features.
-	{
-		if id, err := verifyDetectorsAreVersioner(includeDetectorSet); err != nil {
-			logFatal(err, "invalid include list detector configuration", "detector", id)
-		}
-		if id, err := verifyDetectorsAreVersioner(excludeDetectorSet); err != nil {
-			logFatal(err, "invalid exclude list detector configuration", "detector", id)
-		}
-		if id, err := verifyDetectorsAreVersioner(detectorsWithCustomVerifierEndpoints); err != nil {
-			logFatal(err, "invalid verifier detector configuration", "detector", id)
-		}
-		// Extra check for endpoint customization.
-		isEndpointCustomizer := engine.DefaultDetectorTypesImplementing[detectors.EndpointCustomizer]()
-		for id := range detectorsWithCustomVerifierEndpoints {
-			if _, ok := isEndpointCustomizer[id.ID]; !ok {
-				logFatal(
-					fmt.Errorf("endpoint provided but detector does not support endpoint customization"),
-					"invalid custom verifier endpoint detector configuration",
-					"detector", id,
-				)
-			}
-		}
-	}
-
-	includeFilter := func(d detectors.Detector) bool {
-		_, ok := getWithDetectorID(d, includeDetectorSet)
-		return ok
-	}
-	excludeFilter := func(d detectors.Detector) bool {
-		_, ok := getWithDetectorID(d, excludeDetectorSet)
-		return !ok
-	}
-	// Abuse filter to cause a side-effect.
-	endpointCustomizer := func(d detectors.Detector) bool {
-		urls, ok := getWithDetectorID(d, detectorsWithCustomVerifierEndpoints)
-		if !ok {
-			return true
-		}
-		id := config.GetDetectorID(d)
-		customizer, ok := d.(detectors.EndpointCustomizer)
-		if !ok {
-			// NOTE: We should never reach here due to validation above.
-			logFatal(
-				fmt.Errorf("failed to configure a detector endpoint"),
-				"the provided detector does not support endpoint configuration",
-				"detector", id,
-			)
-		}
-		if !*customVerifiersOnly || len(urls) == 0 {
-			urls = append(urls, customizer.DefaultEndpoint())
-		}
-		if err := customizer.SetEndpoints(urls...); err != nil {
-			logFatal(err, "failed configuring custom endpoint for detector", "detector", id)
-		}
-		logger.Info("configured detector with verification urls",
-			"detector", id, "urls", urls,
-		)
-		return true
-	}
-
 	// Set how the engine will print its results.
 	var printer engine.Printer
 	switch {
@ -451,11 +368,6 @@ func run(state overseer.State) {
 		fmt.Fprintf(os.Stderr, "🐷🔑🐷  TruffleHog. Unearth your secrets. 🐷🔑🐷\n\n")
 	}

-	var jobReportWriter io.WriteCloser
-	if *jobReportFile != nil {
-		jobReportWriter = *jobReportFile
-	}
-
 	// Parse --results flag.
 	if *onlyVerified {
 		r := "verified"
@ -466,34 +378,31 @@ func run(state overseer.State) {
 		logFatal(err, "failed to configure results flag")
 	}

-	scanConfig := scanConfig{
-		Command:                  cmd,
-		Concurrency:              *concurrency,
-		Decoders:                 decoders.DefaultDecoders(),
-		Conf:                     conf,
-		IncludeFilter:            includeFilter,
-		ExcludeFilter:            excludeFilter,
-		EndpointCustomizer:       endpointCustomizer,
-		NoVerification:           *noVerification,
-		PrintAvgDetectorTime:     *printAvgDetectorTime,
-		FilterUnverified:         *filterUnverified,
-		FilterEntropy:            *filterEntropy,
-		ScanEntireChunk:          *scanEntireChunk,
-		JobReportWriter:          jobReportWriter,
-		AllowVerificationOverlap: *allowVerificationOverlap,
-		ParsedResults:            parsedResults,
-		Printer:                  printer,
+	engConf := engine.Config{
+		Concurrency:           *concurrency,
+		Detectors:             conf.Detectors,
+		Verify:                !*noVerification,
+		IncludeDetectors:      *includeDetectors,
+		ExcludeDetectors:      *excludeDetectors,
+		CustomVerifiersOnly:   *customVerifiersOnly,
+		VerifierEndpoints:     *verifiers,
+		Dispatcher:            engine.NewPrinterDispatcher(printer),
+		FilterUnverified:      *filterUnverified,
+		FilterEntropy:         *filterEntropy,
+		VerificationOverlap:   *allowVerificationOverlap,
+		Results:               parsedResults,
+		PrintAvgDetectorTime:  *printAvgDetectorTime,
+		ShouldScanEntireChunk: *scanEntireChunk,
 	}

 	if *compareDetectionStrategies {
-		err := compareScans(ctx, scanConfig)
-		if err != nil {
+		if err := compareScans(ctx, cmd, engConf); err != nil {
 			logFatal(err, "error comparing detection strategies")
 		}
 		return
 	}

-	metrics, err := runSingleScan(ctx, scanConfig, *scanEntireChunk)
+	metrics, err := runSingleScan(ctx, cmd, engConf)
 	if err != nil {
 		logFatal(err, "error running scan")
 	}
@ -514,26 +423,7 @@ func run(state overseer.State) {
 	}
 }

-type scanConfig struct {
-	Command                  string
-	Concurrency              int
-	Decoders                 []decoders.Decoder
-	Conf                     *config.Config
-	IncludeFilter            func(detectors.Detector) bool
-	ExcludeFilter            func(detectors.Detector) bool
-	EndpointCustomizer       func(detectors.Detector) bool
-	NoVerification           bool
-	PrintAvgDetectorTime     bool
-	FilterUnverified         bool
-	FilterEntropy            float64
-	ScanEntireChunk          bool
-	JobReportWriter          io.WriteCloser
-	AllowVerificationOverlap bool
-	ParsedResults            map[string]struct{}
-	Printer                  engine.Printer
-}
-
-func compareScans(ctx context.Context, cfg scanConfig) error {
+func compareScans(ctx context.Context, cmd string, cfg engine.Config) error {
 	var (
 		entireMetrics    metrics
 		maxLengthMetrics metrics
@ -546,14 +436,15 @@ func compareScans(ctx context.Context, cfg scanConfig) error {
 	go func() {
 		defer wg.Done()
 		// Run scan with entire chunk span calculator.
-		entireMetrics, err = runSingleScan(ctx, cfg, true)
+		cfg.ShouldScanEntireChunk = true
+		entireMetrics, err = runSingleScan(ctx, cmd, cfg)
 		if err != nil {
 			ctx.Logger().Error(err, "error running scan with entire chunk span calculator")
 		}
 	}()

 	// Run scan with max-length span calculator.
-	maxLengthMetrics, err = runSingleScan(ctx, cfg, false)
+	maxLengthMetrics, err = runSingleScan(ctx, cmd, cfg)
 	if err != nil {
 		return fmt.Errorf("error running scan with custom span calculator: %v", err)
 	}
@ -585,28 +476,65 @@ type metrics struct {
 	hasFoundResults bool
 }

-func runSingleScan(ctx context.Context, cfg scanConfig, scanEntireChunk bool) (metrics, error) {
-	eng, err := engine.Start(ctx,
-		engine.WithConcurrency(cfg.Concurrency),
-		engine.WithDecoders(cfg.Decoders...),
-		engine.WithDetectors(engine.DefaultDetectors()...),
-		engine.WithDetectors(cfg.Conf.Detectors...),
-		engine.WithVerify(!cfg.NoVerification),
-		engine.WithFilterDetectors(cfg.IncludeFilter),
-		engine.WithFilterDetectors(cfg.ExcludeFilter),
-		engine.WithFilterDetectors(cfg.EndpointCustomizer),
-		engine.WithFilterUnverified(cfg.FilterUnverified),
-		engine.WithResults(cfg.ParsedResults),
-		engine.WithPrintAvgDetectorTime(cfg.PrintAvgDetectorTime),
-		engine.WithPrinter(cfg.Printer),
-		engine.WithFilterEntropy(cfg.FilterEntropy),
-		engine.WithVerificationOverlap(cfg.AllowVerificationOverlap),
-		engine.WithEntireChunkScan(scanEntireChunk),
-	)
-	if err != nil {
-		return metrics{}, fmt.Errorf("error initializing engine: %v", err)
+func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics, error) {
+	var scanMetrics metrics
+
+	// Setup job report writer if provided
+	var jobReportWriter io.WriteCloser
+	if *jobReportFile != nil {
+		jobReportWriter = *jobReportFile
 	}

+	handleFinishedMetrics := func(ctx context.Context, finishedMetrics <-chan sources.UnitMetrics, jobReportWriter io.WriteCloser) {
+		go func() {
+			defer func() {
+				jobReportWriter.Close()
+				if namer, ok := jobReportWriter.(interface{ Name() string }); ok {
+					ctx.Logger().Info("report written", "path", namer.Name())
+				} else {
+					ctx.Logger().Info("report written")
+				}
+			}()
+
+			for metrics := range finishedMetrics {
+				metrics.Errors = common.ExportErrors(metrics.Errors...)
+				details, err := json.Marshal(map[string]any{
+					"version": 1,
+					"data":    metrics,
+				})
+				if err != nil {
+					ctx.Logger().Error(err, "error marshalling job details")
+					continue
+				}
+				if _, err := jobReportWriter.Write(append(details, '\n')); err != nil {
+					ctx.Logger().Error(err, "error writing to file")
+				}
+			}
+		}()
+	}
+
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithConcurrentSources(cfg.Concurrency),
+		sources.WithConcurrentUnits(cfg.Concurrency),
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	if jobReportWriter != nil {
+		unitHook, finishedMetrics := sources.NewUnitHook(ctx)
+		opts = append(opts, sources.WithReportHook(unitHook))
+		handleFinishedMetrics(ctx, finishedMetrics, jobReportWriter)
+	}
+
+	cfg.SourceManager = sources.NewManager(opts...)
+
+	eng, err := engine.NewEngine(ctx, &cfg)
+	if err != nil {
+		return scanMetrics, fmt.Errorf("error initializing engine: %v", err)
+	}
+	eng.Start(ctx)
+
 	defer func() {
 		// Clean up temporary artifacts.
 		if err := cleantemp.CleanTempArtifacts(ctx); err != nil {
@ -614,8 +542,7 @@ func runSingleScan(ctx context.Context, cfg scanConfig, scanEntireChunk bool) (m
 		}
 	}()

-	var scanMetrics metrics
-	switch cfg.Command {
+	switch cmd {
 	case gitScan.FullCommand():
 		gitCfg := sources.GitConfig{
 			URI:              *gitScanURI,
@ -812,7 +739,7 @@ func runSingleScan(ctx context.Context, cfg scanConfig, scanEntireChunk bool) (m
 			return scanMetrics, fmt.Errorf("failed to scan Jenkins: %v", err)
 		}
 	default:
-		return scanMetrics, fmt.Errorf("invalid command: %s", cfg.Command)
+		return scanMetrics, fmt.Errorf("invalid command: %s", cmd)
 	}

 	// Wait for all workers to finish.
@ -887,47 +814,3 @@ func printAverageDetectorTime(e *engine.Engine) {
 		fmt.Fprintf(os.Stderr, "%s: %s\n", detectorName, duration)
 	}
 }
-
-// detectorTypeToSet is a helper function to convert a slice of detector IDs into a set.
-func detectorTypeToSet(detectors []config.DetectorID) map[config.DetectorID]struct{} {
-	out := make(map[config.DetectorID]struct{}, len(detectors))
-	for _, d := range detectors {
-		out[d] = struct{}{}
-	}
-	return out
-}
-
-// getWithDetectorID is a helper function to get a value from a map using a
-// detector's ID. This function behaves like a normal map lookup, with an extra
-// step of checking for the non-specific version of a detector.
-func getWithDetectorID[T any](d detectors.Detector, data map[config.DetectorID]T) (T, bool) {
-	key := config.GetDetectorID(d)
-	// Check if the specific ID is provided.
-	if t, ok := data[key]; ok || key.Version == 0 {
-		return t, ok
-	}
-	// Check if the generic type is provided without a version.
-	// This means "all" versions of a type.
-	key.Version = 0
-	t, ok := data[key]
-	return t, ok
-}
-
-// verifyDetectorsAreVersioner checks all keys in a provided map to verify the
-// provided type is actually a Versioner.
-func verifyDetectorsAreVersioner[T any](data map[config.DetectorID]T) (config.DetectorID, error) {
-	isVersioner := engine.DefaultDetectorTypesImplementing[detectors.Versioner]()
-	for id := range data {
-		if id.Version == 0 {
-			// Version not provided.
-			continue
-		}
-		if _, ok := isVersioner[id.ID]; ok {
-			// Version provided for a Versioner detector.
-			continue
-		}
-		// Version provided on a non-Versioner detector.
-		return id, fmt.Errorf("version provided but detector does not have a version")
-	}
-	return config.DetectorID{}, nil
-}
--- a/pkg/detectors/detectors.go
+++ b/pkg/detectors/detectors.go
@ -121,10 +121,17 @@ func unwrapToLast(err error) error {
 }

 type ResultWithMetadata struct {
+	// IsWordlistFalsePositive indicates whether this secret was flagged as a false positive based on a wordlist check
+	IsWordlistFalsePositive bool
 	// SourceMetadata contains source-specific contextual information.
 	SourceMetadata *source_metadatapb.MetaData
 	// SourceID is the ID of the source that the API uses to map secrets to specific sources.
 	SourceID sources.SourceID
+	// JobID is the ID of the job that the API uses to map secrets to specific jobs.
+	JobID sources.JobID
+	// SecretID is the ID of the secret, if it exists.
+	// Only secrets that are being reverified will have a SecretID.
+	SecretID int64
 	// SourceType is the type of Source.
 	SourceType sourcespb.SourceType
 	// SourceName is the name of the Source.
@ -139,6 +146,8 @@ func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
 	return ResultWithMetadata{
 		SourceMetadata: chunk.SourceMetadata,
 		SourceID:       chunk.SourceID,
+		JobID:          chunk.JobID,
+		SecretID:       chunk.SecretID,
 		SourceType:     chunk.SourceType,
 		SourceName:     chunk.SourceName,
 		Result:         result,
--- a/pkg/engine/engine.go
+++ b/pkg/engine/engine.go
@ -2,10 +2,8 @@ package engine

 import (
 	"bytes"
-	"encoding/json"
 	"errors"
 	"fmt"
-	"io"
 	"runtime"
 	"sync"
 	"sync/atomic"
@ -54,24 +52,106 @@ type runtimeMetrics struct {
 	detectorAvgTime sync.Map
 }

+// getScanDuration reports how long the scan has taken.
+// A non-zero ScanDuration is returned unchanged; while it is still zero the
+// time elapsed since scanStartTime is reported instead.
+func (m *Metrics) getScanDuration() time.Duration {
+	if recorded := m.ScanDuration; recorded != 0 {
+		return recorded
+	}
+
+	return time.Since(m.scanStartTime)
+}
+
+// ResultsDispatcher is an interface for dispatching findings of detected results.
+// Implementations can vary from printing results to the console to sending results to an external system.
+type ResultsDispatcher interface {
+	// Dispatch receives a single result together with its metadata.
+	// The meaning of a non-nil error is defined by the implementation.
+	Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error
+}
+
 // Printer is used to format found results and output them to the user. Ex JSON, plain text, etc.
 // Please note printer implementations SHOULD BE thread safe.
 type Printer interface {
 	Print(ctx context.Context, r *detectors.ResultWithMetadata) error
 }

+// PrinterDispatcher adapts a Printer so that it can be used wherever a
+// ResultsDispatcher is expected.
+type PrinterDispatcher struct {
+	printer Printer
+}
+
+// NewPrinterDispatcher returns a PrinterDispatcher that forwards every result to printer.
+func NewPrinterDispatcher(printer Printer) *PrinterDispatcher {
+	return &PrinterDispatcher{printer: printer}
+}
+
+// Dispatch hands the result to the wrapped Printer and returns the error it reports, if any.
+func (p *PrinterDispatcher) Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error {
+	return p.printer.Print(ctx, &result)
+}
+
+// Config holds the settings NewEngine reads when constructing an Engine.
+type Config struct {
+	// Number of concurrent scanner workers,
+	// also serves as a multiplier for other worker types (e.g., detector workers, notifier workers).
+	// A value of 0 makes the engine fall back to runtime.NumCPU().
+	Concurrency int
+
+	// Decoders and Detectors fall back to the built-in defaults when left empty.
+	// DetectorVerificationOverrides lists detectors configured to override
+	// sources' verification flags.
+	// IncludeDetectors and ExcludeDetectors are parsed with config.ParseDetectors,
+	// and VerifierEndpoints with config.ParseVerifierEndpoints.
+	// CustomVerifiersOnly, when set and at least one custom endpoint is given
+	// for a detector, keeps that detector's default endpoint from being added.
+	Decoders                      []decoders.Decoder
+	Detectors                     []detectors.Detector
+	DetectorVerificationOverrides map[config.DetectorID]bool
+	IncludeDetectors              string
+	ExcludeDetectors              string
+	CustomVerifiersOnly           bool
+	VerifierEndpoints             map[string]string
+
+	// Verify determines whether the scanner will verify candidate secrets.
+	Verify bool
+
+	// Defines which results will be notified by the engine
+	// (e.g., verified, unverified, unknown).
+	// Recognized keys are "verified", "unknown", "unverified" and
+	// "filtered_unverified". When the map is empty, verified, unverified and
+	// unknown results are all notified.
+	// NOTE: a non-empty Results also overwrites LogFilteredUnverified with the
+	// presence of the "filtered_unverified" key.
+	Results               map[string]struct{}
+	LogFilteredUnverified bool
+
+	// FilterEntropy filters out unverified results using Shannon entropy.
+	FilterEntropy float64
+	// FilterUnverified sets the filterUnverified flag on the engine. If set to
+	// true, the engine will only return the first unverified result for a chunk for a detector.
+	FilterUnverified      bool
+	ShouldScanEntireChunk bool
+
+	// Dispatcher receives the results the engine reports. When nil, the engine
+	// uses a PrinterDispatcher wrapping output.PlainPrinter.
+	Dispatcher ResultsDispatcher
+
+	// SourceManager is used to manage the sources and units.
+	// It is required: NewEngine returns an error when it is nil.
+	SourceManager *sources.SourceManager
+
+	// PrintAvgDetectorTime sets the printAvgDetectorTime flag on the engine. If set to
+	// true, the engine will print the average time taken by each detector.
+	// This option allows us to measure the time taken for each detector ONLY if
+	// the engine is configured to print the results.
+	// Calculating the average time taken by each detector is an expensive operation
+	// and should be avoided unless specified by the user.
+	PrintAvgDetectorTime bool
+
+	// VerificationOverlap determines whether the scanner will attempt to verify candidate secrets
+	// that have been detected by multiple detectors.
+	// NOTE(review): NewEngine copies this field as-is, so the zero value (false)
+	// applies unless the caller sets it; confirm the intended default.
+	VerificationOverlap bool
+}
+
+// Engine represents the core scanning engine responsible for detecting secrets in input data.
+// It manages the lifecycle of the scanning process, including initialization, worker management,
+// and result notification. The engine is designed to be flexible and configurable, allowing for
+// customization through various options and configurations.
 type Engine struct {
 	// CLI flags.
-	concurrency     int
-	decoders        []decoders.Decoder
-	detectors       []detectors.Detector
-	jobReportWriter io.WriteCloser
+	concurrency int
+	decoders    []decoders.Decoder
+	detectors   []detectors.Detector
+	// Any detectors configured to override sources' verification flags
+	detectorVerificationOverrides map[config.DetectorID]bool
+
 	// filterUnverified is used to reduce the number of unverified results.
 	// If there are multiple unverified results for the same chunk for the same detector,
 	// only the first one will be kept.
 	filterUnverified bool
 	// entropyFilter is used to filter out unverified results using Shannon entropy.
-	filterEntropy           *float64
+	filterEntropy           float64
 	notifyVerifiedResults   bool
 	notifyUnverifiedResults bool
 	notifyUnknownResults    bool
@ -100,152 +180,242 @@ type Engine struct {
 	// numFoundResults is used to keep track of the number of results found.
 	numFoundResults uint32

-	// printer provides a method for formatting and outputting search results.
-	// The specific implementation (e.g., JSON, plain text)
-	// should be set during initialization based on user preference or program requirements.
-	printer Printer
+	// ResultsDispatcher is used to send results.
+	dispatcher ResultsDispatcher

 	// dedupeCache is used to deduplicate results by comparing the
 	// detector type, raw result, and source metadata
 	dedupeCache *lru.Cache[string, detectorspb.DecoderType]

-	// verify determines whether the scanner will attempt to verify candidate secrets
+	// verify determines whether the scanner will attempt to verify candidate secrets.
 	verify bool

-	// Note: bad hack only used for testing
+	// Note: bad hack only used for testing.
 	verificationOverlapTracker *verificationOverlapTracker
 }

-type verificationOverlapTracker struct {
-	verificationOverlapDuplicateCount int
-	mu                                sync.Mutex
-}
-
-func (r *verificationOverlapTracker) increment() {
-	r.mu.Lock()
-	r.verificationOverlapDuplicateCount++
-	r.mu.Unlock()
-}
-
-// Option is used to configure the engine during initialization using functional options.
-type Option func(*Engine)
-
-func WithJobReportWriter(w io.WriteCloser) Option {
-	return func(e *Engine) {
-		e.jobReportWriter = w
+// NewEngine creates a new Engine instance with the provided configuration.
+func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) {
+	engine := &Engine{
+		concurrency:                   cfg.Concurrency,
+		decoders:                      cfg.Decoders,
+		detectors:                     cfg.Detectors,
+		dispatcher:                    cfg.Dispatcher,
+		verify:                        cfg.Verify,
+		filterUnverified:              cfg.FilterUnverified,
+		filterEntropy:                 cfg.FilterEntropy,
+		printAvgDetectorTime:          cfg.PrintAvgDetectorTime,
+		logFilteredUnverified:         cfg.LogFilteredUnverified,
+		verificationOverlap:           cfg.VerificationOverlap,
+		sourceManager:                 cfg.SourceManager,
+		scanEntireChunk:               cfg.ShouldScanEntireChunk,
+		detectorVerificationOverrides: cfg.DetectorVerificationOverrides,
 	}
-}
-
-func WithConcurrency(concurrency int) Option {
-	return func(e *Engine) {
-		e.concurrency = concurrency
+	if engine.sourceManager == nil {
+		return nil, fmt.Errorf("source manager is required")
 	}
-}

-const ignoreTag = "trufflehog:ignore"
+	engine.setDefaults(ctx)

-func WithDetectors(d ...detectors.Detector) Option {
-	return func(e *Engine) {
-		e.detectors = append(e.detectors, d...)
+	// Build include and exclude detector sets for filtering on engine initialization.
+	includeDetectorSet, excludeDetectorSet, err := buildDetectorSets(cfg)
+	if err != nil {
+		return nil, err
 	}
-}

-func WithDecoders(decoders ...decoders.Decoder) Option {
-	return func(e *Engine) {
-		e.decoders = decoders
+	// Apply include/exclude filters.
+	var filters []func(detectors.Detector) bool
+
+	if len(includeDetectorSet) > 0 {
+		filters = append(filters, func(d detectors.Detector) bool {
+			_, ok := getWithDetectorID(d, includeDetectorSet)
+			return ok
+		})
 	}
-}

-// WithFilterUnverified sets the filterUnverified flag on the engine. If set to
-// true, the engine will only return the first unverified result for a chunk for a detector.
-func WithFilterUnverified(filter bool) Option {
-	return func(e *Engine) {
-		e.filterUnverified = filter
+	if len(excludeDetectorSet) > 0 {
+		filters = append(filters, func(d detectors.Detector) bool {
+			_, ok := getWithDetectorID(d, excludeDetectorSet)
+			return !ok
+		})
 	}
-}

-// WithFilterEntropy filters out unverified results using Shannon entropy.
-func WithFilterEntropy(entropy float64) Option {
-	return func(e *Engine) {
-		if entropy > 0 {
-			e.filterEntropy = &entropy
-		}
+	// Apply custom verifier endpoints to detectors that support it.
+	detectorsWithCustomVerifierEndpoints, err := parseCustomVerifierEndpoints(cfg.VerifierEndpoints)
+	if err != nil {
+		return nil, err
 	}
-}

-// WithResults defines which results will be printed by the engine.
-func WithResults(results map[string]struct{}) Option {
-	return func(e *Engine) {
-		if len(results) == 0 {
-			return
-		}
+	if len(detectorsWithCustomVerifierEndpoints) > 0 {
+		filters = append(filters, func(d detectors.Detector) bool {
+			urls, ok := getWithDetectorID(d, detectorsWithCustomVerifierEndpoints)
+			if !ok {
+				return true
+			}
+			customizer, ok := d.(detectors.EndpointCustomizer)
+			if !ok {
+				return false
+			}

+			if !cfg.CustomVerifiersOnly || len(urls) == 0 {
+				urls = append(urls, customizer.DefaultEndpoint())
+			}
+			if err := customizer.SetEndpoints(urls...); err != nil {
+				return false
+			}
+			return true
+		})
+	}
+	engine.applyFilters(filters...)
+
+	if results := cfg.Results; len(results) > 0 {
 		_, ok := results["verified"]
-		e.notifyVerifiedResults = ok
+		engine.notifyVerifiedResults = ok

 		_, ok = results["unknown"]
-		e.notifyUnknownResults = ok
+		engine.notifyUnknownResults = ok

 		_, ok = results["unverified"]
-		e.notifyUnverifiedResults = ok
+		engine.notifyUnverifiedResults = ok

 		_, ok = results["filtered_unverified"]
-		e.logFilteredUnverified = ok
+		engine.logFilteredUnverified = ok
 	}
+
+	if err := engine.initialize(ctx); err != nil {
+		return nil, err
+	}
+
+	return engine, nil
 }

-// WithPrintAvgDetectorTime sets the printAvgDetectorTime flag on the engine. If set to
-// true, the engine will print the average time taken by each detector.
-// This option allows us to measure the time taken for each detector ONLY if
-// the engine is configured to print the results.
-// Calculating the average time taken by each detector is an expensive operation
-// and should be avoided unless specified by the user.
-func WithPrintAvgDetectorTime(printAvgDetectorTime bool) Option {
-	return func(e *Engine) {
-		e.printAvgDetectorTime = printAvgDetectorTime
+// setDefaults ensures that if specific engine properties aren't provided,
+// they're set to reasonable default values. It makes the engine robust to
+// incomplete configuration.
+//
+// Defaults applied here: concurrency falls back to runtime.NumCPU(), decoders
+// to decoders.DefaultDecoders(), detectors to DefaultDetectors(), and the
+// dispatcher to a PrinterDispatcher around output.PlainPrinter. The built-in
+// decoders/detectors are used only when the configured list is empty; a
+// non-empty list is kept as-is rather than extended.
+func (e *Engine) setDefaults(ctx context.Context) {
+	if e.concurrency == 0 {
+		numCPU := runtime.NumCPU()
+		ctx.Logger().Info("No concurrency specified, defaulting to max", "cpu", numCPU)
+		e.concurrency = numCPU
 	}
+	ctx.Logger().V(3).Info("engine started", "workers", e.concurrency)
+
+	// Default decoders handle common encoding formats.
+	if len(e.decoders) == 0 {
+		e.decoders = decoders.DefaultDecoders()
+	}
+
+	if len(e.detectors) == 0 {
+		e.detectors = DefaultDetectors()
+	}
+
+	if e.dispatcher == nil {
+		e.dispatcher = NewPrinterDispatcher(new(output.PlainPrinter))
+	}
+	// Every result kind is notified unless these flags are overwritten
+	// after this call.
+	e.notifyVerifiedResults = true
+	e.notifyUnverifiedResults = true
+	e.notifyUnknownResults = true
+
+	ctx.Logger().V(4).Info("default engine options set")
 }

-// WithFilterDetectors applies a filter to the configured list of detectors. If
-// the filterFunc returns true, the detector will be included for scanning.
-// This option applies to the existing list of detectors configured, so the
-// order this option appears matters. All filtering happens before scanning.
-func WithFilterDetectors(filterFunc func(detectors.Detector) bool) Option {
-	return func(e *Engine) {
-		// If no detectors are configured, do nothing.
-		if e.detectors == nil {
-			return
-		}
-		e.detectors = filterDetectors(filterFunc, e.detectors)
+// buildDetectorSets parses cfg.IncludeDetectors and cfg.ExcludeDetectors with
+// config.ParseDetectors and returns them as two sets (include first, exclude
+// second). It returns an error when either list fails to parse or when a
+// version is given for a detector that is not a Versioner.
+func buildDetectorSets(cfg *Config) (map[config.DetectorID]struct{}, map[config.DetectorID]struct{}, error) {
+	includeList, err := config.ParseDetectors(cfg.IncludeDetectors)
+	if err != nil {
+		return nil, nil, fmt.Errorf("invalid include list detector configuration: %w", err)
 	}
+	excludeList, err := config.ParseDetectors(cfg.ExcludeDetectors)
+	if err != nil {
+		return nil, nil, fmt.Errorf("invalid exclude list detector configuration: %w", err)
+	}
+
+	includeDetectorSet := detectorTypeToSet(includeList)
+	excludeDetectorSet := detectorTypeToSet(excludeList)
+
+	// Verify that all the user-provided detectors support the optional
+	// detector features.
+	if id, err := verifyDetectorsAreVersioner(includeDetectorSet); err != nil {
+		return nil, nil, fmt.Errorf("invalid include list detector configuration id %v: %w", id, err)
+	}
+
+	if id, err := verifyDetectorsAreVersioner(excludeDetectorSet); err != nil {
+		return nil, nil, fmt.Errorf("invalid exclude list detector configuration id %v: %w", id, err)
+	}
+
+	return includeDetectorSet, excludeDetectorSet, nil
 }

-// WithPrinter sets the Printer on the engine.
-func WithPrinter(printer Printer) Option {
-	return func(e *Engine) {
-		e.printer = printer
+// parseCustomVerifierEndpoints converts the user-supplied verifier endpoint
+// configuration into a map keyed by detector ID. An empty input yields a nil
+// map and no error. An error is returned when the configuration cannot be
+// parsed, when a version is given for a detector that is not a Versioner, or
+// when an endpoint is given for a detector type that does not implement
+// detectors.EndpointCustomizer.
+func parseCustomVerifierEndpoints(endpoints map[string]string) (map[config.DetectorID][]string, error) {
+	if len(endpoints) == 0 {
+		return nil, nil
 	}
-}

-// WithVerify configures whether the scanner will verify candidate secrets.
-func WithVerify(verify bool) Option {
-	return func(e *Engine) {
-		e.verify = verify
+	customVerifierEndpoints, err := config.ParseVerifierEndpoints(endpoints)
+	if err != nil {
+		return nil, fmt.Errorf("invalid verifier detector configuration: %w", err)
 	}
-}

-func withVerificationOverlapTracking() Option {
-	return func(e *Engine) {
-		e.verificationOverlapTracker = &verificationOverlapTracker{
-			verificationOverlapDuplicateCount: 0,
+	if id, err := verifyDetectorsAreVersioner(customVerifierEndpoints); err != nil {
+		return nil, fmt.Errorf("invalid verifier detector configuration id %v: %w", id, err)
+	}
+	// Extra check for endpoint customization.
+	isEndpointCustomizer := DefaultDetectorTypesImplementing[detectors.EndpointCustomizer]()
+	for id := range customVerifierEndpoints {
+		if _, ok := isEndpointCustomizer[id.ID]; !ok {
+			// err is always nil at this point, so there is nothing to wrap;
+			// report the offending detector instead.
+			return nil, fmt.Errorf("invalid custom verifier endpoint detector configuration id %v: endpoint provided but detector does not support endpoint customization", id)
 		}
 	}
+	return customVerifierEndpoints, nil
 }

-// WithVerificationOverlap
-func WithVerificationOverlap(verificationOverlap bool) Option {
-	return func(e *Engine) {
-		e.verificationOverlap = verificationOverlap
+// detectorTypeToSet turns a slice of detector IDs into a set keyed by ID.
+func detectorTypeToSet(detectors []config.DetectorID) map[config.DetectorID]struct{} {
+	set := make(map[config.DetectorID]struct{}, len(detectors))
+	for i := range detectors {
+		set[detectors[i]] = struct{}{}
+	}
+	return set
+}
+
+// getWithDetectorID looks up the value stored for a detector. It works like a
+// plain map lookup on the detector's ID, except that a versioned detector with
+// no exact entry is looked up again under its version-less ID.
+func getWithDetectorID[T any](d detectors.Detector, data map[config.DetectorID]T) (T, bool) {
+	id := config.GetDetectorID(d)
+	// An exact match wins; a version-less ID has no fallback to try.
+	value, found := data[id]
+	if found || id.Version == 0 {
+		return value, found
+	}
+	// An entry without a version stands for "all" versions of the type.
+	id.Version = 0
+	value, found = data[id]
+	return value, found
+}
+
+// verifyDetectorsAreVersioner inspects every key of the provided map and
+// returns the first ID that carries a version although its detector type is
+// not a Versioner, together with an error. It returns the zero ID and nil when
+// all keys are acceptable.
+func verifyDetectorsAreVersioner[T any](data map[config.DetectorID]T) (config.DetectorID, error) {
+	versioners := DefaultDetectorTypesImplementing[detectors.Versioner]()
+	for id := range data {
+		// IDs without a version need no check.
+		if id.Version == 0 {
+			continue
+		}
+		// A version is only valid on a detector type that is a Versioner.
+		if _, isVersioner := versioners[id.ID]; !isVersioner {
+			return id, fmt.Errorf("version provided but detector does not have a version")
+		}
+	}
+	return config.DetectorID{}, nil
+}
+
+// applyFilters narrows the engine's detectors by running each filter, in the
+// order given, over the detectors left by the previous one.
+func (e *Engine) applyFilters(filters ...func(detectors.Detector) bool) {
+	for i := range filters {
+		e.detectors = filterDetectors(filters[i], e.detectors)
+	}
 }

@ -259,11 +429,68 @@ func filterDetectors(filterFunc func(detectors.Detector) bool, input []detectors
 	return out
 }

-// WithEntireChunkScan sets the flag to configure AhoCorasickCore to scan entire chunks.
-func WithEntireChunkScan(enabled bool) Option {
-	return func(e *Engine) { e.scanEntireChunk = enabled }
+// initialize prepares the engine's internal structures: the LRU cache that the
+// notifier workers consult when deduplicating results, the buffered channels
+// that connect the worker stages, and the Aho-Corasick core built from the
+// engine's detectors.
+//
+// It returns an error only when the LRU cache cannot be created.
+func (e *Engine) initialize(ctx context.Context) error {
+	// TODO (ahrav): Determine the optimal cache size.
+	const cacheSize = 512 // number of entries in the LRU cache
+
+	// The cache maps a result key (string) to the decoder type stored for it.
+	cache, err := lru.New[string, detectorspb.DecoderType](cacheSize)
+	if err != nil {
+		return fmt.Errorf("failed to initialize LRU cache: %w", err)
+	}
+	const (
+		// detectableChunksChanMultiplier is set to accommodate a high number of concurrent worker goroutines.
+		// This multiplier ensures that the detectableChunksChan channel has sufficient buffer capacity
+		// to hold messages from multiple worker groups (detector workers / verificationOverlap workers)
+		// without blocking. A large buffer helps absorb the fact that workers produce data at a faster
+		// rate than it can be consumed.
+		detectableChunksChanMultiplier = 50
+		// verificationOverlapChunksChanMultiplier uses a smaller buffer compared to detectableChunksChanMultiplier.
+		// This reflects the anticipated lower volume of data that needs re-verification.
+		// The buffer size is a trade-off between memory usage and the need to prevent blocking.
+		verificationOverlapChunksChanMultiplier = 25
+	)
+
+	// Channels are used for communication between different parts of the engine,
+	// ensuring that data flows smoothly without race conditions.
+	// The buffer sizes for these channels are set to multiples of defaultChannelBuffer,
+	// considering the expected concurrency and workload in the system.
+	e.detectableChunksChan = make(chan detectableChunk, defaultChannelBuffer*detectableChunksChanMultiplier)
+	e.verificationOverlapChunksChan = make(
+		chan verificationOverlapChunk, defaultChannelBuffer*verificationOverlapChunksChanMultiplier,
+	)
+	e.results = make(chan detectors.ResultWithMetadata, defaultChannelBuffer)
+	e.dedupeCache = cache
+	// Note: this message is logged before the Aho-Corasick core below is built.
+	ctx.Logger().V(4).Info("engine initialized")
+
+	// Configure the EntireChunkSpanCalculator if the engine is set to scan the entire chunk.
+	var ahoCOptions []ahocorasick.CoreOption
+	if e.scanEntireChunk {
+		ahoCOptions = append(ahoCOptions, ahocorasick.WithSpanCalculator(new(ahocorasick.EntireChunkSpanCalculator)))
+	}
+
+	// Build the keyword-matching core from the detectors currently on the engine.
+	ctx.Logger().V(4).Info("setting up aho-corasick core")
+	e.ahoCorasickCore = ahocorasick.NewAhoCorasickCore(e.detectors, ahoCOptions...)
+	ctx.Logger().V(4).Info("set up aho-corasick core")
+
+	return nil
 }

+// verificationOverlapTracker holds a counter that is bumped through increment
+// from the verification overlap worker. The counter is only modified while mu
+// is held.
+type verificationOverlapTracker struct {
+	// verificationOverlapDuplicateCount is the running count maintained by increment.
+	verificationOverlapDuplicateCount int
+	// mu guards verificationOverlapDuplicateCount.
+	mu                                sync.Mutex
+}
+
+// increment adds one to the duplicate count while holding the tracker's mutex.
+func (r *verificationOverlapTracker) increment() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.verificationOverlapDuplicateCount++
+}
+
+const ignoreTag = "trufflehog:ignore"
+
 // HasFoundResults returns true if any results are found.
 func (e *Engine) HasFoundResults() bool {
 	return atomic.LoadUint32(&e.numFoundResults) > 0
@ -310,16 +537,6 @@ func (e *Engine) GetDetectorsMetrics() map[string]time.Duration {
 	return result
 }

-// getScanDuration returns the duration of the scan.
-// If the scan is still running, it returns the time since the scan started.
-func (m *Metrics) getScanDuration() time.Duration {
-	if m.ScanDuration == 0 {
-		return time.Since(m.scanStartTime)
-	}
-
-	return m.ScanDuration
-}
-
 // DetectorAvgTime returns the average time taken by each detector.
 func (e *Engine) DetectorAvgTime() map[string][]time.Duration {
 	logger := context.Background().Logger()
@ -344,147 +561,16 @@ func (e *Engine) DetectorAvgTime() map[string][]time.Duration {

 // Start initializes and activates the engine's processing pipeline.
 // It sets up various default configurations, prepares lookup structures for
-// detectors, conducts basic sanity checks, and kickstarts all necessary workers.
-// Once started, the engine begins processing input data to identify secrets.
-func Start(ctx context.Context, options ...Option) (*Engine, error) {
-	e := &Engine{}
-
-	if err := e.initialize(ctx, options...); err != nil {
-		return nil, err
-	}
-	e.initSourceManager(ctx)
-	e.setDefaults(ctx)
+// detectors, and kickstarts all necessary workers. Once started, the engine
+// begins processing input data to identify secrets.
+//
+// Concretely, Start resets the engine's metrics with the current time as the
+// scan start time, runs the detector sanity checks, and launches the workers.
+// It does not build the engine itself; it operates on an existing *Engine.
+func (e *Engine) Start(ctx context.Context) {
+	e.metrics = runtimeMetrics{Metrics: Metrics{scanStartTime: time.Now()}}
 	e.sanityChecks(ctx)
 	e.startWorkers(ctx)
-
-	return e, nil
 }

 var defaultChannelBuffer = runtime.NumCPU()

-// initialize prepares the engine's internal structures. The LRU cache optimizes
-// deduplication efforts, allowing the engine to quickly check if a chunk has
-// been processed before, thereby saving computational overhead.
-func (e *Engine) initialize(ctx context.Context, options ...Option) error {
-	// TODO (ahrav): Determine the optimal cache size.
-	const cacheSize = 512 // number of entries in the LRU cache
-
-	cache, err := lru.New[string, detectorspb.DecoderType](cacheSize)
-	if err != nil {
-		return fmt.Errorf("failed to initialize LRU cache: %w", err)
-	}
-	const (
-		// detectableChunksChanMultiplier is set to accommodate a high number of concurrent worker goroutines.
-		// This multiplier ensures that the detectableChunksChan channel has sufficient buffer capacity
-		// to hold messages from multiple worker groups (detector workers/ verificationOverlap workers) without blocking.
-		// A large buffer helps accommodate for the fact workers are producing data at a faster rate
-		// than it can be consumed.
-		detectableChunksChanMultiplier = 50
-		// verificationOverlapChunksChanMultiplier uses a smaller buffer compared to detectableChunksChanMultiplier.
-		// This reflects the anticipated lower volume of data that needs re-verification.
-		// The buffer size is a trade-off between memory usage and the need to prevent blocking.
-		verificationOverlapChunksChanMultiplier = 25
-	)
-
-	// Channels are used for communication between different parts of the engine,
-	// ensuring that data flows smoothly without race conditions.
-	// The buffer sizes for these channels are set to multiples of defaultChannelBuffer,
-	// considering the expected concurrency and workload in the system.
-	e.detectableChunksChan = make(chan detectableChunk, defaultChannelBuffer*detectableChunksChanMultiplier)
-	e.notifyVerifiedResults = true
-	e.notifyUnknownResults = true
-	e.notifyUnverifiedResults = true
-	e.verificationOverlapChunksChan = make(
-		chan verificationOverlapChunk, defaultChannelBuffer*verificationOverlapChunksChanMultiplier,
-	)
-	e.results = make(chan detectors.ResultWithMetadata, defaultChannelBuffer)
-	e.dedupeCache = cache
-	e.printer = new(output.PlainPrinter)
-	e.metrics = runtimeMetrics{Metrics: Metrics{scanStartTime: time.Now()}}
-
-	for _, option := range options {
-		option(e)
-	}
-	ctx.Logger().V(4).Info("engine initialized")
-
-	// Configure the EntireChunkSpanCalculator if the engine is set to scan the entire chunk.
-	var ahoCOptions []ahocorasick.CoreOption
-	if e.scanEntireChunk {
-		ahoCOptions = append(ahoCOptions, ahocorasick.WithSpanCalculator(new(ahocorasick.EntireChunkSpanCalculator)))
-	}
-
-	ctx.Logger().V(4).Info("setting up aho-corasick core")
-	e.ahoCorasickCore = ahocorasick.NewAhoCorasickCore(e.detectors, ahoCOptions...)
-	ctx.Logger().V(4).Info("set up aho-corasick core")
-
-	return nil
-}
-
-func (e *Engine) initSourceManager(ctx context.Context) {
-	const defaultOutputBufferSize = 64
-
-	opts := []func(*sources.SourceManager){
-		sources.WithConcurrentSources(e.concurrency),
-		sources.WithConcurrentUnits(e.concurrency),
-		sources.WithSourceUnits(),
-		sources.WithBufferedOutput(defaultOutputBufferSize),
-	}
-	if e.jobReportWriter != nil {
-		unitHook, finishedMetrics := sources.NewUnitHook(ctx)
-		opts = append(opts, sources.WithReportHook(unitHook))
-		e.wgDetectorWorkers.Add(1)
-		go func() {
-			defer e.wgDetectorWorkers.Done()
-			defer func() {
-				e.jobReportWriter.Close()
-				// Add a bit of extra information if it's a *os.File.
-				if namer, ok := e.jobReportWriter.(interface{ Name() string }); ok {
-					ctx.Logger().Info("report written", "path", namer.Name())
-				} else {
-					ctx.Logger().Info("report written")
-				}
-			}()
-			for metrics := range finishedMetrics {
-				metrics.Errors = common.ExportErrors(metrics.Errors...)
-				details, err := json.Marshal(map[string]any{
-					"version": 1,
-					"data":    metrics,
-				})
-				if err != nil {
-					ctx.Logger().Error(err, "error marshalling job details")
-					continue
-				}
-				if _, err := e.jobReportWriter.Write(append(details, '\n')); err != nil {
-					ctx.Logger().Error(err, "error writing to file")
-				}
-			}
-		}()
-	}
-	e.sourceManager = sources.NewManager(opts...)
-}
-
-// setDefaults ensures that if specific engine properties aren't provided,
-// they're set to reasonable default values. It makes the engine robust to
-// incomplete configuration.
-func (e *Engine) setDefaults(ctx context.Context) {
-	if e.concurrency == 0 {
-		numCPU := runtime.NumCPU()
-		ctx.Logger().Info("No concurrency specified, defaulting to max", "cpu", numCPU)
-		e.concurrency = numCPU
-	}
-	ctx.Logger().V(3).Info("engine started", "workers", e.concurrency)
-
-	// Default decoders handle common encoding formats.
-	if len(e.decoders) == 0 {
-		e.decoders = decoders.DefaultDecoders()
-	}
-
-	if len(e.detectors) == 0 {
-		e.detectors = DefaultDetectors()
-	}
-	ctx.Logger().V(4).Info("default engine options set")
-}
-
 // Sanity check detectors for duplicate configuration. Only log in case
 // a detector has been configured in a way that isn't represented by
 // the DetectorID (type and version).
@ -504,19 +590,36 @@ func (e *Engine) sanityChecks(ctx context.Context) {
 // workers helps in scalability and makes it easier to diagnose issues.
 func (e *Engine) startWorkers(ctx context.Context) {
 	// Scanner workers process input data and extract chunks for detectors.
+	e.startScannerWorkers(ctx)
+
+	// Detector workers apply keyword matching, regexes and API calls to detect secrets in chunks.
+	e.startDetectorWorkers(ctx)
+
+	// verificationOverlap workers handle verification of chunks that have been detected by multiple detectors.
+	// They ensure that verification is disabled for any secrets that have been detected by multiple detectors.
+	e.startVerificationOverlapWorkers(ctx)
+
+	// Notifier workers pass detected results to the engine's dispatcher, which communicates
+	// them to the user or any downstream systems.
+	// The notifier worker count is derived as a quarter of the scanner worker count.
+	e.startNotifierWorkers(ctx)
+}
+
+// startScannerWorkers launches e.concurrency goroutines, each running
+// scannerWorker. Every goroutine is registered on e.workersWg, gets its own
+// "scanner_worker_id" context value, and recovers from panics via common.Recover.
+func (e *Engine) startScannerWorkers(ctx context.Context) {
 	ctx.Logger().V(2).Info("starting scanner workers", "count", e.concurrency)
 	for worker := uint64(0); worker < uint64(e.concurrency); worker++ {
 		e.workersWg.Add(1)
 		go func() {
-			ctx := context.WithValue(ctx, "secret_worker_id", common.RandomID(5))
+			ctx := context.WithValue(ctx, "scanner_worker_id", common.RandomID(5))
 			defer common.Recover(ctx)
 			defer e.workersWg.Done()
-			e.detectorWorker(ctx)
+			e.scannerWorker(ctx)
 		}()
 	}
+}

-	// Detector workers apply keyword matching, regexes and API calls to detect secrets in chunks.
-	const detectorWorkerMultiplier = 50
+const detectorWorkerMultiplier = 50
+
+func (e *Engine) startDetectorWorkers(ctx context.Context) {
 	ctx.Logger().V(2).Info("starting detector workers", "count", e.concurrency*detectorWorkerMultiplier)
 	for worker := uint64(0); worker < uint64(e.concurrency*detectorWorkerMultiplier); worker++ {
 		e.wgDetectorWorkers.Add(1)
@ -524,12 +627,12 @@ func (e *Engine) startWorkers(ctx context.Context) {
 			ctx := context.WithValue(ctx, "detector_worker_id", common.RandomID(5))
 			defer common.Recover(ctx)
 			defer e.wgDetectorWorkers.Done()
-			e.detectChunks(ctx)
+			e.detectorWorker(ctx)
 		}()
 	}
+}

-	// verificationOverlap workers handle verification of chunks that have been detected by multiple detectors.
-	// They ensure that verification is disabled for any secrets that have been detected by multiple detectors.
+func (e *Engine) startVerificationOverlapWorkers(ctx context.Context) {
 	const verificationOverlapWorkerMultiplier = detectorWorkerMultiplier
 	ctx.Logger().V(2).Info("starting verificationOverlap workers", "count", e.concurrency)
 	for worker := uint64(0); worker < uint64(e.concurrency*verificationOverlapWorkerMultiplier); worker++ {
@ -541,9 +644,9 @@ func (e *Engine) startWorkers(ctx context.Context) {
 			e.verificationOverlapWorker(ctx)
 		}()
 	}
+}

-	// Notifier workers communicate detected issues to the user or any downstream systems.
-	// We want 1/4th of the notifier workers as the number of scanner workers.
+func (e *Engine) startNotifierWorkers(ctx context.Context) {
 	const notifierWorkerRatio = 4
 	maxNotifierWorkers := 1
 	if numWorkers := e.concurrency / notifierWorkerRatio; numWorkers > 0 {
@ -556,7 +659,7 @@ func (e *Engine) startWorkers(ctx context.Context) {
 			ctx := context.WithValue(ctx, "notifier_worker_id", common.RandomID(5))
 			defer common.Recover(ctx)
 			defer e.WgNotifier.Done()
-			e.notifyResults(ctx)
+			e.notifierWorker(ctx)
 		}()
 	}
 }
@ -618,11 +721,12 @@ type verificationOverlapChunk struct {
 	verificationOverlapWgDoneFn func()
 }

-func (e *Engine) detectorWorker(ctx context.Context) {
+func (e *Engine) scannerWorker(ctx context.Context) {
 	var wgDetect sync.WaitGroup
 	var wgVerificationOverlap sync.WaitGroup

 	for chunk := range e.ChunksChan() {
+		sourceVerify := chunk.Verify
 		atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data)))
 		for _, decoder := range e.decoders {
 			decoded := decoder.FromChunk(chunk)
@ -644,7 +748,7 @@ func (e *Engine) detectorWorker(ctx context.Context) {
 			}

 			for _, detector := range matchingDetectors {
-				decoded.Chunk.Verify = e.verify
+				decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides)
 				wgDetect.Add(1)
 				e.detectableChunksChan <- detectableChunk{
 					chunk:    *decoded.Chunk,
@ -664,6 +768,41 @@ func (e *Engine) detectorWorker(ctx context.Context) {
 	ctx.Logger().V(4).Info("finished scanning chunks")
 }

+// shouldVerifyChunk decides whether results produced by detector for a chunk
+// should be verified. Precedence, highest first:
+//
+//  1. If verification is disabled on the engine (e.verify is false), the
+//     answer is always false.
+//  2. If detectorVerificationOverrides holds an entry for the detector, either
+//     for its exact version or for the versionless ID (which applies to all
+//     versions of the type), that entry wins.
+//  3. Otherwise the source's own setting, sourceVerify, is returned.
+func (e *Engine) shouldVerifyChunk(
+	sourceVerify bool,
+	detector detectors.Detector,
+	detectorVerificationOverrides map[config.DetectorID]bool,
+) bool {
+	// The engine-wide verify flag takes precedence over every override.
+	if !e.verify {
+		return false
+	}
+
+	// getWithDetectorID tries the detector's exact ID first and then falls
+	// back to the versionless ID, so the lookup rules live in one place
+	// instead of being re-implemented here.
+	if detectorVerify, ok := getWithDetectorID(detector, detectorVerificationOverrides); ok {
+		return detectorVerify
+	}
+
+	return sourceVerify
+}
+
 // chunkSecretKey ties secrets to the specific detector that found them. This allows identifying identical
 // credentials extracted by multiple different detectors processing the same chunk. Or duplicates found
 // by the same detector in the chunk. Exact matches on lookup indicate a duplicate secret for a detector
@ -720,6 +859,8 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {

 	for chunk := range e.verificationOverlapChunksChan {
 		for _, detector := range chunk.detectors {
+			isFalsePositive := detectors.GetFalsePositiveCheck(detector.Detector)
+
 			// DO NOT VERIFY at this stage of the pipeline.
 			matchedBytes := detector.Matches()
 			for _, match := range matchedBytes {
@ -760,12 +901,17 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
 							e.verificationOverlapTracker.increment()
 						}
 						res.SetVerificationError(errOverlap)
-						e.processResult(ctx, detectableChunk{
-							chunk:    chunk.chunk,
-							detector: detector,
-							decoder:  chunk.decoder,
-							wgDoneFn: wgDetect.Done,
-						}, res)
+						e.processResult(
+							ctx,
+							detectableChunk{
+								chunk:    chunk.chunk,
+								detector: detector,
+								decoder:  chunk.decoder,
+								wgDoneFn: wgDetect.Done,
+							},
+							res,
+							isFalsePositive,
+						)

 						// Remove the detector key from the list of detector keys with results.
 						// This is to ensure that the chunk is not reprocessed with verification enabled
@ -779,7 +925,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {

 		for _, detector := range detectorKeysWithResults {
 			wgDetect.Add(1)
-			chunk.chunk.Verify = e.verify
+			chunk.chunk.Verify = e.shouldVerifyChunk(chunk.chunk.Verify, detector, e.detectorVerificationOverrides)
 			e.detectableChunksChan <- detectableChunk{
 				chunk:    chunk.chunk,
 				detector: detector,
@ -802,7 +948,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
 	wgDetect.Wait()
 }

-func (e *Engine) detectChunks(ctx context.Context) {
+func (e *Engine) detectorWorker(ctx context.Context) {
 	for data := range e.detectableChunksChan {
 		e.detectChunk(ctx, data)
 	}
@ -817,6 +963,8 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
 	defer common.Recover(ctx)
 	defer cancel()

+	isFalsePositive := detectors.GetFalsePositiveCheck(data.detector)
+
 	// To reduce the overhead of regex calls in the detector,
 	// we limit the amount of data passed to each detector.
 	// The matches field of the DetectorMatch struct contains the
@ -824,7 +972,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
 	// This avoids the need for additional regex processing on the entire chunk data.
 	matchedBytes := data.detector.Matches()
 	for _, match := range matchedBytes {
-		results, err := data.detector.FromData(ctx, data.chunk.Verify, match)
+		results, err := data.detector.Detector.FromData(ctx, data.chunk.Verify, match)
 		if err != nil {
 			ctx.Logger().Error(err, "error scanning chunk")
 			continue
@ -848,16 +996,12 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
 		results = e.filterResults(ctx, data.detector, results, e.logFilteredUnverified)

 		for _, res := range results {
-			e.processResult(ctx, data, res)
+			e.processResult(ctx, data, res, isFalsePositive)
 		}
 	}
 	data.wgDoneFn()
 }

-// filterResults applies multiple filters to the detection results to reduce false positives
-// and ensure the results meet specific criteria such as verification status and entropy level.
-// This function centralizes the filtering logic, making it reusable across different stages
-// of the detection pipeline.
 func (e *Engine) filterResults(
 	ctx context.Context,
 	detector detectors.Detector,
@ -868,13 +1012,18 @@ func (e *Engine) filterResults(
 		results = detectors.CleanResults(results)
 	}
 	results = detectors.FilterKnownFalsePositives(ctx, detector, results, logFilteredUnverified)
-	if e.filterEntropy != nil {
-		results = detectors.FilterResultsWithEntropy(ctx, results, *e.filterEntropy, logFilteredUnverified)
+	if e.filterEntropy != 0 {
+		results = detectors.FilterResultsWithEntropy(ctx, results, e.filterEntropy, logFilteredUnverified)
 	}
 	return results
 }

-func (e *Engine) processResult(ctx context.Context, data detectableChunk, res detectors.Result) {
+func (e *Engine) processResult(
+	ctx context.Context,
+	data detectableChunk,
+	res detectors.Result,
+	isFalsePositive func(detectors.Result) bool,
+) {
 	ignoreLinePresent := false
 	if SupportsLineNumbers(data.chunk.SourceType) {
 		copyChunk := data.chunk
@ -896,14 +1045,19 @@ func (e *Engine) processResult(ctx context.Context, data detectableChunk, res de

 	secret := detectors.CopyMetadata(&data.chunk, res)
 	secret.DecoderType = data.decoder
+
+	if !res.Verified && res.Raw != nil {
+		secret.IsWordlistFalsePositive = isFalsePositive(res)
+	}
+
 	e.results <- secret
 }

-func (e *Engine) notifyResults(ctx context.Context) {
-	for r := range e.ResultsChan() {
+func (e *Engine) notifierWorker(ctx context.Context) {
+	for result := range e.ResultsChan() {
 		// Filter unwanted results, based on `--results`.
-		if !r.Verified {
-			if r.VerificationError() != nil {
+		if !result.Verified {
+			if result.VerificationError() != nil {
 				if !e.notifyUnknownResults {
 					// Skip results with verification errors.
 					continue
@ -925,21 +1079,21 @@ func (e *Engine) notifyResults(ctx context.Context) {
 		// Duplicate results with the same decoder type SHOULD have their own entry in the
 		// results list, this would happen if the same secret is found multiple times.
 		// Note: If the source type is postman, we dedupe the results regardless of decoder type.
-		key := fmt.Sprintf("%s%s%s%+v", r.DetectorType.String(), r.Raw, r.RawV2, r.SourceMetadata)
-		if val, ok := e.dedupeCache.Get(key); ok && (val != r.DecoderType ||
-			r.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {
+		key := fmt.Sprintf("%s%s%s%+v", result.DetectorType.String(), result.Raw, result.RawV2, result.SourceMetadata)
+		if val, ok := e.dedupeCache.Get(key); ok && (val != result.DecoderType ||
+			result.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {
 			continue
 		}
-		e.dedupeCache.Add(key, r.DecoderType)
+		e.dedupeCache.Add(key, result.DecoderType)

-		if r.Verified {
+		if result.Verified {
 			atomic.AddUint64(&e.metrics.VerifiedSecretsFound, 1)
 		} else {
 			atomic.AddUint64(&e.metrics.UnverifiedSecretsFound, 1)
 		}

-		if err := e.printer.Print(ctx, &r); err != nil {
-			ctx.Logger().Error(err, "error printing result")
+		if err := e.dispatcher.Dispatch(ctx, result); err != nil {
+			ctx.Logger().Error(err, "error notifying result")
 		}
 	}
 }
--- a/pkg/engine/engine_test.go
+++ b/pkg/engine/engine_test.go
@ -13,6 +13,7 @@ import (

 	"github.com/stretchr/testify/assert"

+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/gitlab/v2"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"

 	"github.com/trufflesecurity/trufflehog/v3/pkg/config"
@ -243,14 +244,27 @@ func TestEngine_DuplicateSecrets(t *testing.T) {
 	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
 	defer cancel()

-	e, err := Start(ctx,
-		WithConcurrency(1),
-		WithDecoders(decoders.DefaultDecoders()...),
-		WithDetectors(DefaultDetectors()...),
-		WithVerify(false),
-		WithPrinter(new(discardPrinter)),
-	)
-	assert.Nil(t, err)
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	sourceManager := sources.NewManager(opts...)
+
+	conf := Config{
+		Concurrency:   1,
+		Decoders:      decoders.DefaultDecoders(),
+		Detectors:     DefaultDetectors(),
+		Verify:        false,
+		SourceManager: sourceManager,
+		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+	}
+
+	e, err := NewEngine(ctx, &conf)
+	assert.NoError(t, err)
+
+	e.Start(ctx)

 	cfg := sources.FilesystemConfig{Paths: []string{absPath}}
 	if err := e.ScanFileSystem(ctx, cfg); err != nil {
@ -277,15 +291,28 @@ func TestEngine_VersionedDetectorsVerifiedSecrets(t *testing.T) {
 	_, err = tmpFile.WriteString(fmt.Sprintf("test data using keyword %s", fakeDetectorKeyword))
 	assert.NoError(t, err)

-	e, err := Start(ctx,
-		WithConcurrency(1),
-		WithDecoders(decoders.DefaultDecoders()...),
-		WithDetectors(&fakeDetectorV1{}, &fakeDetectorV2{}),
-		WithVerify(true),
-		WithPrinter(new(discardPrinter)),
-	)
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	sourceManager := sources.NewManager(opts...)
+
+	conf := Config{
+		Concurrency:   1,
+		Decoders:      decoders.DefaultDecoders(),
+		Detectors:     []detectors.Detector{new(fakeDetectorV1), new(fakeDetectorV2)},
+		Verify:        true,
+		SourceManager: sourceManager,
+		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+	}
+
+	e, err := NewEngine(ctx, &conf)
 	assert.NoError(t, err)

+	e.Start(ctx)
+
 	cfg := sources.FilesystemConfig{Paths: []string{tmpFile.Name()}}
 	if err := e.ScanFileSystem(ctx, cfg); err != nil {
 		return
@ -333,14 +360,28 @@ func TestEngine_CustomDetectorsDetectorsVerifiedSecrets(t *testing.T) {

 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
 	defer cancel()
-	e, err := Start(ctx,
-		WithConcurrency(1),
-		WithDecoders(decoders.DefaultDecoders()...),
-		WithDetectors(allDetectors...),
-		WithVerify(true),
-		WithPrinter(new(discardPrinter)),
-	)
-	assert.Nil(t, err)
+
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	sourceManager := sources.NewManager(opts...)
+
+	conf := Config{
+		Concurrency:   1,
+		Decoders:      decoders.DefaultDecoders(),
+		Detectors:     allDetectors,
+		Verify:        true,
+		SourceManager: sourceManager,
+		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+	}
+
+	e, err := NewEngine(ctx, &conf)
+	assert.NoError(t, err)
+
+	e.Start(ctx)

 	cfg := sources.FilesystemConfig{Paths: []string{tmpFile.Name()}}
 	if err := e.ScanFileSystem(ctx, cfg); err != nil {
@ -367,15 +408,29 @@ func TestVerificationOverlapChunk(t *testing.T) {
 	conf, err := config.Read(confPath)
 	assert.Nil(t, err)

-	e, err := Start(ctx,
-		WithConcurrency(1),
-		WithDecoders(decoders.DefaultDecoders()...),
-		WithDetectors(conf.Detectors...),
-		WithVerify(false),
-		WithPrinter(new(discardPrinter)),
-		withVerificationOverlapTracking(),
-	)
-	assert.Nil(t, err)
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	sourceManager := sources.NewManager(opts...)
+
+	c := Config{
+		Concurrency:   1,
+		Decoders:      decoders.DefaultDecoders(),
+		Detectors:     conf.Detectors,
+		Verify:        false,
+		SourceManager: sourceManager,
+		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+	}
+
+	e, err := NewEngine(ctx, &c)
+	assert.NoError(t, err)
+
+	e.verificationOverlapTracker = new(verificationOverlapTracker)
+
+	e.Start(ctx)

 	cfg := sources.FilesystemConfig{Paths: []string{absPath}}
 	if err := e.ScanFileSystem(ctx, cfg); err != nil {
@ -407,16 +462,30 @@ func TestVerificationOverlapChunkFalsePositive(t *testing.T) {
 	conf, err := config.Read(confPath)
 	assert.NoError(t, err)

-	e, err := Start(ctx,
-		WithConcurrency(1),
-		WithDecoders(decoders.DefaultDecoders()...),
-		WithDetectors(conf.Detectors...),
-		WithVerify(false),
-		WithPrinter(new(discardPrinter)),
-		withVerificationOverlapTracking(),
-	)
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	sourceManager := sources.NewManager(opts...)
+
+	c := Config{
+		Concurrency:   1,
+		Decoders:      decoders.DefaultDecoders(),
+		Detectors:     conf.Detectors,
+		Verify:        false,
+		SourceManager: sourceManager,
+		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+	}
+
+	e, err := NewEngine(ctx, &c)
 	assert.NoError(t, err)

+	e.verificationOverlapTracker = new(verificationOverlapTracker)
+
+	e.Start(ctx)
+
 	cfg := sources.FilesystemConfig{Paths: []string{absPath}}
 	err = e.ScanFileSystem(ctx, cfg)
 	assert.NoError(t, err)
@ -757,3 +826,60 @@ func generateRandomDataWithKeywords(size int, detectors []detectors.Detector) st

 	return string(data)
 }
+
+// TestEngine_ShouldVerifyChunk checks how shouldVerifyChunk resolves the
+// verify flag for the GitLab v2 scanner when a single override entry is
+// present, across every combination of source and override values. The engine
+// under test always has verification enabled.
+func TestEngine_ShouldVerifyChunk(t *testing.T) {
+	// The two possible outcomes: the override entry applies, or the source's
+	// own setting is kept.
+	useOverride := func(_, detectorVerify bool) bool { return detectorVerify }
+	useSource := func(sourceVerify, _ bool) bool { return sourceVerify }
+
+	cases := []struct {
+		name        string
+		detector    detectors.Detector
+		overrideKey config.DetectorID
+		want        func(sourceVerify, detectorVerify bool) bool
+	}{
+		{
+			name:        "detector override by exact version",
+			detector:    &gitlab.Scanner{},
+			overrideKey: config.DetectorID{ID: detectorspb.DetectorType_Gitlab, Version: 2},
+			want:        useOverride,
+		},
+		{
+			name:        "detector override by versionless config",
+			detector:    &gitlab.Scanner{},
+			overrideKey: config.DetectorID{ID: detectorspb.DetectorType_Gitlab, Version: 0},
+			want:        useOverride,
+		},
+		{
+			name:        "no detector override because of detector type mismatch",
+			detector:    &gitlab.Scanner{},
+			overrideKey: config.DetectorID{ID: detectorspb.DetectorType_NpmToken, Version: 2},
+			want:        useSource,
+		},
+		{
+			name:        "no detector override because of detector version mismatch",
+			detector:    &gitlab.Scanner{},
+			overrideKey: config.DetectorID{ID: detectorspb.DetectorType_Gitlab, Version: 1},
+			want:        useSource,
+		},
+	}
+
+	bools := [2]bool{true, false}
+	eng := &Engine{verify: true}
+
+	for _, tc := range cases {
+		for _, sourceVerify := range bools {
+			for _, detectorVerify := range bools {
+				name := fmt.Sprintf("%s (source verify = %v, detector verify = %v)", tc.name, sourceVerify, detectorVerify)
+				t.Run(name, func(t *testing.T) {
+					overrides := map[config.DetectorID]bool{tc.overrideKey: detectorVerify}
+
+					got := eng.shouldVerifyChunk(sourceVerify, tc.detector, overrides)
+
+					assert.Equal(t, tc.want(sourceVerify, detectorVerify), got)
+				})
+			}
+		}
+	}
+}
--- a/pkg/engine/gcs_test.go
+++ b/pkg/engine/gcs_test.go
@ -61,13 +61,27 @@ func TestScanGCS(t *testing.T) {
 			ctx, cancel := context.WithCancel(context.TODO())
 			defer cancel()

-			e, err := Start(ctx,
-				WithConcurrency(1),
-				WithDecoders(decoders.DefaultDecoders()...),
-				WithDetectors(DefaultDetectors()...),
-				WithVerify(false),
-			)
-			assert.Nil(t, err)
+			const defaultOutputBufferSize = 64
+			opts := []func(*sources.SourceManager){
+				sources.WithSourceUnits(),
+				sources.WithBufferedOutput(defaultOutputBufferSize),
+			}
+
+			sourceManager := sources.NewManager(opts...)
+
+			conf := Config{
+				Concurrency:   1,
+				Decoders:      decoders.DefaultDecoders(),
+				Detectors:     DefaultDetectors(),
+				Verify:        false,
+				SourceManager: sourceManager,
+				Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+			}
+
+			e, err := NewEngine(ctx, &conf)
+			assert.NoError(t, err)
+
+			e.Start(ctx)

 			go func() {
 				resultCount := 0
--- a/pkg/engine/git_test.go
+++ b/pkg/engine/git_test.go
@ -62,14 +62,27 @@ func TestGitEngine(t *testing.T) {
 		},
 	} {
 		t.Run(tName, func(t *testing.T) {
-			e, err := Start(ctx,
-				WithConcurrency(1),
-				WithDecoders(decoders.DefaultDecoders()...),
-				WithDetectors(DefaultDetectors()...),
-				WithVerify(true),
-				WithPrinter(new(discardPrinter)),
-			)
-			assert.Nil(t, err)
+			const defaultOutputBufferSize = 64
+			opts := []func(*sources.SourceManager){
+				sources.WithSourceUnits(),
+				sources.WithBufferedOutput(defaultOutputBufferSize),
+			}
+
+			sourceManager := sources.NewManager(opts...)
+
+			conf := Config{
+				Concurrency:   1,
+				Decoders:      decoders.DefaultDecoders(),
+				Detectors:     DefaultDetectors(),
+				Verify:        true,
+				SourceManager: sourceManager,
+				Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+			}
+
+			e, err := NewEngine(ctx, &conf)
+			assert.NoError(t, err)
+
+			e.Start(ctx)

 			cfg := sources.GitConfig{
 				URI:      path,
@ -111,14 +124,25 @@ func BenchmarkGitEngine(b *testing.B) {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()

-	e, err := Start(ctx,
-		WithConcurrency(runtime.NumCPU()),
-		WithDecoders(decoders.DefaultDecoders()...),
-		WithDetectors(DefaultDetectors()...),
-		WithVerify(false),
-		WithPrinter(new(discardPrinter)),
-	)
-	assert.Nil(b, err)
+	const defaultOutputBufferSize = 64
+	opts := []func(*sources.SourceManager){
+		sources.WithSourceUnits(),
+		sources.WithBufferedOutput(defaultOutputBufferSize),
+	}
+
+	sourceManager := sources.NewManager(opts...)
+
+	conf := Config{
+		Concurrency:   runtime.NumCPU(),
+		Decoders:      decoders.DefaultDecoders(),
+		Detectors:     DefaultDetectors(),
+		Verify:        false,
+		SourceManager: sourceManager,
+		Dispatcher:    NewPrinterDispatcher(new(discardPrinter)),
+	}
+
+	e, err := NewEngine(ctx, &conf)
+	assert.NoError(b, err)

 	go func() {
 		resultCount := 0