package sources

import (
	"sync"

	"google.golang.org/protobuf/types/known/anypb"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
)

type (
	SourceID int64
	JobID    int64
)

// Chunk contains data to be decoded and scanned along with context on where it came from.
//
// **Important:** The order of the fields in this struct is specifically designed to optimize
// struct alignment and minimize memory usage. Do not change the field order without carefully
// considering the potential impact on memory consumption.
// Ex: https://go.dev/play/p/Azf4a7O-DhC
type Chunk struct {
	// Data is the data to decode and scan.
	Data []byte
	// SourceName is the name of the Source that produced the chunk.
	SourceName string
	// SourceID is the ID of the source that the Chunk originated from.
	SourceID SourceID
	// JobID is the ID of the job that the Chunk originated from.
	JobID JobID
	// SecretID is the ID of the secret, if it exists.
	// Only secrets that are being reverified will have a SecretID.
	SecretID int64
	// SourceMetadata holds the context of where the Chunk was found.
	SourceMetadata *source_metadatapb.MetaData
	// SourceType is the type of Source that produced the chunk.
	SourceType sourcespb.SourceType
	// Verify specifies whether any secrets in the Chunk should be verified.
	Verify bool
}

// ChunkingTarget specifies criteria for a targeted chunking process.
// Instead of collecting data indiscriminately, this struct allows the caller
// to specify particular subsets of data they're interested in. This becomes
// especially useful when one needs to verify or recheck specific data points
// without processing the entire dataset.
type ChunkingTarget struct {
	// QueryCriteria represents specific parameters or conditions to target the chunking process.
	QueryCriteria *source_metadatapb.MetaData
	// SecretID is the ID of the secret.
	SecretID int64
}

// Source defines the interface required to implement a source chunker.
type Source interface {
	// Type returns the source type, used for matching against configuration and jobs.
	Type() sourcespb.SourceType
	// SourceID returns the initialized source ID used for tracking relationships in the DB.
	SourceID() SourceID
	// JobID returns the initialized job ID used for tracking relationships in the DB.
	JobID() JobID
	// Init initializes the source.
	Init(aCtx context.Context, name string, jobId JobID, sourceId SourceID, verify bool, connection *anypb.Any, concurrency int) error
	// Chunks emits data over a channel which is then decoded and scanned for secrets.
	// By default, data is obtained indiscriminately. However, by providing one or more
	// ChunkingTarget parameters, the caller can direct the function to retrieve
	// specific chunks of data. This targeted approach allows for efficient and
	// intentional data processing, beneficial when verifying or rechecking specific data points.
	Chunks(ctx context.Context, chunksChan chan *Chunk, targets ...ChunkingTarget) error
	// GetProgress returns the completion progress (percentage) for the scanned Source.
	GetProgress() *Progress
}

// SourceUnitEnumChunker combines the two interfaces required to support
// enumerating and chunking of units.
type SourceUnitEnumChunker interface {
	SourceUnitEnumerator
	SourceUnitChunker
}
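// runSource below is an illustrative sketch and not part of the original API
// surface: it shows the typical way a caller drives Source.Chunks, with
// production running in a goroutine while the caller drains the channel. The
// function name, channel sizing, and error handling are assumptions.
func runSource(ctx context.Context, s Source, targets ...ChunkingTarget) ([]*Chunk, error) {
	chunksChan := make(chan *Chunk, 1)
	errChan := make(chan error, 1)

	go func() {
		// Closing the channel signals the consumer that no more chunks
		// are coming, regardless of whether Chunks returned an error.
		defer close(chunksChan)
		errChan <- s.Chunks(ctx, chunksChan, targets...)
	}()

	var chunks []*Chunk
	for chunk := range chunksChan {
		chunks = append(chunks, chunk)
	}
	return chunks, <-errChan
}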
// SourceUnitUnmarshaller defines an optional interface a Source can implement
// to support units coming from an external source.
type SourceUnitUnmarshaller interface {
	UnmarshalSourceUnit(data []byte) (SourceUnit, error)
}

// SourceUnitEnumerator defines an optional interface a Source can implement to
// support enumerating an initialized Source into SourceUnits.
type SourceUnitEnumerator interface {
	// Enumerate creates 0 or more units from an initialized source,
	// reporting them or any errors to the UnitReporter. This method is
	// synchronous but can be called in a goroutine to support concurrent
	// enumeration and chunking. An error should only be returned from this
	// method in the case of context cancellation, fatal source errors, or
	// errors returned by the reporter. All other errors related to unit
	// enumeration are tracked by the UnitReporter.
	Enumerate(ctx context.Context, reporter UnitReporter) error
}

// UnitReporter defines the interface a source will use to report whether a
// unit was found during enumeration. Either method may be called any number of
// times. Implementors of this interface should allow for concurrent calls.
type UnitReporter interface {
	UnitOk(ctx context.Context, unit SourceUnit) error
	UnitErr(ctx context.Context, err error) error
}

// SourceUnitChunker defines an optional interface a Source can implement to
// support chunking a single SourceUnit.
type SourceUnitChunker interface {
	// ChunkUnit creates 0 or more chunks from a unit, reporting them or
	// any errors to the ChunkReporter. An error should only be returned
	// from this method in the case of context cancellation, fatal source
	// errors, or errors returned by the reporter. All other errors related
	// to unit chunking are tracked by the ChunkReporter.
	ChunkUnit(ctx context.Context, unit SourceUnit, reporter ChunkReporter) error
}

// ChunkReporter defines the interface a source will use to report whether a
// chunk was found during unit chunking. Either method may be called any number
// of times. Implementors of this interface should allow for concurrent calls.
type ChunkReporter interface {
	ChunkOk(ctx context.Context, chunk Chunk) error
	ChunkErr(ctx context.Context, err error) error
}

type SourceUnitKind string

// SourceUnit is an object that represents a Source's unit of work. This is
// used as the output of enumeration, progress reporting, and job distribution.
type SourceUnit interface {
	// SourceUnitID uniquely identifies a source unit. It does not need to
	// be human readable or two-way, however, it should be canonical and
	// stable across runs.
	SourceUnitID() (string, SourceUnitKind)

	// Display is the human readable representation of the SourceUnit.
	Display() string
}
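// chanUnitReporter is a hypothetical, minimal UnitReporter used only for the
// enumerate-then-chunk sketch below; it is not part of the original API. It
// forwards units over a channel, which is safe for the concurrent calls the
// interface allows.
type chanUnitReporter struct {
	units chan SourceUnit
}

func (r chanUnitReporter) UnitOk(ctx context.Context, unit SourceUnit) error {
	select {
	case r.units <- unit:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func (r chanUnitReporter) UnitErr(ctx context.Context, err error) error {
	// A real reporter would record or log per-unit enumeration errors;
	// dropping them keeps this sketch short.
	return nil
}

// enumerateThenChunk is an illustrative sketch (the name is an assumption) of
// how the unit interfaces compose: Enumerate runs in a goroutine, reporting
// units to a UnitReporter, and each reported unit is then chunked via
// ChunkUnit. A production version would also drain the units channel on early
// return so the enumerator goroutine is never left blocked.
func enumerateThenChunk(ctx context.Context, s SourceUnitEnumChunker, reporter ChunkReporter) error {
	units := make(chan SourceUnit)
	enumErr := make(chan error, 1)

	go func() {
		defer close(units)
		enumErr <- s.Enumerate(ctx, chanUnitReporter{units: units})
	}()

	for unit := range units {
		if err := s.ChunkUnit(ctx, unit, reporter); err != nil {
			return err
		}
	}
	return <-enumErr
}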
// DockerConfig defines the optional configuration for a Docker source.
type DockerConfig struct {
	// Images is the list of images to scan.
	Images []string
	// BearerToken is the token to use to authenticate with the source.
	BearerToken string
	// UseDockerKeychain determines whether to use the Docker keychain.
	UseDockerKeychain bool
}

// GCSConfig defines the optional configuration for a GCS source.
type GCSConfig struct {
	// CloudCred determines whether to use cloud credentials.
	// This can NOT be used with a secret.
	CloudCred,
	// WithoutAuth is a flag to indicate whether to use authentication.
	WithoutAuth bool
	// ApiKey is the API key to use to authenticate with the source.
	ApiKey,
	// ProjectID is the project ID to use to authenticate with the source.
	ProjectID,
	// ServiceAccount is the service account to use to authenticate with the source.
	ServiceAccount string
	// MaxObjectSize is the maximum object size to scan.
	MaxObjectSize int64
	// Concurrency is the number of concurrent workers to use to scan the source.
	Concurrency int
	// IncludeBuckets is a list of buckets to include in the scan.
	IncludeBuckets,
	// ExcludeBuckets is a list of buckets to exclude from the scan.
	ExcludeBuckets,
	// IncludeObjects is a list of objects to include in the scan.
	IncludeObjects,
	// ExcludeObjects is a list of objects to exclude from the scan.
	ExcludeObjects []string
}

// GitConfig defines the optional configuration for a git source.
type GitConfig struct {
	// HeadRef is the head reference to use to scan from.
	HeadRef string
	// BaseRef is the base reference to use to scan from.
	BaseRef string
	// MaxDepth is the maximum depth to scan the source.
	MaxDepth int
	// Bare is an indicator to handle bare repositories properly.
	Bare bool
	// URI is the URI of the repository to scan. file://, http://, https:// and ssh:// are supported.
	URI string
	// IncludePathsFile is the path to a file containing a list of regexps to include in the scan.
	IncludePathsFile string
	// ExcludePathsFile is the path to a file containing a list of regexps to exclude from the scan.
	ExcludePathsFile string
	// ExcludeGlobs is a list of comma-separated globs to exclude from the scan.
	// This differs from the Filter exclusions as ExcludeGlobs is applied at the `git log -p` level.
	ExcludeGlobs string
	// SkipBinaries allows skipping binary files from the scan.
	SkipBinaries bool
}

// GithubConfig defines the optional configuration for a github source.
type GithubConfig struct {
	// Endpoint is the endpoint of the source.
	Endpoint string
	// Token is the token to use to authenticate with the source.
	Token string
	// IncludeForks indicates whether to include forks in the scan.
	IncludeForks bool
	// IncludeMembers indicates whether to include members in the scan.
	IncludeMembers bool
	// Concurrency is the number of concurrent workers to use to scan the source.
	Concurrency int
	// Repos is the list of repositories to scan.
	Repos []string
	// Orgs is the list of organizations to scan.
	Orgs []string
	// ExcludeRepos is a list of repositories to exclude from the scan.
	ExcludeRepos []string
	// IncludeRepos is a list of repositories to include in the scan.
	IncludeRepos []string
	// Filter is the filter to use to scan the source.
	Filter *common.Filter
	// IncludeIssueComments indicates whether to include GitHub issue comments in the scan.
	IncludeIssueComments bool
	// IncludePullRequestComments indicates whether to include GitHub pull request comments in the scan.
	IncludePullRequestComments bool
	// IncludeGistComments indicates whether to include GitHub gist comments in the scan.
	IncludeGistComments bool
	// SkipBinaries allows skipping binary files from the scan.
	SkipBinaries bool
	// IncludeWikis indicates whether to include repository wikis in the scan.
	IncludeWikis bool
}

// GitHubExperimentalConfig defines the optional configuration for an experimental GitHub source.
type GitHubExperimentalConfig struct {
	// Repository is the repository to scan.
	Repository string
	// Token is the token to use to authenticate with the source.
	Token string
	// ObjectDiscovery indicates whether to discover all commit objects (CFOR) in the repository.
	ObjectDiscovery bool
	// CollisionThreshold is the number of short-sha collisions tolerated during hidden data enumeration. Default is 1.
	CollisionThreshold int
	// DeleteCachedData indicates whether to delete cached data.
	DeleteCachedData bool
}
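// exampleGithubConfig is a hypothetical illustration, not part of the
// original file: it shows how a caller might populate GithubConfig. Every
// literal value below (endpoint, org, repo, concurrency) is a placeholder
// assumption.
func exampleGithubConfig() GithubConfig {
	return GithubConfig{
		Endpoint:             "https://api.github.com", // assumed public endpoint
		Token:                "<personal access token>",
		Concurrency:          10,
		Orgs:                 []string{"example-org"},
		ExcludeRepos:         []string{"example-org/vendored-mirror"},
		IncludeIssueComments: true,
		IncludeWikis:         true,
	}
}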
// GitlabConfig defines the optional configuration for a gitlab source.
type GitlabConfig struct {
	// Endpoint is the endpoint of the source.
	Endpoint string
	// Token is the token to use to authenticate with the source.
	Token string
	// Repos is the list of repositories to scan.
	Repos []string
	// Filter is the filter to use to scan the source.
	Filter *common.Filter
	// SkipBinaries allows skipping binary files from the scan.
	SkipBinaries bool
}

// FilesystemConfig defines the optional configuration for a filesystem source.
type FilesystemConfig struct {
	// Paths is the list of files and directories to scan.
	Paths []string
	// IncludePathsFile is the path to a file containing a list of regexps to include in the scan.
	IncludePathsFile string
	// ExcludePathsFile is the path to a file containing a list of regexps to exclude from the scan.
	ExcludePathsFile string
}

// S3Config defines the optional configuration for an S3 source.
type S3Config struct {
	// CloudCred determines whether to use cloud credentials.
	// This can NOT be used with a secret.
	CloudCred bool
	// Key is any key to use to authenticate with the source.
	Key,
	// Secret is any secret to use to authenticate with the source.
	Secret,
	// SessionToken is a temporary session token associated with a temporary access key ID and secret key.
	SessionToken string
	// Buckets is the list of buckets to scan.
	Buckets []string
	// IgnoreBuckets is the list of buckets to ignore.
	IgnoreBuckets []string
	// Roles is the list of roles to use.
	Roles []string
	// MaxObjectSize is the maximum object size to scan.
	MaxObjectSize int64
}

// SyslogConfig defines the optional configuration for a syslog source.
type SyslogConfig struct {
	// Address used to connect to the source.
	Address,
	// Protocol used to connect to the source.
	Protocol,
	// CertPath is the path to the certificate to use to connect to the source.
	CertPath,
	// Format is the format used to connect to the source.
	Format,
	// KeyPath is the path to the key to use to connect to the source.
	KeyPath string
	// Concurrency is the number of concurrent workers to use to scan the source.
	Concurrency int
}
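// exampleS3Config is a hypothetical illustration, not part of the original
// file, of the documented constraint on S3Config: CloudCred can NOT be
// combined with a static secret, so exactly one credential style is set.
// All literal values are placeholders.
func exampleS3Config() S3Config {
	return S3Config{
		// Use ambient cloud credentials; Key, Secret, and SessionToken
		// are deliberately left empty because a static secret is
		// mutually exclusive with CloudCred.
		CloudCred:     true,
		Buckets:       []string{"example-bucket"},
		IgnoreBuckets: []string{"example-logs-bucket"},
		MaxObjectSize: 250 * 1024 * 1024, // assumed 250 MiB cap
	}
}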
// PostmanConfig defines the optional configuration for a Postman source.
type PostmanConfig struct {
	// Workspaces is the list of workspace UUIDs or file paths to Postman workspaces (.zip).
	Workspaces []string
	// Collections is the list of collection IDs or file paths to Postman collections (.json).
	Collections []string
	// Environments is the list of environment IDs or file paths to Postman environments (.json).
	Environments []string
	// Token is the token to use to authenticate with the API.
	Token string
	// IncludeCollections is a list of Collections to include in the scan.
	IncludeCollections []string
	// IncludeEnvironments is a list of Environments to include in the scan.
	IncludeEnvironments []string
	// ExcludeCollections is a list of Collections to exclude from the scan.
	ExcludeCollections []string
	// ExcludeEnvironments is a list of Environments to exclude from the scan.
	ExcludeEnvironments []string
	// Concurrency is the number of concurrent workers to use to scan the source.
	Concurrency int
	// CollectionPaths is the list of paths to Postman collections.
	CollectionPaths []string
	// WorkspacePaths is the list of paths to Postman workspaces.
	WorkspacePaths []string
	// EnvironmentPaths is the list of paths to Postman environments.
	EnvironmentPaths []string
	// Filter is the filter to use to scan the source.
	Filter *common.Filter
}

// ElasticsearchConfig defines the optional configuration for an Elasticsearch source.
type ElasticsearchConfig struct {
	Nodes          []string
	Username       string
	Password       string
	CloudID        string
	APIKey         string
	ServiceToken   string
	IndexPattern   string
	QueryJSON      string
	SinceTimestamp string
	BestEffortScan bool
}

// Progress is used to update job completion progress across sources.
type Progress struct {
	mut               sync.Mutex
	PercentComplete   int64
	Message           string
	EncodedResumeInfo string
	SectionsCompleted int32
	SectionsRemaining int32
}

// Validator is an interface for validating a source. Sources can optionally implement this interface to validate
// their configuration.
type Validator interface {
	Validate(ctx context.Context) []error
}

// SetProgressComplete sets job progress information for a running job based on the highest level objects in the source.
// i is the current iteration in the loop of target scope.
// scope should be the len(scopedItems).
// message is the public facing user information about the current progress.
// encodedResumeInfo is an optional string representing any information necessary to resume the job if interrupted.
//
// NOTE: SetProgressOngoing should be used when the source does not yet know how many items it is scanning (scope)
// and does not want to display a percentage complete.
func (p *Progress) SetProgressComplete(i, scope int, message, encodedResumeInfo string) {
	p.mut.Lock()
	defer p.mut.Unlock()

	p.Message = message
	p.EncodedResumeInfo = encodedResumeInfo
	p.SectionsCompleted = int32(i)
	p.SectionsRemaining = int32(scope)

	// If the iteration and scope are both 0, completion is 100%.
	if i == 0 && scope == 0 {
		p.PercentComplete = 100
		return
	}

	p.PercentComplete = int64((float64(i) / float64(scope)) * 100)
}

// SetProgressOngoing sets information about the current running job based on
// the highest level objects in the source.
// message is the public facing user information about the current progress.
// encodedResumeInfo is an optional string representing any information necessary to resume the job if interrupted.
//
// NOTE: This method should be used over SetProgressComplete when the source does
// not yet know how many items it is scanning and does not want to display a percentage complete.
func (p *Progress) SetProgressOngoing(message string, encodedResumeInfo string) {
	p.mut.Lock()
	defer p.mut.Unlock()

	p.Message = message
	p.EncodedResumeInfo = encodedResumeInfo
	// Explicitly set SectionsRemaining to 0 so the frontend does not display a percent.
	p.SectionsRemaining = 0
}

// GetProgress gets job completion percentage for metrics reporting.
func (p *Progress) GetProgress() *Progress {
	p.mut.Lock()
	defer p.mut.Unlock()
	return p
}
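// exampleProgressLoop is a hypothetical sketch, not part of the original
// file, showing the intended use of SetProgressComplete: after the i-th of
// len(items) items, PercentComplete is int64(float64(i)/float64(len(items))*100),
// e.g. 50 after 4 of 8 items. The function name and message format are
// assumptions.
func exampleProgressLoop(p *Progress, items []string) {
	for i, item := range items {
		// ... scan the item here ...
		p.SetProgressComplete(i+1, len(items), "scanned "+item, "")
	}
}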