Mirror of https://github.com/trufflesecurity/trufflehog.git, synced 2024-11-10 07:04:24 +00:00.
CFOR Commit Scanner (#3145)
* alpha feature for scanning hidden commits on github
* improvements re: git operations
* lint updates
* updating with exec block due to no gh token
* reworked logic into new source
* fixed collisions threshold flag input
* fixed IOutil issues
* removed additions from GH config

Co-authored-by: Joe Leon <joe.leon@trufflesec.com>
This commit is contained in: parent 38e844f968, commit 7d606e2480.
11 changed files with 2546 additions and 1222 deletions.
main.go (21 changes)

@@ -150,6 +150,16 @@ var (
	githubScanPRComments = githubScan.Flag("pr-comments", "Include pull request descriptions and comments in scan.").Bool()
	githubScanGistComments = githubScan.Flag("gist-comments", "Include gist comments in scan.").Bool()

	// GitHub Cross Fork Object Reference Experimental Feature
	githubExperimentalScan = cli.Command("github-experimental", "Run an experimental GitHub scan. Must specify at least one experimental sub-module to run: object-discovery.")
	// GitHub Experimental SubModules
	githubExperimentalObjectDiscovery = githubExperimentalScan.Flag("object-discovery", "Discover hidden data objects in GitHub repositories.").Bool()
	// GitHub Experimental Options
	githubExperimentalToken = githubExperimentalScan.Flag("token", "GitHub token. Can be provided with environment variable GITHUB_TOKEN.").Envar("GITHUB_TOKEN").String()
	githubExperimentalRepo = githubExperimentalScan.Flag("repo", "GitHub repository to scan. Example: https://github.com/<user>/<repo>.git").Required().String()
	githubExperimentalCollisionThreshold = githubExperimentalScan.Flag("collision-threshold", "Threshold for short-sha collisions in object-discovery submodule. Default is 1.").Default("1").Int()
	githubExperimentalDeleteCache = githubExperimentalScan.Flag("delete-cached-data", "Delete cached data after object-discovery secret scanning.").Bool()

	gitlabScan = cli.Command("gitlab", "Find credentials in GitLab repositories.")
	// TODO: Add more GitLab options
	gitlabScanEndpoint = gitlabScan.Flag("endpoint", "GitLab endpoint.").Default("https://gitlab.com").String()
@@ -667,6 +677,17 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
		if err := eng.ScanGitHub(ctx, cfg); err != nil {
			return scanMetrics, fmt.Errorf("failed to scan Github: %v", err)
		}
	case githubExperimentalScan.FullCommand():
		cfg := sources.GitHubExperimentalConfig{
			Token:              *githubExperimentalToken,
			Repository:         *githubExperimentalRepo,
			ObjectDiscovery:    *githubExperimentalObjectDiscovery,
			CollisionThreshold: *githubExperimentalCollisionThreshold,
			DeleteCachedData:   *githubExperimentalDeleteCache,
		}
		if err := eng.ScanGitHubExperimental(ctx, cfg); err != nil {
			return scanMetrics, fmt.Errorf("failed to scan using Github Experimental: %v", err)
		}
	case gitlabScan.FullCommand():
		filter, err := common.FilterFromFiles(*gitlabScanIncludePaths, *gitlabScanExcludePaths)
		if err != nil {
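With the new flags and the runSingleScan case above wired together, the feature is driven entirely from the CLI. A hypothetical invocation (the repository URL and token are placeholders) would look roughly like:

	GITHUB_TOKEN=<token> trufflehog github-experimental --object-discovery \
		--repo=https://github.com/<user>/<repo>.git \
		--collision-threshold=1 --delete-cached-data

The token can also be passed with --token; the flag's .Envar("GITHUB_TOKEN") simply makes the environment variable the fallback.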
pkg/engine/github_experimental.go (new file, 65 lines)

@@ -0,0 +1,65 @@
package engine

import (
	"fmt"
	"runtime"

	gogit "github.com/go-git/go-git/v5"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/anypb"

	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources/github"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources/github_experimental"
)

// ScanGitHubExperimental scans GitHub using an experimental feature. Consider all functionality to be in an alpha release here.
func (e *Engine) ScanGitHubExperimental(ctx context.Context, c sources.GitHubExperimentalConfig) error {
	connection := sourcespb.GitHubExperimental{
		Repository:         c.Repository,
		ObjectDiscovery:    c.ObjectDiscovery,
		CollisionThreshold: int64(c.CollisionThreshold),
		DeleteCachedData:   c.DeleteCachedData,
	}

	// Check at least one experimental sub-module is being used.
	// Add to this list as more experimental sub-modules are added.
	if !c.ObjectDiscovery {
		return fmt.Errorf("at least one experimental submodule must be enabled")
	}

	if len(c.Token) > 0 {
		connection.Credential = &sourcespb.GitHubExperimental_Token{
			Token: c.Token,
		}
	} else {
		return fmt.Errorf("token is required for github experimental")
	}

	var conn anypb.Any
	err := anypb.MarshalFrom(&conn, &connection, proto.MarshalOptions{})
	if err != nil {
		ctx.Logger().Error(err, "failed to marshal github experimental connection")
		return err
	}

	logOptions := &gogit.LogOptions{}
	opts := []git.ScanOption{
		git.ScanOptionLogOptions(logOptions),
	}
	scanOptions := git.NewScanOptions(opts...)

	sourceName := "trufflehog - github experimental (alpha release)"
	sourceID, jobID, _ := e.sourceManager.GetIDs(ctx, sourceName, github.SourceType)

	githubExperimentalSource := &github_experimental.Source{}
	if err := githubExperimentalSource.Init(ctx, sourceName, jobID, sourceID, true, &conn, runtime.NumCPU()); err != nil {
		return err
	}
	githubExperimentalSource.WithScanOptions(scanOptions)
	_, err = e.sourceManager.Run(ctx, sourceName, githubExperimentalSource)
	return err
}
(File diff suppressed because it is too large.)
@@ -2759,6 +2759,133 @@ var _ interface {
	ErrorName() string
} = GitHubValidationError{}

// Validate checks the field values on GitHubExperimental with the rules
// defined in the proto definition for this message. If any rules are
// violated, the first error encountered is returned, or nil if there are no violations.
func (m *GitHubExperimental) Validate() error {
	return m.validate(false)
}

// ValidateAll checks the field values on GitHubExperimental with the rules
// defined in the proto definition for this message. If any rules are
// violated, the result is a list of violation errors wrapped in
// GitHubExperimentalMultiError, or nil if none found.
func (m *GitHubExperimental) ValidateAll() error {
	return m.validate(true)
}

func (m *GitHubExperimental) validate(all bool) error {
	if m == nil {
		return nil
	}

	var errors []error

	// no validation rules for Repository

	// no validation rules for ObjectDiscovery

	// no validation rules for CollisionThreshold

	// no validation rules for DeleteCachedData

	switch v := m.Credential.(type) {
	case *GitHubExperimental_Token:
		if v == nil {
			err := GitHubExperimentalValidationError{
				field:  "Credential",
				reason: "oneof value cannot be a typed-nil",
			}
			if !all {
				return err
			}
			errors = append(errors, err)
		}
		// no validation rules for Token
	default:
		_ = v // ensures v is used
	}

	if len(errors) > 0 {
		return GitHubExperimentalMultiError(errors)
	}

	return nil
}

// GitHubExperimentalMultiError is an error wrapping multiple validation errors
// returned by GitHubExperimental.ValidateAll() if the designated constraints
// aren't met.
type GitHubExperimentalMultiError []error

// Error returns a concatenation of all the error messages it wraps.
func (m GitHubExperimentalMultiError) Error() string {
	var msgs []string
	for _, err := range m {
		msgs = append(msgs, err.Error())
	}
	return strings.Join(msgs, "; ")
}

// AllErrors returns a list of validation violation errors.
func (m GitHubExperimentalMultiError) AllErrors() []error { return m }

// GitHubExperimentalValidationError is the validation error returned by
// GitHubExperimental.Validate if the designated constraints aren't met.
type GitHubExperimentalValidationError struct {
	field  string
	reason string
	cause  error
	key    bool
}

// Field function returns field value.
func (e GitHubExperimentalValidationError) Field() string { return e.field }

// Reason function returns reason value.
func (e GitHubExperimentalValidationError) Reason() string { return e.reason }

// Cause function returns cause value.
func (e GitHubExperimentalValidationError) Cause() error { return e.cause }

// Key function returns key value.
func (e GitHubExperimentalValidationError) Key() bool { return e.key }

// ErrorName returns error name.
func (e GitHubExperimentalValidationError) ErrorName() string {
	return "GitHubExperimentalValidationError"
}

// Error satisfies the builtin error interface
func (e GitHubExperimentalValidationError) Error() string {
	cause := ""
	if e.cause != nil {
		cause = fmt.Sprintf(" | caused by: %v", e.cause)
	}

	key := ""
	if e.key {
		key = "key for "
	}

	return fmt.Sprintf(
		"invalid %sGitHubExperimental.%s: %s%s",
		key,
		e.field,
		e.reason,
		cause)
}

var _ error = GitHubExperimentalValidationError{}

var _ interface {
	Field() string
	Reason() string
	Key() bool
	Cause() error
	ErrorName() string
} = GitHubExperimentalValidationError{}

// Validate checks the field values on GoogleDrive with the rules defined in
// the proto definition for this message. If any rules are violated, the first
// error encountered is returned, or nil if there are no violations.
@@ -310,7 +310,7 @@ func (s *Source) Validate(ctx context.Context) []error {
		errs = append(errs, fmt.Errorf("error creating GitHub client: %+v", err))
	}
	default:
-		errs = append(errs, fmt.Errorf("Invalid configuration given for source. Name: %s, Type: %s", s.name, s.Type()))
+		errs = append(errs, fmt.Errorf("invalid configuration given for source. Name: %s, Type: %s", s.name, s.Type()))
	}

	// Run a simple query to check if the client is actually valid

@@ -400,7 +400,7 @@ func (s *Source) enumerate(ctx context.Context, apiEndpoint string) (*github.Cli
	}
	default:
		// TODO: move this error to Init
-		return nil, fmt.Errorf("Invalid configuration given for source. Name: %s, Type: %s", s.name, s.Type())
+		return nil, fmt.Errorf("invalid configuration given for source. Name: %s, Type: %s", s.name, s.Type())
	}

	s.repos = make([]string, 0, s.filteredRepoCache.Count())
pkg/sources/github_experimental/github_experimental.go (new file, 224 lines)

@@ -0,0 +1,224 @@
package github_experimental

import (
	"fmt"
	"net/http"
	"strings"

	"github.com/go-logr/logr"
	"github.com/google/go-github/v63/github"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/anypb"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
)

const (
	SourceType = sourcespb.SourceType_SOURCE_TYPE_GITHUB_EXPERIMENTAL

	// unauthGithubOrgRateLimt = 30
	// defaultPagination = 100
	// membersAppPagination = 500
)

type Source struct {
	name string

	// Protects the user and token.
	//userMu sync.Mutex
	//githubUser string
	//githubToken string

	sourceID sources.SourceID
	jobID    sources.JobID
	verify   bool
	//orgsCache cache.Cache[string]
	//memberCache map[string]struct{}
	//repos []string
	//filteredRepoCache *filteredRepoCache
	repoInfoCache repoInfoCache
	//totalRepoSize int // total size of all repos in kb

	useCustomContentWriter bool
	git                    *git.Git

	scanOptions *git.ScanOptions

	httpClient *http.Client
	log        logr.Logger
	conn       *sourcespb.GitHubExperimental
	jobPool    *errgroup.Group
	apiClient  *github.Client

	sources.Progress
	sources.CommonSourceUnitUnmarshaller
}

// WithCustomContentWriter sets the useCustomContentWriter flag on the source.
func (s *Source) WithCustomContentWriter() { s.useCustomContentWriter = true }

func (s *Source) WithScanOptions(scanOptions *git.ScanOptions) {
	s.scanOptions = scanOptions
}

// Ensure the Source satisfies the interfaces at compile time
var _ sources.Source = (*Source)(nil)
var _ sources.SourceUnitUnmarshaller = (*Source)(nil)

// Type returns the type of source.
// It is used for matching source types in configuration and job input.
func (s *Source) Type() sourcespb.SourceType {
	return SourceType
}

func (s *Source) SourceID() sources.SourceID {
	return s.sourceID
}

func (s *Source) JobID() sources.JobID {
	return s.jobID
}

// Init returns an initialized GitHubExperimental source.
func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, sourceID sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
	err := git.CmdCheck()
	if err != nil {
		return err
	}

	s.log = aCtx.Logger()

	s.name = name
	s.sourceID = sourceID
	s.jobID = jobID
	s.verify = verify
	s.jobPool = &errgroup.Group{}
	s.jobPool.SetLimit(concurrency)

	s.httpClient = common.RetryableHTTPClientTimeout(60)
	s.apiClient = github.NewClient(s.httpClient)

	var conn sourcespb.GitHubExperimental
	err = anypb.UnmarshalTo(connection, &conn, proto.UnmarshalOptions{})
	if err != nil {
		return fmt.Errorf("error unmarshalling connection: %w", err)
	}
	s.conn = &conn
	s.conn.Repository, err = s.normalizeRepo(s.conn.Repository)
	if err != nil {
		return fmt.Errorf("error normalizing repo: %w", err)
	}

	s.repoInfoCache = newRepoInfoCache()

	cfg := &git.Config{
		SourceName:   s.name,
		JobID:        s.jobID,
		SourceID:     s.sourceID,
		SourceType:   s.Type(),
		Verify:       s.verify,
		SkipBinaries: false,
		SkipArchives: false,
		Concurrency:  concurrency,
		SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
			return &source_metadatapb.MetaData{
				Data: &source_metadatapb.MetaData_Github{
					Github: &source_metadatapb.Github{
						Commit:     sanitizer.UTF8(commit),
						File:       sanitizer.UTF8(file),
						Email:      sanitizer.UTF8(email),
						Repository: sanitizer.UTF8(repository),
						Link:       giturl.GenerateLink(repository, commit, file, line),
						Timestamp:  sanitizer.UTF8(timestamp),
						Line:       line,
						Visibility: s.visibilityOf(aCtx, repository),
					},
				},
			}
		},
		UseCustomContentWriter: s.useCustomContentWriter,
	}
	s.git = git.NewGit(cfg)

	return nil
}

func (s *Source) visibilityOf(ctx context.Context, repoURL string) source_metadatapb.Visibility {
	// It isn't possible to get the visibility of a wiki.
	// We must use the visibility of the corresponding repository.
	if strings.HasSuffix(repoURL, ".wiki.git") {
		repoURL = strings.TrimSuffix(repoURL, ".wiki.git") + ".git"
	}

	repoInfo, ok := s.repoInfoCache.get(repoURL)
	if !ok {
		// This should never happen.
		err := fmt.Errorf("no repoInfo for URL: %s", repoURL)
		ctx.Logger().Error(err, "failed to get repository visibility")
		return source_metadatapb.Visibility_unknown
	}

	return repoInfo.visibility
}

// Chunks emits chunks of bytes over a channel.
func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, targets ...sources.ChunkingTarget) error {
	if s.conn.ObjectDiscovery {
		err := s.EnumerateAndScanAllObjects(ctx, chunksChan)
		return err
	}
	return nil
}

func getRepoURLParts(repoURLString string) (string, []string, error) {
	// Support ssh and https URLs.
	repoURL, err := git.GitURLParse(repoURLString)
	if err != nil {
		return "", nil, err
	}

	// Remove the user information.
	// e.g., `git@github.com` -> `github.com`
	if repoURL.User != nil {
		repoURL.User = nil
	}

	urlString := repoURL.String()
	trimmedURL := strings.TrimPrefix(urlString, repoURL.Scheme+"://")
	trimmedURL = strings.TrimSuffix(trimmedURL, ".git")
	urlParts := strings.Split(trimmedURL, "/")

	// Validate
	switch len(urlParts) {
	case 2:
		// gist.github.com/<gist_id>
		if !strings.EqualFold(urlParts[0], "gist.github.com") {
			err = fmt.Errorf("failed to parse repository or gist URL (%s): 2 path segments are only expected if the host is 'gist.github.com' ('gist.github.com', '<gist_id>')", urlString)
		}
	case 3:
		// github.com/<user>/<repo>
		// gist.github.com/<user>/<gist_id>
		// github.company.org/<user>/<repo>
		// github.company.org/gist/<gist_id>
	case 4:
		// github.company.org/gist/<user>/<id>
		if !strings.EqualFold(urlParts[1], "gist") || (strings.EqualFold(urlParts[0], "github.com") && strings.EqualFold(urlParts[1], "gist")) {
			err = fmt.Errorf("failed to parse repository or gist URL (%s): 4 path segments are only expected if the host isn't 'github.com' and the path starts with 'gist' ('github.example.com', 'gist', '<owner>', '<gist_id>')", urlString)
		}
	default:
		err = fmt.Errorf("invalid repository or gist URL (%s): length of URL segments should be between 2 and 4, not %d (%v)", urlString, len(urlParts), urlParts)
	}

	if err != nil {
		return "", nil, err
	}
	return urlString, urlParts, nil
}
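To make getRepoURLParts concrete: for a hypothetical HTTPS input (placeholders in angle brackets), the user info is stripped, the scheme and a trailing .git are trimmed only for splitting, and the function returns the full URL string plus the path segments that EnumerateAndScanAllObjects later reads as owner (urlParts[1]) and repository name (urlParts[2]):

	input:     https://github.com/<user>/<repo>.git
	urlString: https://github.com/<user>/<repo>.git
	urlParts:  ["github.com", "<user>", "<repo>"]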
pkg/sources/github_experimental/object_discovery.go (new file, 654 lines)
@@ -0,0 +1,654 @@
package github_experimental

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"time"

	"github.com/google/go-github/v63/github"
	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
	"golang.org/x/oauth2"
)

// Assumption: sleeping for 60 seconds is enough to reset the secondary rate limit
// see https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#secondary-rate-limits
const secondaryRateLimitSleep = 60

// Assumption: on average, a fork contributes 0.1% additional commits
const forkCommitMultiplier = 0.001

// Threshold for estimated short SHA-1 hash collisions (defaults to 1...so basically none)
// as calculated using the Birthday Paradox.
// Adjust this to a higher value if you're willing to accept more collisions (and a shorter runtime).
var collisionThreshold float64

// Starting character length (4 is the minimum required by git)
const startingCharLen = 4

// Max character length (6 is the default maximum)
// 6 chars == 16M possibilities --> which will take 18k-55k queries.
// That's really the max that's tolerable since it will take a long time to run.
// If you increase this to accommodate a MASSIVE repository, it will take a long time to run.
const maxCharLen = 6

// Starting GraphQL query chunk size.
// Max that worked was 900.
// 350 is a safe starting point.
const maxChunkSize = 900
const initialChunkSize = 350

// Max number of commits to fetch from the repository in one command
// ex: git fetch origin <commit1> <commit2> ... <commit1000>
const gitFetchMax = 1000

// Constants for commit types
const (
	invalidCommit     = "invalid"
	validHiddenCommit = "valid_hidden"
)

type backoff struct {
	value              float64
	decreasePercentage float64
	increasePercentage float64
	successThreshold   int
	successCount       int
}

func newBackoff(initialValue, decreasePercentage, increasePercentage float64, successThreshold int) *backoff {
	return &backoff{
		value:              initialValue,
		decreasePercentage: decreasePercentage,
		increasePercentage: increasePercentage,
		successThreshold:   successThreshold,
	}
}

func (b *backoff) errorOccurred() float64 {
	b.value -= b.value * (b.decreasePercentage / 100)
	b.successCount = 0 // Reset success count on error
	if b.value < 100 {
		b.value = 100
	}
	return b.value
}

func (b *backoff) successOccurred() float64 {
	b.successCount++
	if b.successCount >= b.successThreshold {
		b.value += b.value * (b.increasePercentage / 100)
		b.successCount = 0 // Reset success count after increasing the value
	}
	if b.value > maxChunkSize {
		b.value = maxChunkSize
	}
	return b.value
}

func (b *backoff) getValue() int {
	return int(b.value)
}
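A minimal sketch of how this adaptive chunk size behaves, using the same parameters processCommits passes below (initial value 350, 10% decrease per error, 10% increase after each success, floored at 100 and capped at maxChunkSize); the snippet is an illustration, not part of the commit:

	b := newBackoff(initialChunkSize, 10, 10, 1)
	b.errorOccurred()   // 350 -> 315: a failed GraphQL query shrinks the next chunk
	b.successOccurred() // 315 -> 346.5: a successful query grows it again
	_ = b.getValue()    // 346, the size used for the next chunk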
// Github token
var ghToken = ""

func getForksCount(owner, repoName string) (int, error) {
	ctx := context.Background()
	ts := oauth2.StaticTokenSource(
		&oauth2.Token{AccessToken: ghToken},
	)
	tc := oauth2.NewClient(ctx, ts)

	client := github.NewClient(tc)

	repo, _, err := client.Repositories.Get(ctx, owner, repoName)
	if err != nil {
		return 0, err
	}

	return repo.GetForksCount(), nil
}

func getGitHubUser() (string, error) {
	ctx := context.Background()
	ts := oauth2.StaticTokenSource(
		&oauth2.Token{AccessToken: ghToken},
	)
	tc := oauth2.NewClient(ctx, ts)

	client := github.NewClient(tc)

	ghUser, _, err := client.Users.Get(ctx, "")
	if err != nil {
		return "", err
	}
	return ghUser.GetLogin(), nil
}

// runGitCommand runs a git command
func runGitCommand(args []string) ([]byte, error) {
	cmd := exec.Command("git", args...)
	out, err := cmd.CombinedOutput()
	return out, err
}

func getExistingHashes(path string) ([]string, error) {
	var hashes []string
	gitArgs := []string{
		"-C",
		path,
		"--work-tree",
		path,
		"cat-file",
		"--batch-check",
		"--batch-all-objects",
	}
	outputBytes, err := runGitCommand(gitArgs)
	if err != nil {
		return hashes, err
	}

	output := string(outputBytes)
	lines := strings.Split(output, "\n")
	for _, line := range lines {
		if len(line) > 0 {
			parts := strings.Fields(line)
			if len(parts) > 0 {
				hashes = append(hashes, parts[0])
			}
		}
	}
	return hashes, nil
}

// calculateUsedKeySet estimates the total used key set -
// meaning how many used hashes are in the repository.
func calculateUsedKeySet(commitCount, forksCount int) int {
	// Calculate total known key set
	commits := float64(commitCount)
	forks := float64(forksCount)
	knownKeySet := (commits + (commits * forkCommitMultiplier * forks))

	return int(knownKeySet)
}

// Estimate the number of collisions using the Birthday Paradox
func estimateCollisions(keySpace, knownKeySet int) float64 {
	keySpaceF := float64(keySpace)
	knownKeySetF := float64(knownKeySet)
	return (knownKeySetF * (knownKeySetF - 1)) / (2 * keySpaceF)
}

func getShortShaLen(knownKeySet int) int {
	// Calculate the length of the short SHA-1 hash
	// This is the minimum length required to avoid collisions
	// in the estimated known key set
	shortShaLen := startingCharLen
	keySpace := 1 << (shortShaLen * 4)
	collisions := estimateCollisions(keySpace, knownKeySet)
	fmt.Println("Collisions: ", collisions)
	fmt.Println("Collision Threshold: ", collisionThreshold)
	for collisions > collisionThreshold {
		if shortShaLen >= maxCharLen {
			break
		}
		shortShaLen++
		keySpace = 1 << (shortShaLen * 4)
		collisions = estimateCollisions(keySpace, knownKeySet)
	}

	return shortShaLen
}
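To make the prefix sizing concrete, a worked example with a hypothetical repository (the commit count below is illustrative, not taken from the diff). Using the Birthday-Paradox approximation collisions ≈ n(n-1) / (2k), where n is the estimated used key set and k = 16^len is the short-SHA key space:

	n = 10,000:
	len = 4  ->  k = 16^4 = 65,536       ->  ~762.9 expected collisions
	len = 5  ->  k = 16^5 = 1,048,576    ->  ~47.7
	len = 6  ->  k = 16^6 = 16,777,216   ->  ~3.0

With the default collisionThreshold of 1, the loop keeps lengthening the prefix but stops at maxCharLen, so getShortShaLen returns 6 here even though the estimate is still above the threshold.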
// Generate all possible min commit hashes
func generateShortSHAStrings(charLen int) []string {
	hexDigits := "0123456789abcdef"
	var hexStrings []string
	var generateCombinations func(prefix string, length int)

	generateCombinations = func(prefix string, length int) {
		if length == 0 {
			hexStrings = append(hexStrings, prefix)
			return
		}
		for _, digit := range hexDigits {
			generateCombinations(prefix+string(digit), length-1)
		}
	}

	generateCombinations("", charLen)
	return hexStrings
}

// Write commits to disk
func writeCommitsToDisk(commits []string, commitsType, folder string) error {
	filename := fmt.Sprintf("%s/%s.txt", folder, commitsType)

	// Open file in append mode, create if it doesn't exist
	file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	defer file.Close()

	for _, commit := range commits {
		if _, err := file.WriteString(commit + "\n"); err != nil {
			return err
		}
	}
	return nil
}

// Read commits from disk
func readCommitsFromDisk(commitsType, folder string) ([]string, error) {
	filename := fmt.Sprintf("%s/%s.txt", folder, commitsType)
	if _, err := os.Stat(filename); os.IsNotExist(err) {
		return nil, nil
	}

	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	lines := strings.Split(string(data), "\n")
	var commits []string
	for _, line := range lines {
		if line != "" {
			commits = append(commits, strings.TrimSpace(line))
		}
	}
	return removeNewlineAndUnique(commits), nil
}
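Taken together with the folder created in EnumerateAndScanAllObjects further down, these two helpers give the sub-module a simple resumable cache: guessed prefixes that resolved to real hidden commits are appended to valid_hidden.txt and misses to invalid.txt in a per-repository directory, so a restarted scan can skip work it has already done. Under the defaults that layout looks like (paths shown purely for illustration):

	~/.trufflehog/<owner>/<repo>/valid_hidden.txt
	~/.trufflehog/<owner>/<repo>/invalid.txt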
// Remove newlines from commits and make them unique
func removeNewlineAndUnique(commits []string) []string {
	commitMap := make(map[string]struct{})
	for _, commit := range commits {
		cleanCommit := strings.TrimSpace(commit)
		commitMap[cleanCommit] = struct{}{}
	}
	var uniqueCommits []string
	for commit := range commitMap {
		uniqueCommits = append(uniqueCommits, commit)
	}
	return uniqueCommits
}

// Remove commits that are already in the existing_commits list
func removeByShortSHA(existingCommits, newCommits []string) []string {
	existingSet := make(map[string]struct{})
	for _, commit := range existingCommits {
		existingSet[commit] = struct{}{}
	}
	var filteredCommits []string
	for _, commit := range newCommits {
		if _, exists := existingSet[commit]; !exists {
			filteredCommits = append(filteredCommits, commit)
		}
	}
	return filteredCommits
}

// Remove commits that are already in the existing_commits list (by char_len)
func removeBySHA(existingCommits, newCommits []string, charLen int) []string {
	existingSet := make(map[string]struct{})
	for _, commit := range existingCommits {
		shortSHA := commit
		if len(commit) > charLen {
			shortSHA = commit[:charLen]
		}
		existingSet[shortSHA] = struct{}{}
	}
	var filteredCommits []string
	for _, commit := range newCommits {
		shortSHA := commit
		if len(commit) > charLen {
			shortSHA = commit[:charLen]
		}
		if _, exists := existingSet[shortSHA]; !exists {
			filteredCommits = append(filteredCommits, commit)
		}
	}
	return filteredCommits
}

func processCommits(ctx context.Context, needsProcessing []string, owner, repo, path string) {
	repoCtx := context.WithValue(ctx, "repo", repo)

	startingSize := float64(len(needsProcessing))
	queryChunkSize := newBackoff(initialChunkSize, 10, 10, 1)
	for len(needsProcessing) > 0 {
		if len(needsProcessing) < queryChunkSize.getValue() {
			queryChunkSize.value = float64(len(needsProcessing))
		}
		chunkSize := queryChunkSize.getValue()
		chunk := needsProcessing[:chunkSize]
		needsProcessing = needsProcessing[chunkSize:]

		commitData, err := checkHashes(owner, repo, chunk)
		if err != nil {
			repoCtx.Logger().V(2).Info("Temporary error occurred in guessing commits", "error", err)
			needsProcessing = append(needsProcessing, chunk...)
			queryChunkSize.errorOccurred()
			if strings.Contains(err.Error(), "You have exceeded a secondary rate limit") {
				repoCtx.Logger().V(2).Info("Reached secondary GitHub Rate Limit. Sleeping for 60 seconds.")
				time.Sleep(secondaryRateLimitSleep * time.Second)
			}
			continue
		}

		percentCompleted := (1 - (float64(len(needsProcessing)) / startingSize)) * 100

		repoCtx.Logger().V(2).Info("Progress", "percent_completed", percentCompleted, "needs_processing", len(needsProcessing))

		queryChunkSize.successOccurred()
		err = writeCommitsToDisk(commitData[validHiddenCommit], validHiddenCommit, path)
		if err != nil {
			repoCtx.Logger().V(2).Info("Failed to write valid hidden commits to disk", "error", err)
		}
		err = writeCommitsToDisk(commitData[invalidCommit], invalidCommit, path)
		if err != nil {
			repoCtx.Logger().V(2).Info("Failed to write invalid commits to disk", "error", err)
		}
	}
}

type commitData struct {
	OID string `json:"oid"`
}

type responseData struct {
	Data struct {
		Repository map[string]commitData `json:"repository"`
	} `json:"data"`
	Errors []struct {
		Message string `json:"message"`
	} `json:"errors"`
	Message string `json:"message"`
}

func checkHashes(owner, repo string, hashes []string) (map[string][]string, error) {
	testCases := ""
	for _, h := range hashes {
		testCase := fmt.Sprintf(`
		commit%s: object(expression: "%s") {
			... on Commit {
				oid
			}
		}
		`, h, h)
		testCases += testCase
	}

	query := fmt.Sprintf(`
	query {
		repository(owner: "%s", name: "%s") {
			%s
		}
	}
	`, owner, repo, testCases)

	headers := map[string]string{
		"Authorization":         "Bearer " + ghToken,
		"Content-Type":          "application/json",
		"Github-Verified-Fetch": "true",
		"X-Requested-With":      "XMLHttpRequest",
		"Accept-Language":       "en-US,en;q=0.9",
		"Priority":              "u=1, i",
	}

	requestBody, err := json.Marshal(map[string]string{"query": query})
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request body: %w", err)
	}

	req, err := http.NewRequest("POST", "https://api.github.com/graphql", bytes.NewBuffer(requestBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	for key, value := range headers {
		req.Header.Set(key, value)
	}

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request error: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response body: %w", err)
	}

	var data responseData
	if err := json.Unmarshal(body, &data); err != nil {
		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
	}

	if len(data.Errors) > 0 {
		return nil, fmt.Errorf("%s (GitHub Request Error)", strings.Split(data.Errors[0].Message, ".")[0])
	}
	if data.Message != "" {
		return nil, fmt.Errorf("%s (GitHub Request Error)", strings.Split(data.Message, ".")[0])
	}

	commits := data.Data.Repository

	valid_cfor := []string{}
	invalid := []string{}

	for commit, value := range commits {
		commit = strings.Replace(commit, "commit", "", 1)
		if value.OID == "{}" || value.OID == "" {
			invalid = append(invalid, commit)
		} else {
			valid_cfor = append(valid_cfor, value.OID)
		}
	}

	res := map[string][]string{
		validHiddenCommit: valid_cfor,
		invalidCommit:     invalid,
	}

	return res, nil
}
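For reference, the aliased query that checkHashes assembles looks roughly like the following for a chunk of two candidate prefixes (abcd and abce are made-up example values, and the owner/repo are placeholders). Aliases that resolve return the full oid of a reachable commit; the rest come back empty and are recorded as invalid:

	query {
	  repository(owner: "<owner>", name: "<repo>") {
	    commitabcd: object(expression: "abcd") { ... on Commit { oid } }
	    commitabce: object(expression: "abce") { ... on Commit { oid } }
	  }
	}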
// createBatches divides a slice into batches of a specified size
func createBatches(items []string, batchSize int) <-chan []string {
	out := make(chan []string)
	go func() {
		defer close(out)
		itemsCopy := append([]string(nil), items...)
		for len(itemsCopy) > 0 {
			end := batchSize
			if len(itemsCopy) < batchSize {
				end = len(itemsCopy)
			}
			batch := itemsCopy[:end]
			itemsCopy = itemsCopy[end:]
			out <- batch
		}
	}()
	return out
}

// downloadPatches fetches and checks out cfor commits
func downloadPatches(valid_cfor []string, path string) error {
	// Download all patches
	for batch := range createBatches(valid_cfor, gitFetchMax) {
		gitArgs := []string{
			"-C",
			path,
			"--work-tree",
			path,
			"fetch",
			"--quiet",
			"origin",
		}
		gitArgs = append(gitArgs, batch...)
		_, err := runGitCommand(gitArgs)
		if err != nil {
			return err
		}
	}

	// Checkout each commit
	// Note: path and worktree are needed or else git will do something funny with the actual cwd
	for _, commit := range valid_cfor {
		branchName := fmt.Sprintf("_%s", commit)
		gitArgs := []string{
			"-C",
			path,
			"--work-tree",
			path,
			"checkout",
			"--quiet",
			"-b",
			branchName,
			commit,
		}
		_, err := runGitCommand(gitArgs)
		if err != nil {
			return fmt.Errorf("failed to checkout commit %s: %v", commit, err)
		}
	}

	return nil
}
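Concretely, downloadPatches shells out to commands equivalent to the following (<clone-path> and the <sha> values are placeholders): the fetch pulls the otherwise-unreferenced objects from origin in batches of up to gitFetchMax (1000), and each checkout pins one of them to a local branch named _<sha> so the regular git scanner will traverse it:

	git -C <clone-path> --work-tree <clone-path> fetch --quiet origin <sha1> <sha2> ...
	git -C <clone-path> --work-tree <clone-path> checkout --quiet -b _<sha1> <sha1>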
// EnumerateAndScanAllObjects scans hidden data (and non-hidden data) for secrets in a GitHub repository
func (s *Source) EnumerateAndScanAllObjects(ctx context.Context, chunksChan chan *sources.Chunk) error {
	// assign github token to global variable
	ghToken = s.conn.GetToken()

	// set collision threshold to user input
	collisionThreshold = float64(s.conn.CollisionThreshold)

	// parse the repo URL
	repoURL, urlParts, err := getRepoURLParts(s.conn.Repository)
	if err != nil {
		return fmt.Errorf("failed to get repo URL parts: %w", err)
	}

	// read in the owner and repo name
	owner := urlParts[1]
	repoName := urlParts[2]

	// get repo metadata and store in cacheRepoInfo
	repoCtx := context.WithValue(ctx, "repo", owner+"/"+repoName)
	ghRepo, _, err := s.apiClient.Repositories.Get(repoCtx, owner, repoName)
	if err != nil {
		return fmt.Errorf("failed to fetch repository: %w", err)
	}
	s.cacheRepoInfo(ghRepo)

	// Create a folder housing the repo and commit data
	userHomeDir, err := os.UserHomeDir()
	if err != nil {
		return fmt.Errorf("failed to get user home directory: %w", err)
	}

	folderPath := userHomeDir + "/.trufflehog/" + owner + "/" + repoName
	err = os.MkdirAll(folderPath, 0755)
	if err != nil {
		return fmt.Errorf("failed to create .trufflehog folder in user's home directory: %w", err)
	}

	// Get GitHub User tied to token
	ghUser, err := getGitHubUser()
	if err != nil {
		return fmt.Errorf("failed to get GitHub user details: %w", err)
	}

	// get the number of forks
	forksCount, err := getForksCount(owner, repoName)
	if err != nil {
		return fmt.Errorf("failed to get forks count: %w", err)
	}

	// download the repo
	path, repo, err := git.CloneRepoUsingToken(ctx, ghToken, repoURL, ghUser)
	if err != nil {
		return fmt.Errorf("failed to clone the repository: %w", err)
	}

	defer os.RemoveAll(path)

	// count total valid hashes
	validHashes, err := getExistingHashes(path)
	if err != nil {
		return fmt.Errorf("failed to enumerate existing commit object hashes: %w", err)
	}

	// Calculate estimated used key set
	estimatedUsedKeySet := calculateUsedKeySet(len(validHashes), forksCount)

	// Calculate Short SHA-1 Length for Unambiguous Commit Identifiers
	shortShaLen := getShortShaLen(estimatedUsedKeySet)

	// Log stats
	repoCtx.Logger().V(2).Info("Estimated used keys", "count", estimatedUsedKeySet)
	repoCtx.Logger().V(2).Info("Target Short SHA-1 length", "length", shortShaLen)
	repoCtx.Logger().V(2).Info("Estimated collisions", "count", estimateCollisions(1<<(shortShaLen*4), estimatedUsedKeySet))

	// Read in existing commits (if any)
	validHiddenCommits, err := readCommitsFromDisk(validHiddenCommit, folderPath)
	if err != nil {
		return fmt.Errorf("failed to read valid hidden commits from disk: %w", err)
	}

	invalidCommits, err := readCommitsFromDisk(invalidCommit, folderPath)
	if err != nil {
		return fmt.Errorf("failed to read invalid commits from disk: %w", err)
	}

	// Generate all possible commit hashes using the short SHA-1 length
	possibleCommits := generateShortSHAStrings(shortShaLen)

	// Remove commits that are already used by the repo or previously calculated (on restart)
	possibleCommits = removeBySHA(validHashes, possibleCommits, shortShaLen)
	possibleCommits = removeBySHA(validHiddenCommits, possibleCommits, shortShaLen)
	possibleCommits = removeByShortSHA(invalidCommits, possibleCommits)

	// Guess all possible commit hashes
	processCommits(ctx, possibleCommits, owner, repoName, folderPath)

	// Download commit hashes and checkout into branches (only way scanner will pick them up)
	err = downloadPatches(validHiddenCommits, path)
	if err != nil {
		return fmt.Errorf("failed to download patches: %w", err)
	}

	// Scan git for secrets
	repoCtx.Logger().V(2).Info("scanning for secrets in repo", "repo_url", repoURL)
	start := time.Now()
	err = s.git.ScanRepo(ctx, repo, path, s.scanOptions, sources.ChanReporter{Ch: chunksChan})
	if err != nil {
		return fmt.Errorf("failed to scan repo: %w", err)
	}
	duration := time.Since(start)
	repoCtx.Logger().V(2).Info("scanned 1 repo for hidden data", "duration_seconds", duration)

	// Remove the folder if user requests
	if s.conn.DeleteCachedData {
		err = os.RemoveAll(folderPath)
		if err != nil {
			return fmt.Errorf("failed to delete cached data: %w", err)
		}
	}

	return nil
}
pkg/sources/github_experimental/repo.go (new file, 71 lines)

@@ -0,0 +1,71 @@
package github_experimental

import (
	"fmt"
	"strings"
	"sync"

	"github.com/google/go-github/v63/github"

	"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
)

type repoInfoCache struct {
	mu    sync.RWMutex
	cache map[string]repoInfo
}

func newRepoInfoCache() repoInfoCache {
	return repoInfoCache{
		cache: make(map[string]repoInfo),
	}
}

func (r *repoInfoCache) put(repoURL string, info repoInfo) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.cache[repoURL] = info
}

func (r *repoInfoCache) get(repoURL string) (repoInfo, bool) {
	r.mu.RLock()
	defer r.mu.RUnlock()

	info, ok := r.cache[repoURL]
	return info, ok
}

type repoInfo struct {
	owner      string
	name       string
	fullName   string
	hasWiki    bool // the repo is _likely_ to have a wiki (see the comment on wikiIsReachable func).
	size       int
	visibility source_metadatapb.Visibility
}

func (s *Source) cacheRepoInfo(r *github.Repository) {
	info := repoInfo{
		owner:    r.GetOwner().GetLogin(),
		name:     r.GetName(),
		fullName: r.GetFullName(),
		hasWiki:  r.GetHasWiki(),
		size:     r.GetSize(),
	}
	if r.GetPrivate() {
		info.visibility = source_metadatapb.Visibility_private
	} else {
		info.visibility = source_metadatapb.Visibility_public
	}
	s.repoInfoCache.put(r.GetCloneURL(), info)
}

func (s *Source) normalizeRepo(repo string) (string, error) {
	// If there's a '/', assume it's a URL and try to normalize it.
	if strings.ContainsRune(repo, '/') {
		return giturl.NormalizeGithubRepo(repo)
	}

	return "", fmt.Errorf("no repositories found for %s", repo)
}
@@ -238,6 +238,20 @@ type GithubConfig struct {
	IncludeWikis bool
}

// GitHubExperimentalConfig defines the optional configuration for an experimental GitHub source.
type GitHubExperimentalConfig struct {
	// Repository is the repository to scan.
	Repository string
	// Token is the token to use to authenticate with the source.
	Token string
	// ObjectDiscovery indicates whether to discover all commit objects (CFOR) in the repository.
	ObjectDiscovery bool
	// CollisionThreshold is the number of short-sha collisions tolerated during hidden data enumeration. Default is 1.
	CollisionThreshold int
	// DeleteCachedData indicates whether to delete cached data.
	DeleteCachedData bool
}

// GitlabConfig defines the optional configuration for a gitlab source.
type GitlabConfig struct {
	// Endpoint is the endpoint of the source.
@@ -49,6 +49,7 @@ enum SourceType {
	SOURCE_TYPE_WEBHOOK = 34;
	SOURCE_TYPE_ELASTICSEARCH = 35;
	SOURCE_TYPE_HUGGINGFACE = 36;
	SOURCE_TYPE_GITHUB_EXPERIMENTAL = 37;
}

message LocalSource {

@@ -243,6 +244,16 @@ message GitHub {
	bool include_wikis = 19;
}

message GitHubExperimental {
	string repository = 1;
	oneof credential {
		string token = 2;
	}
	bool object_discovery = 3;
	int64 collision_threshold = 4;
	bool delete_cached_data = 5;
}

message GoogleDrive {
	oneof credential {
		string refresh_token = 1;