mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Update GitHub enumeration to report unique filtered values (#3292)
The reported values should match the values populated in s.repos.
This commit is contained in:
parent
b2da2a6a5c
commit
77dc2720a8
2 changed files with 53 additions and 11 deletions
|
@ -331,7 +331,14 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
|
|||
githubSecondsSpentRateLimited.WithLabelValues(s.name).Set(0)
|
||||
githubReposScanned.WithLabelValues(s.name).Set(0)
|
||||
|
||||
err := s.enumerate(ctx)
|
||||
// We don't care about handling enumerated values as they happen during
|
||||
// the normal Chunks flow because we enumerate and scan in two steps.
|
||||
noopReporter := sources.VisitorReporter{
|
||||
VisitUnit: func(context.Context, sources.SourceUnit) error {
|
||||
return nil
|
||||
},
|
||||
}
|
||||
err := s.enumerate(ctx, noopReporter)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error enumerating: %w", err)
|
||||
}
|
||||
|
@ -339,30 +346,52 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
|
|||
return s.scan(ctx, chunksReporter)
|
||||
}
|
||||
|
||||
func (s *Source) enumerate(ctx context.Context) error {
|
||||
// Create a reporter that does nothing for now.
|
||||
noopReporter := sources.VisitorReporter{
|
||||
// enumerate enumerates the GitHub source based on authentication method and
|
||||
// user configuration. It populates s.filteredRepoCache, s.repoInfoCache,
|
||||
// s.memberCache, s.totalRepoSize, s.orgsCache, and s.repos. Additionally,
|
||||
// repositories and gists are reported to the provided UnitReporter.
|
||||
func (s *Source) enumerate(ctx context.Context, reporter sources.UnitReporter) error {
|
||||
seenUnits := make(map[sources.SourceUnit]struct{})
|
||||
// Wrapper reporter to deduplicate and filter found units.
|
||||
dedupeReporter := sources.VisitorReporter{
|
||||
VisitUnit: func(ctx context.Context, su sources.SourceUnit) error {
|
||||
return nil
|
||||
// Only report units that passed the user configured filter.
|
||||
name := su.Display()
|
||||
if !s.filteredRepoCache.Exists(name) {
|
||||
return ctx.Err()
|
||||
}
|
||||
// Only report a unit once.
|
||||
if _, ok := seenUnits[su]; ok {
|
||||
return ctx.Err()
|
||||
}
|
||||
seenUnits[su] = struct{}{}
|
||||
return reporter.UnitOk(ctx, su)
|
||||
},
|
||||
VisitErr: reporter.UnitErr,
|
||||
}
|
||||
// Report any values that were already configured.
|
||||
for _, name := range s.filteredRepoCache.Keys() {
|
||||
url, _ := s.filteredRepoCache.Get(name)
|
||||
_ = dedupeReporter.UnitOk(ctx, RepoUnit{name: name, url: url})
|
||||
}
|
||||
|
||||
// I'm not wild about switching on the connector type here (as opposed to dispatching to the connector itself) but
|
||||
// this felt like a compromise that allowed me to isolate connection logic without rewriting the entire source.
|
||||
switch c := s.connector.(type) {
|
||||
case *appConnector:
|
||||
if err := s.enumerateWithApp(ctx, c.InstallationClient(), noopReporter); err != nil {
|
||||
if err := s.enumerateWithApp(ctx, c.InstallationClient(), dedupeReporter); err != nil {
|
||||
return err
|
||||
}
|
||||
case *basicAuthConnector:
|
||||
if err := s.enumerateBasicAuth(ctx, noopReporter); err != nil {
|
||||
if err := s.enumerateBasicAuth(ctx, dedupeReporter); err != nil {
|
||||
return err
|
||||
}
|
||||
case *tokenConnector:
|
||||
if err := s.enumerateWithToken(ctx, c.IsGithubEnterprise(), noopReporter); err != nil {
|
||||
if err := s.enumerateWithToken(ctx, c.IsGithubEnterprise(), dedupeReporter); err != nil {
|
||||
return err
|
||||
}
|
||||
case *unauthenticatedConnector:
|
||||
s.enumerateUnauthenticated(ctx, noopReporter)
|
||||
s.enumerateUnauthenticated(ctx, dedupeReporter)
|
||||
}
|
||||
s.repos = make([]string, 0, s.filteredRepoCache.Count())
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"slices"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -566,8 +567,18 @@ func TestEnumerate(t *testing.T) {
|
|||
s.cacheRepoInfo(repo)
|
||||
s.filteredRepoCache.Set(repo.GetFullName(), repo.GetCloneURL())
|
||||
|
||||
var reportedRepos []string
|
||||
reporter := sources.VisitorReporter{
|
||||
VisitUnit: func(ctx context.Context, su sources.SourceUnit) error {
|
||||
url, _ := su.SourceUnitID()
|
||||
reportedRepos = append(reportedRepos, url)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
// Act
|
||||
err := s.enumerate(context.Background())
|
||||
err := s.enumerate(context.Background(), reporter)
|
||||
slices.Sort(reportedRepos)
|
||||
|
||||
// Assert
|
||||
assert.Nil(t, err)
|
||||
|
@ -576,6 +587,8 @@ func TestEnumerate(t *testing.T) {
|
|||
assert.True(t, s.filteredRepoCache.Exists("super-secret-user/super-secret-repo"))
|
||||
assert.True(t, s.filteredRepoCache.Exists("cached-user/cached-repo"))
|
||||
assert.True(t, s.filteredRepoCache.Exists("2801a2b0523099d0614a951579d99ba9"))
|
||||
assert.Equal(t, 3, len(s.repos))
|
||||
assert.Equal(t, s.repos, reportedRepos)
|
||||
// Enumeration cached all repos.
|
||||
assert.Equal(t, 3, len(s.repoInfoCache.cache))
|
||||
_, ok := s.repoInfoCache.get("https://github.com/super-secret-user/super-secret-repo.git")
|
||||
|
@ -640,7 +653,7 @@ func BenchmarkEnumerate(b *testing.B) {
|
|||
setupMocks(b)
|
||||
|
||||
b.StartTimer()
|
||||
_ = s.enumerate(context.Background())
|
||||
_ = s.enumerate(context.Background(), noopReporter())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue