mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Update GitHub enumeration to report unique filtered values (#3292)
The reported values should match the values populated in s.repos.
This commit is contained in:
parent
b2da2a6a5c
commit
77dc2720a8
2 changed files with 53 additions and 11 deletions
|
@ -331,7 +331,14 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
|
||||||
githubSecondsSpentRateLimited.WithLabelValues(s.name).Set(0)
|
githubSecondsSpentRateLimited.WithLabelValues(s.name).Set(0)
|
||||||
githubReposScanned.WithLabelValues(s.name).Set(0)
|
githubReposScanned.WithLabelValues(s.name).Set(0)
|
||||||
|
|
||||||
err := s.enumerate(ctx)
|
// We don't care about handling enumerated values as they happen during
|
||||||
|
// the normal Chunks flow because we enumerate and scan in two steps.
|
||||||
|
noopReporter := sources.VisitorReporter{
|
||||||
|
VisitUnit: func(context.Context, sources.SourceUnit) error {
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err := s.enumerate(ctx, noopReporter)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error enumerating: %w", err)
|
return fmt.Errorf("error enumerating: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -339,30 +346,52 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
|
||||||
return s.scan(ctx, chunksReporter)
|
return s.scan(ctx, chunksReporter)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Source) enumerate(ctx context.Context) error {
|
// enumerate enumerates the GitHub source based on authentication method and
|
||||||
// Create a reporter that does nothing for now.
|
// user configuration. It populates s.filteredRepoCache, s.repoInfoCache,
|
||||||
noopReporter := sources.VisitorReporter{
|
// s.memberCache, s.totalRepoSize, s.orgsCache, and s.repos. Additionally,
|
||||||
|
// repositories and gists are reported to the provided UnitReporter.
|
||||||
|
func (s *Source) enumerate(ctx context.Context, reporter sources.UnitReporter) error {
|
||||||
|
seenUnits := make(map[sources.SourceUnit]struct{})
|
||||||
|
// Wrapper reporter to deduplicate and filter found units.
|
||||||
|
dedupeReporter := sources.VisitorReporter{
|
||||||
VisitUnit: func(ctx context.Context, su sources.SourceUnit) error {
|
VisitUnit: func(ctx context.Context, su sources.SourceUnit) error {
|
||||||
return nil
|
// Only report units that passed the user configured filter.
|
||||||
|
name := su.Display()
|
||||||
|
if !s.filteredRepoCache.Exists(name) {
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
|
// Only report a unit once.
|
||||||
|
if _, ok := seenUnits[su]; ok {
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
|
seenUnits[su] = struct{}{}
|
||||||
|
return reporter.UnitOk(ctx, su)
|
||||||
},
|
},
|
||||||
|
VisitErr: reporter.UnitErr,
|
||||||
}
|
}
|
||||||
|
// Report any values that were already configured.
|
||||||
|
for _, name := range s.filteredRepoCache.Keys() {
|
||||||
|
url, _ := s.filteredRepoCache.Get(name)
|
||||||
|
_ = dedupeReporter.UnitOk(ctx, RepoUnit{name: name, url: url})
|
||||||
|
}
|
||||||
|
|
||||||
// I'm not wild about switching on the connector type here (as opposed to dispatching to the connector itself) but
|
// I'm not wild about switching on the connector type here (as opposed to dispatching to the connector itself) but
|
||||||
// this felt like a compromise that allowed me to isolate connection logic without rewriting the entire source.
|
// this felt like a compromise that allowed me to isolate connection logic without rewriting the entire source.
|
||||||
switch c := s.connector.(type) {
|
switch c := s.connector.(type) {
|
||||||
case *appConnector:
|
case *appConnector:
|
||||||
if err := s.enumerateWithApp(ctx, c.InstallationClient(), noopReporter); err != nil {
|
if err := s.enumerateWithApp(ctx, c.InstallationClient(), dedupeReporter); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
case *basicAuthConnector:
|
case *basicAuthConnector:
|
||||||
if err := s.enumerateBasicAuth(ctx, noopReporter); err != nil {
|
if err := s.enumerateBasicAuth(ctx, dedupeReporter); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
case *tokenConnector:
|
case *tokenConnector:
|
||||||
if err := s.enumerateWithToken(ctx, c.IsGithubEnterprise(), noopReporter); err != nil {
|
if err := s.enumerateWithToken(ctx, c.IsGithubEnterprise(), dedupeReporter); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
case *unauthenticatedConnector:
|
case *unauthenticatedConnector:
|
||||||
s.enumerateUnauthenticated(ctx, noopReporter)
|
s.enumerateUnauthenticated(ctx, dedupeReporter)
|
||||||
}
|
}
|
||||||
s.repos = make([]string, 0, s.filteredRepoCache.Count())
|
s.repos = make([]string, 0, s.filteredRepoCache.Count())
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
@ -566,8 +567,18 @@ func TestEnumerate(t *testing.T) {
|
||||||
s.cacheRepoInfo(repo)
|
s.cacheRepoInfo(repo)
|
||||||
s.filteredRepoCache.Set(repo.GetFullName(), repo.GetCloneURL())
|
s.filteredRepoCache.Set(repo.GetFullName(), repo.GetCloneURL())
|
||||||
|
|
||||||
|
var reportedRepos []string
|
||||||
|
reporter := sources.VisitorReporter{
|
||||||
|
VisitUnit: func(ctx context.Context, su sources.SourceUnit) error {
|
||||||
|
url, _ := su.SourceUnitID()
|
||||||
|
reportedRepos = append(reportedRepos, url)
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
// Act
|
// Act
|
||||||
err := s.enumerate(context.Background())
|
err := s.enumerate(context.Background(), reporter)
|
||||||
|
slices.Sort(reportedRepos)
|
||||||
|
|
||||||
// Assert
|
// Assert
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
@ -576,6 +587,8 @@ func TestEnumerate(t *testing.T) {
|
||||||
assert.True(t, s.filteredRepoCache.Exists("super-secret-user/super-secret-repo"))
|
assert.True(t, s.filteredRepoCache.Exists("super-secret-user/super-secret-repo"))
|
||||||
assert.True(t, s.filteredRepoCache.Exists("cached-user/cached-repo"))
|
assert.True(t, s.filteredRepoCache.Exists("cached-user/cached-repo"))
|
||||||
assert.True(t, s.filteredRepoCache.Exists("2801a2b0523099d0614a951579d99ba9"))
|
assert.True(t, s.filteredRepoCache.Exists("2801a2b0523099d0614a951579d99ba9"))
|
||||||
|
assert.Equal(t, 3, len(s.repos))
|
||||||
|
assert.Equal(t, s.repos, reportedRepos)
|
||||||
// Enumeration cached all repos.
|
// Enumeration cached all repos.
|
||||||
assert.Equal(t, 3, len(s.repoInfoCache.cache))
|
assert.Equal(t, 3, len(s.repoInfoCache.cache))
|
||||||
_, ok := s.repoInfoCache.get("https://github.com/super-secret-user/super-secret-repo.git")
|
_, ok := s.repoInfoCache.get("https://github.com/super-secret-user/super-secret-repo.git")
|
||||||
|
@ -640,7 +653,7 @@ func BenchmarkEnumerate(b *testing.B) {
|
||||||
setupMocks(b)
|
setupMocks(b)
|
||||||
|
|
||||||
b.StartTimer()
|
b.StartTimer()
|
||||||
_ = s.enumerate(context.Background())
|
_ = s.enumerate(context.Background(), noopReporter())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue