From 04c9bb535e5d548deec0f30c9a16e3b7695b2a59 Mon Sep 17 00:00:00 2001 From: ahrav Date: Wed, 12 Oct 2022 16:28:24 -0700 Subject: [PATCH] [THOG-768] - Add ability to skip scanning Github repos (#846) * Add ability to skip scanning Github repos. * remove old change. * rename method. --- pkg/sources/github/github.go | 49 +++++++++++++++++++++++++------ pkg/sources/github/github_test.go | 19 ++++++++---- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index f60f108fb..6a0ff0698 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -42,22 +42,23 @@ const ( ) type Source struct { - name string - sourceID int64 - jobID int64 - verify bool - repos []string - orgs []string - members []string + name string + token string + sourceID int64 + jobID int64 + verify bool + repos, + orgs, + members, + ignoreRepos []string git *git.Git httpClient *http.Client aCtx context.Context log *log.Entry - token string conn *sourcespb.GitHub jobPool *errgroup.Group - resumeInfoSlice []string resumeInfoMutex sync.Mutex + resumeInfoSlice []string apiClient *github.Client publicMap map[string]source_metadatapb.Visibility sources.Progress @@ -131,6 +132,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobID, sourceID int64, s.repos = s.conn.Repositories s.orgs = s.conn.Organizations + s.ignoreRepos = s.conn.IgnoreRepos // Head or base should only be used with incoming webhooks if (len(s.conn.Head) > 0 || len(s.conn.Base) > 0) && len(s.repos) != 1 { @@ -600,6 +602,7 @@ func (s *Source) getReposByOrg(ctx context.Context, org string) ([]string, error PerPage: defaultPagination, }, } + var numRepos, numForks int for { someRepos, res, err := s.apiClient.Repositories.ListByOrg(ctx, org, opts) @@ -615,8 +618,13 @@ func (s *Source) getReposByOrg(ctx context.Context, org string) ([]string, error if len(someRepos) == 0 || res == nil { break } + s.log.Debugf("Listed repos for org %s page %d/%d", org, opts.Page, res.LastPage) for _, r := range someRepos { + if s.ignoreRepo(r.GetName()) { + continue + } + numRepos++ if r.GetFork() { numForks++ @@ -654,6 +662,7 @@ func (s *Source) getReposByUser(ctx context.Context, user string) ([]string, err PerPage: 50, }, } + for { someRepos, res, err := s.apiClient.Repositories.List(ctx, user, opts) if err == nil { @@ -668,8 +677,13 @@ func (s *Source) getReposByUser(ctx context.Context, user string) ([]string, err if res == nil { break } + s.log.Debugf("Listed repos for user %s page %d/%d", user, opts.Page, res.LastPage) for _, r := range someRepos { + if s.ignoreRepo(r.GetName()) { + continue + } + if r.GetFork() && !s.conn.IncludeForks { continue } @@ -683,6 +697,23 @@ func (s *Source) getReposByUser(ctx context.Context, user string) ([]string, err return repos, nil } +func (s *Source) ignoreRepo(r string) bool { + if stringInSlice(r, s.ignoreRepos) { + s.log.Debugf("ignoring repo %s", r) + return true + } + return false +} + +func stringInSlice(s string, l []string) bool { + for _, b := range l { + if b == s { + return true + } + } + return false +} + func (s *Source) getGistsByUser(ctx context.Context, user string) ([]string, error) { var gistURLs []string gistOpts := &github.GistListOptions{} diff --git a/pkg/sources/github/github_test.go b/pkg/sources/github/github_test.go index 41bd5f23b..a3dc968da 100644 --- a/pkg/sources/github/github_test.go +++ b/pkg/sources/github/github_test.go @@ -17,12 +17,13 @@ import ( "github.com/google/go-github/v42/github" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" - "github.com/trufflesecurity/trufflehog/v3/pkg/context" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" "golang.org/x/sync/errgroup" "google.golang.org/protobuf/types/known/anypb" "gopkg.in/h2non/gock.v1" + + "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" ) func createTestSource(src *sourcespb.GitHub) (*Source, *anypb.Any) { @@ -64,9 +65,13 @@ func TestAddReposByOrg(t *testing.T) { gock.New("https://api.github.com"). Get("/orgs/super-secret-org/repos"). Reply(200). - JSON([]map[string]string{{"clone_url": "super-secret-repo"}}) + JSON([]map[string]string{ + {"clone_url": "super-secret-repo", "name": "super-secret-repo"}, + {"clone_url": "super-secret-repo2", "name": "super-secret-repo2"}, + }) s := initTestSource(nil) + s.ignoreRepos = []string{"super-secret-repo2"} // gock works here because github.NewClient is using the default HTTP Transport err := s.addRepos(context.TODO(), "super-secret-org", s.getReposByOrg) assert.Nil(t, err) @@ -81,9 +86,13 @@ func TestAddReposByUser(t *testing.T) { gock.New("https://api.github.com"). Get("/users/super-secret-user/repos"). Reply(200). - JSON([]map[string]string{{"clone_url": "super-secret-repo"}}) + JSON([]map[string]string{ + {"clone_url": "super-secret-repo", "name": "super-secret-repo"}, + {"clone_url": "super-secret-repo2", "name": "super-secret-repo2"}, + }) s := initTestSource(nil) + s.ignoreRepos = []string{"super-secret-repo2"} err := s.addRepos(context.TODO(), "super-secret-user", s.getReposByUser) assert.Nil(t, err) assert.Equal(t, 1, len(s.repos))