mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Implement SourceUnitEnumChunker for GitLab (#2367)
* Implement SourceUnitEnumChunker for GitLab
* Add GitLab engine integration test
* Use a SliceReporter instead of checking for nil reporters
* Use more generic VisitorReporter
* Merge logic from getReposFromGitlab into getAllProjectRepos
* Update integration test to have a lower bound

Unfortunately, the GitLab integration test does not appear to be deterministic. Sometimes 36390 chunks are found, sometimes 36312, or even lower.
This commit is contained in:
parent
186cacc26d
commit
aace92b64d
3 changed files with 213 additions and 59 deletions
41
pkg/engine/gitlab_integration_test.go
Normal file
41
pkg/engine/gitlab_integration_test.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
package engine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
|
||||
)
|
||||
|
||||
func TestGitLab(t *testing.T) {
|
||||
// Run the scan.
|
||||
ctx := context.Background()
|
||||
e, err := Start(ctx,
|
||||
WithDetectors(DefaultDetectors()...),
|
||||
WithVerify(false),
|
||||
)
|
||||
assert.NoError(t, err)
|
||||
|
||||
secret, err := common.GetTestSecret(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(fmt.Errorf("failed to access secret: %v", err))
|
||||
}
|
||||
err = e.ScanGitLab(ctx, sources.GitlabConfig{
|
||||
Token: secret.MustGetField("GITLAB_TOKEN"),
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = e.Finish(ctx)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Check the output provided by metrics.
|
||||
metrics := e.GetMetrics()
|
||||
assert.GreaterOrEqual(t, metrics.ChunksScanned, uint64(36312))
|
||||
assert.GreaterOrEqual(t, metrics.BytesScanned, uint64(91618854))
|
||||
}
|
|
@ -18,5 +18,24 @@ func (c ChanReporter) ChunkOk(ctx context.Context, chunk Chunk) error {
|
|||
|
||||
func (ChanReporter) ChunkErr(ctx context.Context, err error) error {
|
||||
ctx.Logger().Error(err, "error chunking")
|
||||
return nil
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
var _ UnitReporter = (*VisitorReporter)(nil)
|
||||
|
||||
type VisitorReporter struct {
|
||||
VisitUnit func(context.Context, SourceUnit) error
|
||||
VisitErr func(context.Context, error) error
|
||||
}
|
||||
|
||||
func (v VisitorReporter) UnitOk(ctx context.Context, unit SourceUnit) error {
|
||||
return v.VisitUnit(ctx, unit)
|
||||
}
|
||||
|
||||
func (v VisitorReporter) UnitErr(ctx context.Context, err error) error {
|
||||
if v.VisitErr == nil {
|
||||
ctx.Logger().Error(err, "error enumerating")
|
||||
return ctx.Err()
|
||||
}
|
||||
return v.VisitErr(ctx, err)
|
||||
}
|
||||
|
|
|
@ -62,6 +62,7 @@ func (s *Source) WithCustomContentWriter() { s.useCustomContentWriter = true }
|
|||
var _ sources.Source = (*Source)(nil)
|
||||
var _ sources.SourceUnitUnmarshaller = (*Source)(nil)
|
||||
var _ sources.Validator = (*Source)(nil)
|
||||
var _ sources.SourceUnitEnumChunker = (*Source)(nil)
|
||||
|
||||
// Type returns the type of source.
|
||||
// It is used for matching source types in configuration and job input.
|
||||
|
@ -183,11 +184,15 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ .
|
|||
ignoreRepo := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
|
||||
ctx.Logger().Error(err, "could not compile ignore repo glob", "glob", pattern)
|
||||
})
|
||||
gitlabRepos, err := s.getReposFromGitlab(ctx, apiClient, ignoreRepo)
|
||||
if err != nil {
|
||||
reporter := sources.VisitorReporter{
|
||||
VisitUnit: func(ctx context.Context, unit sources.SourceUnit) error {
|
||||
repos = append(repos, unit.SourceUnitID())
|
||||
return ctx.Err()
|
||||
},
|
||||
}
|
||||
if err := s.getAllProjectRepos(ctx, apiClient, ignoreRepo, reporter); err != nil {
|
||||
return err
|
||||
}
|
||||
repos = gitlabRepos
|
||||
}
|
||||
|
||||
s.repos = repos
|
||||
|
@ -246,19 +251,24 @@ func (s *Source) Validate(ctx context.Context) []error {
|
|||
errs = append(errs, fmt.Errorf("could not compile ignore repo pattern %q: %w", pattern, err))
|
||||
})
|
||||
|
||||
projects, err := s.getAllProjects(ctx, apiClient)
|
||||
if err != nil {
|
||||
// Query GitLab for the list of configured repos.
|
||||
var repos []string
|
||||
visitor := sources.VisitorReporter{
|
||||
VisitUnit: func(ctx context.Context, unit sources.SourceUnit) error {
|
||||
repos = append(repos, unit.SourceUnitID())
|
||||
return nil
|
||||
},
|
||||
}
|
||||
if err := s.getAllProjectRepos(ctx, apiClient, ignoreProject, visitor); err != nil {
|
||||
errs = append(errs, err)
|
||||
return errs
|
||||
}
|
||||
|
||||
for _, p := range projects {
|
||||
if !ignoreProject(p.PathWithNamespace) {
|
||||
return errs
|
||||
}
|
||||
if len(repos) == 0 {
|
||||
errs = append(errs, fmt.Errorf("ignore patterns excluded all projects"))
|
||||
}
|
||||
|
||||
return append(errs, fmt.Errorf("ignore patterns excluded all projects"))
|
||||
return errs
|
||||
}
|
||||
|
||||
func (s *Source) newClient() (*gitlab.Client, error) {
|
||||
|
@ -310,28 +320,54 @@ func (s *Source) basicAuthSuccessful(apiClient *gitlab.Client) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (s *Source) getAllProjects(ctx context.Context, apiClient *gitlab.Client) ([]*gitlab.Project, error) {
|
||||
// getAllProjectRepos enumerates all GitLab projects using the provided API
|
||||
// client. The reporter is used to report the valid repository found for
|
||||
// projects that are not ignored.
|
||||
func (s *Source) getAllProjectRepos(
|
||||
ctx context.Context,
|
||||
apiClient *gitlab.Client,
|
||||
ignoreRepo func(string) bool,
|
||||
reporter sources.UnitReporter,
|
||||
) error {
|
||||
// Projects without repo will get user projects, groups projects, and subgroup projects.
|
||||
user, _, err := apiClient.Users.CurrentUser()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to authenticate using %s: %w", s.authMethod, err)
|
||||
return fmt.Errorf("unable to authenticate using %s: %w", s.authMethod, err)
|
||||
}
|
||||
|
||||
uniqueProjects := make(map[int]*gitlab.Project)
|
||||
var (
|
||||
projects []*gitlab.Project
|
||||
projectsWithNamespace []string
|
||||
)
|
||||
// Record the projectsWithNamespace for logging.
|
||||
var projectsWithNamespace []string
|
||||
|
||||
// Used to filter out duplicate projects.
|
||||
processProjects := func(projList []*gitlab.Project) {
|
||||
processProjects := func(projList []*gitlab.Project) error {
|
||||
for _, proj := range projList {
|
||||
if _, exists := uniqueProjects[proj.ID]; !exists {
|
||||
uniqueProjects[proj.ID] = proj
|
||||
projects = append(projects, proj)
|
||||
projectsWithNamespace = append(projectsWithNamespace, proj.NameWithNamespace)
|
||||
// Skip projects we've already seen.
|
||||
if _, exists := uniqueProjects[proj.ID]; exists {
|
||||
continue
|
||||
}
|
||||
// Skip projects configured to be ignored.
|
||||
if ignoreRepo(proj.PathWithNamespace) {
|
||||
continue
|
||||
}
|
||||
// Record that we've seen this project.
|
||||
uniqueProjects[proj.ID] = proj
|
||||
projectsWithNamespace = append(projectsWithNamespace, proj.NameWithNamespace)
|
||||
// Report an error if we could not convert the project into a URL.
|
||||
if _, err := url.Parse(proj.HTTPURLToRepo); err != nil {
|
||||
err = fmt.Errorf("could not parse url %q given by project: %w", proj.HTTPURLToRepo, err)
|
||||
if err := reporter.UnitErr(ctx, err); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Report the unit.
|
||||
unit := sources.CommonSourceUnit{ID: proj.HTTPURLToRepo}
|
||||
if err := reporter.UnitOk(ctx, unit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
|
@ -344,9 +380,15 @@ func (s *Source) getAllProjects(ctx context.Context, apiClient *gitlab.Client) (
|
|||
for {
|
||||
userProjects, res, err := apiClient.Projects.ListUserProjects(user.ID, projectQueryOptions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("received error on listing user projects: %w", err)
|
||||
err = fmt.Errorf("received error on listing user projects: %w", err)
|
||||
if err := reporter.UnitErr(ctx, err); err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
}
|
||||
if err := processProjects(userProjects); err != nil {
|
||||
return err
|
||||
}
|
||||
processProjects(userProjects)
|
||||
projectQueryOptions.Page = res.NextPage
|
||||
if res.NextPage == 0 {
|
||||
break
|
||||
|
@ -368,7 +410,11 @@ func (s *Source) getAllProjects(ctx context.Context, apiClient *gitlab.Client) (
|
|||
for {
|
||||
groupList, res, err := apiClient.Groups.ListGroups(&listGroupsOptions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("received error on listing groups, you probably don't have permissions to do that: %w", err)
|
||||
err = fmt.Errorf("received error on listing groups, you probably don't have permissions to do that: %w", err)
|
||||
if err := reporter.UnitErr(ctx, err); err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
}
|
||||
groups = append(groups, groupList...)
|
||||
listGroupsOptions.Page = res.NextPage
|
||||
|
@ -386,13 +432,18 @@ func (s *Source) getAllProjects(ctx context.Context, apiClient *gitlab.Client) (
|
|||
for {
|
||||
grpPrjs, res, err := apiClient.Groups.ListGroupProjects(group.ID, listGroupProjectOptions)
|
||||
if err != nil {
|
||||
ctx.Logger().Info("received error on listing group projects, you probably don't have permissions to do that",
|
||||
"group", group.FullPath,
|
||||
"error", err,
|
||||
err = fmt.Errorf(
|
||||
"received error on listing group projects for %q, you probably don't have permissions to do that: %w",
|
||||
group.FullPath, err,
|
||||
)
|
||||
if err := reporter.UnitErr(ctx, err); err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
}
|
||||
processProjects(grpPrjs)
|
||||
if err := processProjects(grpPrjs); err != nil {
|
||||
return err
|
||||
}
|
||||
listGroupProjectOptions.Page = res.NextPage
|
||||
if res.NextPage == 0 {
|
||||
break
|
||||
|
@ -400,38 +451,10 @@ func (s *Source) getAllProjects(ctx context.Context, apiClient *gitlab.Client) (
|
|||
}
|
||||
}
|
||||
|
||||
ctx.Logger().Info("Enumerated GitLab projects", "count", len(projects))
|
||||
ctx.Logger().Info("Enumerated GitLab projects", "count", len(projectsWithNamespace))
|
||||
ctx.Logger().V(2).Info("Enumerated GitLab projects", "projects", projectsWithNamespace)
|
||||
|
||||
return projects, nil
|
||||
}
|
||||
|
||||
func (s *Source) getReposFromGitlab(ctx context.Context, apiClient *gitlab.Client, ignoreRepo func(repo string) bool) ([]string, error) {
|
||||
projects, err := s.getAllProjects(ctx, apiClient)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting all projects: %w", err)
|
||||
}
|
||||
|
||||
// Turn projects into URLs for Git cloner.
|
||||
var repos []string
|
||||
for _, prj := range projects {
|
||||
if ignoreRepo(prj.PathWithNamespace) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Ensure the urls are valid before adding them to the repo list.
|
||||
_, err := url.Parse(prj.HTTPURLToRepo)
|
||||
if err != nil {
|
||||
ctx.Logger().Error(err, "could not parse url given by project", "project", prj.HTTPURLToRepo)
|
||||
continue
|
||||
}
|
||||
repos = append(repos, prj.HTTPURLToRepo)
|
||||
}
|
||||
if len(repos) == 0 {
|
||||
return nil, fmt.Errorf("unable to discover any repos")
|
||||
}
|
||||
|
||||
return repos, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk) error {
|
||||
|
@ -564,3 +587,74 @@ func normalizeRepos(repos []string) ([]string, []error) {
|
|||
}
|
||||
return validRepos, errs
|
||||
}
|
||||
|
||||
// Enumerate reports all GitLab repositories to be scanned to the reporter. If
|
||||
// none are configured, it will find all repositories within all projects that
|
||||
// the configured user has access to, while respecting the configured ignore
|
||||
// rules.
|
||||
func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {
|
||||
// Start client.
|
||||
apiClient, err := s.newClient()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get repos within target.
|
||||
repos, errs := normalizeRepos(s.repos)
|
||||
for _, repoErr := range errs {
|
||||
ctx.Logger().Info("error normalizing repo", "error", repoErr)
|
||||
if err := reporter.UnitErr(ctx, repoErr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// End early if we had errors getting specified repos but none were validated.
|
||||
if len(errs) > 0 && len(repos) == 0 {
|
||||
return fmt.Errorf("all configured repos had validation issues")
|
||||
}
|
||||
|
||||
// Report all repos if specified.
|
||||
if len(repos) > 0 {
|
||||
for _, repo := range repos {
|
||||
unit := sources.CommonSourceUnit{ID: repo}
|
||||
if err := reporter.UnitOk(ctx, unit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Otherwise, enumerate all repos.
|
||||
ignoreRepo := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
|
||||
ctx.Logger().Error(err, "could not compile ignore repo glob", "glob", pattern)
|
||||
// TODO: Handle error returned from UnitErr.
|
||||
_ = reporter.UnitErr(ctx, fmt.Errorf("could not compile ignore repo glob: %w", err))
|
||||
})
|
||||
return s.getAllProjectRepos(ctx, apiClient, ignoreRepo, reporter)
|
||||
}
|
||||
|
||||
// ChunkUnit downloads and reports chunks for the given GitLab repository unit.
|
||||
func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporter sources.ChunkReporter) error {
|
||||
repoURL := unit.SourceUnitID()
|
||||
|
||||
var path string
|
||||
var repo *gogit.Repository
|
||||
var err error
|
||||
if s.authMethod == "UNAUTHENTICATED" {
|
||||
path, repo, err = git.CloneRepoUsingUnauthenticated(ctx, repoURL)
|
||||
} else {
|
||||
// If a username is not provided we need to use a default one in order to clone a private repo.
|
||||
// Not setting "placeholder" as s.user on purpose in case any downstream services rely on a "" value for s.user.
|
||||
user := s.user
|
||||
if user == "" {
|
||||
user = "placeholder"
|
||||
}
|
||||
path, repo, err = git.CloneRepoUsingToken(ctx, s.token, repoURL, user)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(path)
|
||||
|
||||
return s.git.ScanRepo(ctx, repo, path, s.scanOptions, reporter)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue