Log reasons for GitLab repo exclusion (#2875)

We have some evidence that some GitLab repos are getting incorrectly ignored, but it's not clear why this is happening, so this PR adds some more logging to the relevant code.
This commit is contained in:
Cody Rose 2024-05-23 09:08:36 -04:00 committed by GitHub
parent b9ea2f5d4e
commit f7214cfee3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -82,7 +82,7 @@ func (s *Source) JobID() sources.JobID {
}
// Init returns an initialized Gitlab source.
func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
s.name = name
s.sourceID = sourceId
s.jobID = jobId
@ -102,6 +102,7 @@ func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourc
s.repos = conn.Repositories
s.ignoreRepos = conn.IgnoreRepos
ctx.Logger().V(3).Info("setting ignore repos patterns", "patterns", s.ignoreRepos)
switch cred := conn.GetCredential().(type) {
case *sourcespb.GitLab_Token:
@ -409,19 +410,28 @@ func (s *Source) getAllProjectRepos(
// Used to filter out duplicate projects.
processProjects := func(projList []*gitlab.Project) error {
for _, proj := range projList {
ctx := context.WithValues(ctx,
"project_id", proj.ID,
"project_name", proj.NameWithNamespace)
// Skip projects we've already seen.
if _, exists := uniqueProjects[proj.ID]; exists {
ctx.Logger().V(3).Info("skipping project", "reason", "ID already seen")
continue
}
// Skip projects configured to be ignored.
if ignoreRepo(proj.PathWithNamespace) {
ctx.Logger().V(3).Info("skipping project", "reason", "ignored in config")
continue
}
// Record that we've seen this project.
uniqueProjects[proj.ID] = proj
projectsWithNamespace = append(projectsWithNamespace, proj.NameWithNamespace)
// Report an error if we could not convert the project into a URL.
if _, err := url.Parse(proj.HTTPURLToRepo); err != nil {
ctx.Logger().V(3).Info("skipping project",
"reason", "URL parse failure",
"url", proj.HTTPURLToRepo,
"parse_error", err)
err = fmt.Errorf("could not parse url %q given by project: %w", proj.HTTPURLToRepo, err)
if err := reporter.UnitErr(ctx, err); err != nil {
return err
@ -431,6 +441,7 @@ func (s *Source) getAllProjectRepos(
// Report the unit.
unit := git.SourceUnit{Kind: git.UnitRepo, ID: proj.HTTPURLToRepo}
gitlabReposEnumerated.WithLabelValues(s.name).Inc()
projectsWithNamespace = append(projectsWithNamespace, proj.NameWithNamespace)
if err := reporter.UnitOk(ctx, unit); err != nil {
return err
}