Only scan gist comments or repo comments. (#1646)

This commit is contained in:
ahrav 2023-08-20 11:38:28 -07:00 committed by GitHub
parent 64dd49f9ce
commit d51e3b6d83
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 36 deletions

View file

@ -988,46 +988,51 @@ func (s *Source) scanComments(ctx context.Context, repoPath string, chunksChan c
trimmedURL := removeURLAndSplit(repoURL.String())
if repoURL.Host == "gist.github.com" && s.includeGistComments {
s.log.Info("scanning github gist comments", "repository", repoPath)
// GitHub Gist URL.
var gistId string
if len(trimmedURL) == 2 {
// https://gist.github.com/<id>
gistId = trimmedURL[1]
} else if len(trimmedURL) == 3 {
// https://gist.github.com/<owner>/<id>
gistId = trimmedURL[2]
} else {
return fmt.Errorf("failed to parse Gist URL: '%s'", repoURL.String())
}
options := &github.ListOptions{
PerPage: defaultPagination,
Page: initialPage,
}
for {
comments, resp, err := s.apiClient.Gists.ListComments(ctx, gistId, options)
if s.handleRateLimit(err, resp) {
break
}
if err != nil {
return err
}
err = s.chunkGistComments(ctx, repoURL.String(), comments, chunksChan)
if err != nil {
return err
}
options.Page++
if len(comments) < options.PerPage {
break
}
}
return s.processGistComments(ctx, repoPath, trimmedURL, repoURL, chunksChan)
}
return s.processRepoComments(ctx, repoPath, trimmedURL, repoURL, chunksChan)
}
// processGistComments pages through the comments of a single gist and hands
// each page to chunkGistComments.
//
// The gist ID is derived from trimmedURL via extractGistID; repoURL is passed
// on (as a string) to chunkGistComments, and repoPath is only used for logging.
// The first error from extractGistID, the list call, or chunkGistComments is
// returned unchanged.
func (s *Source) processGistComments(ctx context.Context, repoPath string, trimmedURL []string, repoURL *url.URL, chunksChan chan *sources.Chunk) error {
	s.log.Info("scanning github gist comments", "repository", repoPath)

	id, err := extractGistID(trimmedURL)
	if err != nil {
		return err
	}

	opts := &github.ListOptions{
		Page:    initialPage,
		PerPage: defaultPagination,
	}
	for {
		page, resp, listErr := s.apiClient.Gists.ListComments(ctx, id, opts)
		// NOTE(review): when handleRateLimit reports true the scan stops here
		// and nil is returned, so any remaining pages are never requested.
		// Confirm this is intended rather than a retry of the same page.
		if s.handleRateLimit(listErr, resp) {
			return nil
		}
		if listErr != nil {
			return listErr
		}

		if chunkErr := s.chunkGistComments(ctx, repoURL.String(), page, chunksChan); chunkErr != nil {
			return chunkErr
		}

		// Advance first, then stop once a page comes back shorter than the
		// requested page size.
		opts.Page++
		if len(page) < opts.PerPage {
			return nil
		}
	}
}
// extractGistID returns the gist ID from the segments of a split gist URL.
//
// Two layouts are accepted, and in both the ID is the final segment:
//
//	https://gist.github.com/<id>          -> 2 segments
//	https://gist.github.com/<owner>/<id>  -> 3 segments
//
// An error is returned for any other segment count, or when the final segment
// is empty, since an empty ID cannot identify a gist.
func extractGistID(trimmedURL []string) (string, error) {
	// The parameter is deliberately not named "url": that would shadow the
	// net/url package used elsewhere in this file.
	n := len(trimmedURL)
	if n != 2 && n != 3 {
		return "", fmt.Errorf("failed to parse Gist URL: expected 2 or 3 segments, got %d", n)
	}

	id := trimmedURL[n-1]
	if id == "" {
		return "", fmt.Errorf("failed to parse Gist URL: empty gist ID in %q", trimmedURL)
	}
	return id, nil
}
// Note: these can't be consts because their addresses are needed when they are passed to the GitHub library.
var (
// sortType defines the criteria for sorting comments.

View file

@ -743,3 +743,22 @@ func TestProcessRepoComments(t *testing.T) {
})
}
}
// TestGetGistID exercises extractGistID with two- and three-segment gist URLs,
// where the last segment is expected back as the ID, and with too-short and
// too-long inputs, where an error and an empty ID are expected.
func TestGetGistID(t *testing.T) {
	type testCase struct {
		segments []string
		wantID   string
		wantErr  bool
	}

	cases := []testCase{
		{segments: []string{"https://gist.github.com", "12345"}, wantID: "12345", wantErr: false},
		{segments: []string{"https://gist.github.com", "owner", "12345"}, wantID: "12345", wantErr: false},
		{segments: []string{"https://gist.github.com"}, wantID: "", wantErr: true},
		{segments: []string{"https://gist.github.com", "owner", "12345", "extra"}, wantID: "", wantErr: true},
	}

	for _, tc := range cases {
		id, err := extractGistID(tc.segments)
		gotErr := err != nil
		assert.Equal(t, tc.wantErr, gotErr)
		assert.Equal(t, tc.wantID, id)
	}
}