From 9ebe39cd090ac49d4345ba5c8459328ecd146b91 Mon Sep 17 00:00:00 2001 From: Carlos Alexandro Becker Date: Tue, 9 Jul 2024 16:07:39 -0300 Subject: [PATCH] feat: improve gitlab/github readme url (#456) * Use GitHub API to find readme filename * Fix lint errors and typos * Bring back "tries to find" instead of "finds" * Rename `readmeURL` to `apiURL` * Don't close body * Use GitLab API to find readme filename * feat: improve gitlab/github readme url Signed-off-by: Carlos A Becker --------- Signed-off-by: Carlos A Becker Signed-off-by: Carlos Alexandro Becker Co-authored-by: danielwerg <35052399+danielwerg@users.noreply.github.com> --- github.go | 57 +++++++++++++++++++++----------------- gitlab.go | 60 +++++++++++++++++++++++----------------- main.go | 16 ++--------- url.go | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++ url_test.go | 29 +++++++++++++++++++ 5 files changed, 178 insertions(+), 64 deletions(-) create mode 100644 url.go create mode 100644 url_test.go diff --git a/github.go b/github.go index ac64096..6b169fa 100644 --- a/github.go +++ b/github.go @@ -1,50 +1,55 @@ package main import ( + "encoding/json" "errors" "fmt" + "io" "net/http" "net/url" "strings" ) -// isGitHubURL tests a string to determine if it is a well-structured GitHub URL. -func isGitHubURL(s string) (string, bool) { - if strings.HasPrefix(s, "github.com/") { - s = "https://" + s +// findGitHubREADME tries to find the correct README filename in a repository using GitHub API. +func findGitHubREADME(u *url.URL) (*source, error) { + owner, repo, ok := strings.Cut(strings.TrimPrefix(u.Path, "/"), "/") + if !ok { + return nil, fmt.Errorf("invalid url: %s", u.String()) } - u, err := url.ParseRequestURI(s) - if err != nil { - return "", false + type readme struct { + DownloadURL string `json:"download_url"` } - return u.String(), strings.ToLower(u.Host) == "github.com" -} + apiURL := fmt.Sprintf("https://api.%s/repos/%s/%s/readme", u.Hostname(), owner, repo) -// findGitHubREADME tries to find the correct README filename in a repository. -func findGitHubREADME(s string) (*source, error) { - u, err := url.ParseRequestURI(s) + // nolint:bodyclose + // it is closed on the caller + res, err := http.Get(apiURL) // nolint: gosec if err != nil { return nil, err } - u.Host = "raw.githubusercontent.com" - for _, b := range readmeBranches { - for _, r := range readmeNames { - v := *u - v.Path += fmt.Sprintf("/%s/%s", b, r) + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, err + } - // nolint:bodyclose - // it is closed on the caller - resp, err := http.Get(v.String()) - if err != nil { - return nil, err - } + var result readme + if err := json.Unmarshal(body, &result); err != nil { + return nil, err + } - if resp.StatusCode == http.StatusOK { - return &source{resp.Body, v.String()}, nil - } + if res.StatusCode == http.StatusOK { + // nolint:bodyclose + // it is closed on the caller + resp, err := http.Get(result.DownloadURL) + if err != nil { + return nil, err + } + + if resp.StatusCode == http.StatusOK { + return &source{resp.Body, result.DownloadURL}, nil } } diff --git a/gitlab.go b/gitlab.go index 47059bb..05e1239 100644 --- a/gitlab.go +++ b/gitlab.go @@ -1,49 +1,59 @@ package main import ( + "encoding/json" "errors" "fmt" + "io" "net/http" "net/url" "strings" ) -// isGitLabURL tests a string to determine if it is a well-structured GitLab URL. -func isGitLabURL(s string) (string, bool) { - if strings.HasPrefix(s, "gitlab.com/") { - s = "https://" + s +// findGitLabREADME tries to find the correct README filename in a repository using GitLab API. +func findGitLabREADME(u *url.URL) (*source, error) { + owner, repo, ok := strings.Cut(strings.TrimPrefix(u.Path, "/"), "/") + if !ok { + return nil, fmt.Errorf("invalid url: %s", u.String()) } - u, err := url.ParseRequestURI(s) - if err != nil { - return "", false + projectPath := url.QueryEscape(owner + "/" + repo) + + type readme struct { + ReadmeURL string `json:"readme_url"` } - return u.String(), strings.ToLower(u.Host) == "gitlab.com" -} + apiURL := fmt.Sprintf("https://%s/api/v4/projects/%s", u.Hostname(), projectPath) -// findGitLabREADME tries to find the correct README filename in a repository. -func findGitLabREADME(s string) (*source, error) { - u, err := url.ParseRequestURI(s) + // nolint:bodyclose + // it is closed on the caller + res, err := http.Get(apiURL) // nolint: gosec if err != nil { return nil, err } - for _, b := range readmeBranches { - for _, r := range readmeNames { - v := *u - v.Path += fmt.Sprintf("/raw/%s/%s", b, r) + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, err + } - // nolint:bodyclose - // it is closed on the caller - resp, err := http.Get(v.String()) - if err != nil { - return nil, err - } + var result readme + if err := json.Unmarshal(body, &result); err != nil { + return nil, err + } - if resp.StatusCode == http.StatusOK { - return &source{resp.Body, v.String()}, nil - } + readmeRawURL := strings.Replace(result.ReadmeURL, "blob", "raw", -1) + + if res.StatusCode == http.StatusOK { + // nolint:bodyclose + // it is closed on the caller + resp, err := http.Get(readmeRawURL) // nolint: gosec + if err != nil { + return nil, err + } + + if resp.StatusCode == http.StatusOK { + return &source{resp.Body, readmeRawURL}, nil } } diff --git a/main.go b/main.go index 16b0c05..832cb9a 100644 --- a/main.go +++ b/main.go @@ -72,19 +72,9 @@ func sourceFromArg(arg string) (*source, error) { } // a GitHub or GitLab URL (even without the protocol): - if u, ok := isGitHubURL(arg); ok { - src, err := findGitHubREADME(u) - if err != nil { - return nil, err - } - return src, nil - } - if u, ok := isGitLabURL(arg); ok { - src, err := findGitLabREADME(u) - if err != nil { - return nil, err - } - return src, nil + src, err := readmeURL(arg) + if src != nil || err != nil { + return src, err } // HTTP(S) URLs: diff --git a/url.go b/url.go new file mode 100644 index 0000000..43ca167 --- /dev/null +++ b/url.go @@ -0,0 +1,80 @@ +package main + +import ( + "net/url" + "strings" + "sync" +) + +const ( + protoGithub = "github://" + protoGitlab = "gitlab://" + protoHTTPS = "https://" +) + +var ( + githubURL *url.URL + gitlabURL *url.URL + urlsOnce sync.Once +) + +func init() { + urlsOnce.Do(func() { + githubURL, _ = url.Parse("https://github.com") + gitlabURL, _ = url.Parse("https://gitlab.com") + }) +} + +func readmeURL(path string) (*source, error) { + switch { + case strings.HasPrefix(path, protoGithub): + if u := githubReadmeURL(path); u != nil { + return readmeURL(u.String()) + } + return nil, nil + case strings.HasPrefix(path, protoGitlab): + if u := gitlabReadmeURL(path); u != nil { + return readmeURL(u.String()) + } + return nil, nil + } + + if !strings.HasPrefix(path, protoHTTPS) { + path = protoHTTPS + path + } + u, err := url.Parse(path) + if err != nil { + return nil, err + } + + switch { + case u.Hostname() == githubURL.Hostname(): + return findGitHubREADME(u) + case u.Hostname() == gitlabURL.Hostname(): + return findGitLabREADME(u) + } + + return nil, nil +} + +func githubReadmeURL(path string) *url.URL { + path = strings.TrimPrefix(path, protoGithub) + parts := strings.Split(path, "/") + if len(parts) != 2 { + // custom hostnames are not supported yet + return nil + } + u, _ := url.Parse(githubURL.String()) + return u.JoinPath(path) +} + +func gitlabReadmeURL(path string) *url.URL { + path = strings.TrimPrefix(path, protoGitlab) + parts := strings.Split(path, "/") + if len(parts) != 2 { + // custom hostnames are not supported yet + return nil + } + u, _ := url.Parse(gitlabURL.String()) + return u.JoinPath(path) +} diff --git a/url_test.go b/url_test.go new file mode 100644 index 0000000..f19e57f --- /dev/null +++ b/url_test.go @@ -0,0 +1,29 @@ +package main + +import "testing" + +func TestURLParser(t *testing.T) { + for path, url := range map[string]string{ + "github.com/charmbracelet/glow": "https://raw.githubusercontent.com/charmbracelet/glow/master/README.md", + "github://charmbracelet/glow": "https://raw.githubusercontent.com/charmbracelet/glow/master/README.md", + "github://caarlos0/dotfiles.fish": "https://raw.githubusercontent.com/caarlos0/dotfiles.fish/main/README.md", + "github://tj/git-extras": "https://raw.githubusercontent.com/tj/git-extras/main/Readme.md", + "https://github.com/goreleaser/nfpm": "https://raw.githubusercontent.com/goreleaser/nfpm/main/README.md", + "gitlab.com/caarlos0/test": "https://gitlab.com/caarlos0/test/-/raw/master/README.md", + "gitlab://caarlos0/test": "https://gitlab.com/caarlos0/test/-/raw/master/README.md", + "https://gitlab.com/terrakok/gitlab-client": "https://gitlab.com/terrakok/gitlab-client/-/raw/develop/Readme.md", + } { + t.Run(path, func(t *testing.T) { + got, err := readmeURL(path) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got == nil { + t.Fatalf("should not be nil") + } + if url != got.URL { + t.Errorf("expected url for %s to be %s, was %s", path, url, got.URL) + } + }) + } +}