Skip to content

Commit

Permalink
Implement pagination support for GitHub repos (bazelbuild#241)
Browse files Browse the repository at this point in the history
Previously Bazelisk only read the first page of API responses.

Tested locally.
  • Loading branch information
fweikert authored May 21, 2021
1 parent 42672df commit 7af7115
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 35 deletions.
8 changes: 5 additions & 3 deletions bazelisk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ func TestScanIssuesForIncompatibleFlags(t *testing.T) {
if err != nil {
t.Errorf("Can not load sample github issues")
}
flags, err := core.ScanIssuesForIncompatibleFlags(samplesJSON)
if err != nil || flags == nil {
t.Errorf("Could not parse sample issues")
issues, err := core.ParseIssues(samplesJSON)
if err != nil {
t.Errorf("Can not parse sample github issues: %v", err)
}

flags := core.ScanIssuesForIncompatibleFlags(issues)
expectedFlagnames := []string{
"--//some/path:incompatible_user_defined_flag",
"--incompatible_always_check_depset_elements",
Expand Down
49 changes: 37 additions & 12 deletions core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,8 @@ type issue struct {
Labels []label `json:"labels"`
}

type issueList struct {
// IssueList is visible for testing
type IssueList struct {
Items []issue `json:"items"`
}

Expand All @@ -464,27 +465,51 @@ func getIncompatibleFlags(bazeliskHome, resolvedBazelVersion string) (map[string
return nil, fmt.Errorf("invalid version %v", resolvedBazelVersion)
}
url := "https://api.github.com/search/issues?per_page=100&q=repo:bazelbuild/bazel+label:migration-" + version
issuesJSON, err := httputil.MaybeDownload(bazeliskHome, url, "flags-"+version, "list of flags from GitHub", GetEnvOrConfig("BAZELISK_GITHUB_TOKEN"))

var issueList IssueList

merger := func(chunks [][]byte) ([]byte, error) {
for _, chunk := range chunks {
current, err := ParseIssues(chunk)
if err != nil {
return nil, err
}
issueList.Items = append(issueList.Items, current.Items...)
}
return json.Marshal(issueList)
}

issuesJSON, err := httputil.MaybeDownload(bazeliskHome, url, "flags-"+version, "list of flags from GitHub", GetEnvOrConfig("BAZELISK_GITHUB_TOKEN"), merger)
if err != nil {
return nil, fmt.Errorf("could not get issues from GitHub: %v", err)
}

result, err := ScanIssuesForIncompatibleFlags(issuesJSON)
return result, err
if len(issueList.Items) == 0 {
issueList, err = ParseIssues(issuesJSON)
if err != nil {
return nil, err
}
}

return ScanIssuesForIncompatibleFlags(issueList), nil
}

// ParseIssues is visible for testing
func ParseIssues(data []byte) (IssueList, error) {
var result IssueList
if err := json.Unmarshal(data, &result); err != nil {
return result, fmt.Errorf("could not parse JSON into list of issues: %v", err)
}
return result, nil
}

// ScanIssuesForIncompatibleFlags is visible for testing.
// TODO: move this function and its test into a dedicated package.
func ScanIssuesForIncompatibleFlags(issuesJSON []byte) (map[string]*FlagDetails, error) {
func ScanIssuesForIncompatibleFlags(issues IssueList) map[string]*FlagDetails {
result := make(map[string]*FlagDetails)
var issueList issueList
if err := json.Unmarshal(issuesJSON, &issueList); err != nil {
return nil, fmt.Errorf("could not parse JSON into list of issues: %v", err)
}

re := regexp.MustCompile(`^incompatible_\w+`)
sRe := regexp.MustCompile(`^//.*[^/]:incompatible_\w+`)
for _, issue := range issueList.Items {
for _, issue := range issues.Items {
flag := re.FindString(issue.Title)
if len(flag) <= 0 {
flag = sRe.FindString(issue.Title)
Expand All @@ -499,7 +524,7 @@ func ScanIssuesForIncompatibleFlags(issuesJSON []byte) (map[string]*FlagDetails,
}
}

return result, nil
return result
}

func getBreakingRelease(labels []label) string {
Expand Down
55 changes: 41 additions & 14 deletions httputil/httputil.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,26 @@ import (
"net/http"
"os"
"path/filepath"
"regexp"
"time"
)

var (
// DefaultTransport specifies the http.RoundTripper that is used for any network traffic, and may be replaced with a dummy implementation for unit testing.
DefaultTransport = http.DefaultTransport
linkPattern = regexp.MustCompile(`<(.*?)>; rel="(\w+)"`)
)

func getClient() *http.Client {
return &http.Client{Transport: DefaultTransport}
}

// ReadRemoteFile returns the contents of the given file, using the supplied Authorization token, if set.
func ReadRemoteFile(url string, token string) ([]byte, error) {
// ReadRemoteFile returns the contents of the given file, using the supplied Authorization token, if set. It also returns the HTTP headers.
func ReadRemoteFile(url string, token string) ([]byte, http.Header, error) {
client := getClient()
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, fmt.Errorf("could not create request: %v", err)
return nil, nil, fmt.Errorf("could not create request: %v", err)
}

if token != "" {
Expand All @@ -35,19 +37,19 @@ func ReadRemoteFile(url string, token string) ([]byte, error) {

res, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("could not fetch %s: %v", url, err)
return nil, nil, fmt.Errorf("could not fetch %s: %v", url, err)
}
defer res.Body.Close()

if res.StatusCode != 200 {
return nil, fmt.Errorf("unexpected status code while reading %s: %v", url, res.StatusCode)
return nil, res.Header, fmt.Errorf("unexpected status code while reading %s: %v", url, res.StatusCode)
}

body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, fmt.Errorf("failed to read content at %s: %v", url, err)
return nil, res.Header, fmt.Errorf("failed to read content at %s: %v", url, err)
}
return body, nil
return body, res.Header, nil
}

// DownloadBinary downloads a file from the given URL into the specified location, marks it executable and returns its full path.
Expand Down Expand Up @@ -101,12 +103,13 @@ func DownloadBinary(originURL, destDir, destFile string) (string, error) {
return destinationPath, nil
}

type ContentMerger func([][]byte) ([]byte, error)

// MaybeDownload downloads a file from the given url and caches the result under bazeliskHome.
// It skips the download if the file already exists and is not outdated.
// Parameter ´description´ is only used to provide better error messages.
func MaybeDownload(bazeliskHome, url, filename, description, token string) ([]byte, error) {
func MaybeDownload(bazeliskHome, url, filename, description, token string, merger ContentMerger) ([]byte, error) {
cachePath := filepath.Join(bazeliskHome, filename)

if cacheStat, err := os.Stat(cachePath); err == nil {
if time.Since(cacheStat.ModTime()).Hours() < 1 {
res, err := ioutil.ReadFile(cachePath)
Expand All @@ -117,16 +120,40 @@ func MaybeDownload(bazeliskHome, url, filename, description, token string) ([]by
}
}

// We could also use go-github here, but I can't get it to build with Bazel's rules_go and it pulls in a lot of dependencies.
body, err := ReadRemoteFile(url, token)
contents := make([][]byte, 0)
nextUrl := url
for nextUrl != "" {
// We could also use go-github here, but I can't get it to build with Bazel's rules_go and it pulls in a lot of dependencies.
body, headers, err := ReadRemoteFile(nextUrl, token)
if err != nil {
return nil, fmt.Errorf("could not download %s: %v", description, err)
}
contents = append(contents, body)
nextUrl = getNextUrl(headers)
}

merged, err := merger(contents)
if err != nil {
return nil, fmt.Errorf("could not download %s: %v", description, err)
return nil, fmt.Errorf("failed to merge %d chunks from %s: %v", len(contents), url, err)
}

err = ioutil.WriteFile(cachePath, body, 0666)
err = ioutil.WriteFile(cachePath, merged, 0666)
if err != nil {
return nil, fmt.Errorf("could not create %s: %v", cachePath, err)
}

return body, nil
return merged, nil
}

func getNextUrl(headers http.Header) string {
links := headers["Link"]
if len(links) != 1 {
return ""
}
for _, m := range linkPattern.FindAllStringSubmatch(links[0], -1) {
if m[2] == "next" {
return m[1]
}
}
return ""
}
4 changes: 2 additions & 2 deletions repositories/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func listDirectoriesInReleaseBucket(prefix string) ([]string, bool, error) {
if nextPageToken != "" {
url = fmt.Sprintf("%s&pageToken=%s", baseURL, nextPageToken)
}
content, err := httputil.ReadRemoteFile(url, "")
content, _, err := httputil.ReadRemoteFile(url, "")
if err != nil {
return nil, false, fmt.Errorf("could not list GCS objects at %s: %v", url, err)
}
Expand Down Expand Up @@ -226,7 +226,7 @@ func (gcs *GCSRepo) DownloadCandidate(version, destDir, destFile string) (string
// it's https://buildkite.com/bazel/bazel-bazel
func (gcs *GCSRepo) GetLastGreenCommit(bazeliskHome string, downstreamGreen bool) (string, error) {
pathSuffix := lastGreenCommitPathSuffixes[downstreamGreen]
content, err := httputil.ReadRemoteFile(lastGreenBaseURL+pathSuffix, "")
content, _, err := httputil.ReadRemoteFile(lastGreenBaseURL+pathSuffix, "")
if err != nil {
return "", fmt.Errorf("could not determine last green commit: %v", err)
}
Expand Down
31 changes: 27 additions & 4 deletions repositories/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,38 @@ func (gh *GitHubRepo) GetVersions(bazeliskHome, bazelFork string) ([]string, err
}

func (gh *GitHubRepo) getFilteredVersions(bazeliskHome, bazelFork string, wantPrerelease bool) ([]string, error) {
parse := func(data []byte) ([]gitHubRelease, error) {
var releases []gitHubRelease
if err := json.Unmarshal(data, &releases); err != nil {
return nil, fmt.Errorf("could not parse JSON into list of releases: %v", err)
}
return releases, nil
}

var releases []gitHubRelease

merger := func(chunks [][]byte) ([]byte, error) {
for _, chunk := range chunks {
current, err := parse(chunk)
if err != nil {
return nil, err
}
releases = append(releases, current...)
}
return json.Marshal(releases)
}

url := fmt.Sprintf("https://api.github.com/repos/%s/bazel/releases", bazelFork)
releasesJSON, err := httputil.MaybeDownload(bazeliskHome, url, bazelFork+"-releases.json", "list of Bazel releases from github.com/"+bazelFork, gh.token)
releasesJSON, err := httputil.MaybeDownload(bazeliskHome, url, bazelFork+"-releases.json", "list of Bazel releases from github.com/"+bazelFork, gh.token, merger)
if err != nil {
return []string{}, fmt.Errorf("unable to dermine '%s' releases: %v", bazelFork, err)
}

var releases []gitHubRelease
if err := json.Unmarshal(releasesJSON, &releases); err != nil {
return []string{}, fmt.Errorf("could not parse JSON into list of releases: %v", err)
if len(releases) == 0 {
releases, err = parse(releasesJSON)
if err != nil {
return nil, err
}
}

var tags []string
Expand Down

0 comments on commit 7af7115

Please sign in to comment.