From 7af7115a134fcc1a16348739c3e635ef3bd29c71 Mon Sep 17 00:00:00 2001 From: Florian Weikert Date: Fri, 21 May 2021 14:39:10 +0200 Subject: [PATCH] Implement pagination support for GitHub repos (#241) Previously Bazelisk only read the first page of API responses. Tested locally. --- bazelisk_test.go | 8 +++--- core/core.go | 49 ++++++++++++++++++++++++++++--------- httputil/httputil.go | 55 +++++++++++++++++++++++++++++++----------- repositories/gcs.go | 4 +-- repositories/github.go | 31 +++++++++++++++++++++--- 5 files changed, 112 insertions(+), 35 deletions(-) diff --git a/bazelisk_test.go b/bazelisk_test.go index cb612b6a..63448f37 100644 --- a/bazelisk_test.go +++ b/bazelisk_test.go @@ -14,10 +14,12 @@ func TestScanIssuesForIncompatibleFlags(t *testing.T) { if err != nil { t.Errorf("Can not load sample github issues") } - flags, err := core.ScanIssuesForIncompatibleFlags(samplesJSON) - if err != nil || flags == nil { - t.Errorf("Could not parse sample issues") + issues, err := core.ParseIssues(samplesJSON) + if err != nil { + t.Errorf("Can not parse sample github issues: %v", err) } + + flags := core.ScanIssuesForIncompatibleFlags(issues) expectedFlagnames := []string{ "--//some/path:incompatible_user_defined_flag", "--incompatible_always_check_depset_elements", diff --git a/core/core.go b/core/core.go index 24b2e9ad..b910ba40 100644 --- a/core/core.go +++ b/core/core.go @@ -441,7 +441,8 @@ type issue struct { Labels []label `json:"labels"` } -type issueList struct { +// IssueList is visible for testing +type IssueList struct { Items []issue `json:"items"` } @@ -464,27 +465,51 @@ func getIncompatibleFlags(bazeliskHome, resolvedBazelVersion string) (map[string return nil, fmt.Errorf("invalid version %v", resolvedBazelVersion) } url := "https://api.github.com/search/issues?per_page=100&q=repo:bazelbuild/bazel+label:migration-" + version - issuesJSON, err := httputil.MaybeDownload(bazeliskHome, url, "flags-"+version, "list of flags from GitHub", GetEnvOrConfig("BAZELISK_GITHUB_TOKEN")) + + var issueList IssueList + + merger := func(chunks [][]byte) ([]byte, error) { + for _, chunk := range chunks { + current, err := ParseIssues(chunk) + if err != nil { + return nil, err + } + issueList.Items = append(issueList.Items, current.Items...) + } + return json.Marshal(issueList) + } + + issuesJSON, err := httputil.MaybeDownload(bazeliskHome, url, "flags-"+version, "list of flags from GitHub", GetEnvOrConfig("BAZELISK_GITHUB_TOKEN"), merger) if err != nil { return nil, fmt.Errorf("could not get issues from GitHub: %v", err) } - result, err := ScanIssuesForIncompatibleFlags(issuesJSON) - return result, err + if len(issueList.Items) == 0 { + issueList, err = ParseIssues(issuesJSON) + if err != nil { + return nil, err + } + } + + return ScanIssuesForIncompatibleFlags(issueList), nil +} + +// ParseIssues is visible for testing +func ParseIssues(data []byte) (IssueList, error) { + var result IssueList + if err := json.Unmarshal(data, &result); err != nil { + return result, fmt.Errorf("could not parse JSON into list of issues: %v", err) + } + return result, nil } // ScanIssuesForIncompatibleFlags is visible for testing. // TODO: move this function and its test into a dedicated package. -func ScanIssuesForIncompatibleFlags(issuesJSON []byte) (map[string]*FlagDetails, error) { +func ScanIssuesForIncompatibleFlags(issues IssueList) map[string]*FlagDetails { result := make(map[string]*FlagDetails) - var issueList issueList - if err := json.Unmarshal(issuesJSON, &issueList); err != nil { - return nil, fmt.Errorf("could not parse JSON into list of issues: %v", err) - } - re := regexp.MustCompile(`^incompatible_\w+`) sRe := regexp.MustCompile(`^//.*[^/]:incompatible_\w+`) - for _, issue := range issueList.Items { + for _, issue := range issues.Items { flag := re.FindString(issue.Title) if len(flag) <= 0 { flag = sRe.FindString(issue.Title) @@ -499,7 +524,7 @@ func ScanIssuesForIncompatibleFlags(issuesJSON []byte) (map[string]*FlagDetails, } } - return result, nil + return result } func getBreakingRelease(labels []label) string { diff --git a/httputil/httputil.go b/httputil/httputil.go index 5a2b09d9..d177f45f 100644 --- a/httputil/httputil.go +++ b/httputil/httputil.go @@ -9,24 +9,26 @@ import ( "net/http" "os" "path/filepath" + "regexp" "time" ) var ( // DefaultTransport specifies the http.RoundTripper that is used for any network traffic, and may be replaced with a dummy implementation for unit testing. DefaultTransport = http.DefaultTransport + linkPattern = regexp.MustCompile(`<(.*?)>; rel="(\w+)"`) ) func getClient() *http.Client { return &http.Client{Transport: DefaultTransport} } -// ReadRemoteFile returns the contents of the given file, using the supplied Authorization token, if set. -func ReadRemoteFile(url string, token string) ([]byte, error) { +// ReadRemoteFile returns the contents of the given file, using the supplied Authorization token, if set. It also returns the HTTP headers. +func ReadRemoteFile(url string, token string) ([]byte, http.Header, error) { client := getClient() req, err := http.NewRequest("GET", url, nil) if err != nil { - return nil, fmt.Errorf("could not create request: %v", err) + return nil, nil, fmt.Errorf("could not create request: %v", err) } if token != "" { @@ -35,19 +37,19 @@ func ReadRemoteFile(url string, token string) ([]byte, error) { res, err := client.Do(req) if err != nil { - return nil, fmt.Errorf("could not fetch %s: %v", url, err) + return nil, nil, fmt.Errorf("could not fetch %s: %v", url, err) } defer res.Body.Close() if res.StatusCode != 200 { - return nil, fmt.Errorf("unexpected status code while reading %s: %v", url, res.StatusCode) + return nil, res.Header, fmt.Errorf("unexpected status code while reading %s: %v", url, res.StatusCode) } body, err := ioutil.ReadAll(res.Body) if err != nil { - return nil, fmt.Errorf("failed to read content at %s: %v", url, err) + return nil, res.Header, fmt.Errorf("failed to read content at %s: %v", url, err) } - return body, nil + return body, res.Header, nil } // DownloadBinary downloads a file from the given URL into the specified location, marks it executable and returns its full path. @@ -101,12 +103,13 @@ func DownloadBinary(originURL, destDir, destFile string) (string, error) { return destinationPath, nil } +type ContentMerger func([][]byte) ([]byte, error) + // MaybeDownload downloads a file from the given url and caches the result under bazeliskHome. // It skips the download if the file already exists and is not outdated. // Parameter ´description´ is only used to provide better error messages. -func MaybeDownload(bazeliskHome, url, filename, description, token string) ([]byte, error) { +func MaybeDownload(bazeliskHome, url, filename, description, token string, merger ContentMerger) ([]byte, error) { cachePath := filepath.Join(bazeliskHome, filename) - if cacheStat, err := os.Stat(cachePath); err == nil { if time.Since(cacheStat.ModTime()).Hours() < 1 { res, err := ioutil.ReadFile(cachePath) @@ -117,16 +120,40 @@ func MaybeDownload(bazeliskHome, url, filename, description, token string) ([]by } } - // We could also use go-github here, but I can't get it to build with Bazel's rules_go and it pulls in a lot of dependencies. - body, err := ReadRemoteFile(url, token) + contents := make([][]byte, 0) + nextUrl := url + for nextUrl != "" { + // We could also use go-github here, but I can't get it to build with Bazel's rules_go and it pulls in a lot of dependencies. + body, headers, err := ReadRemoteFile(nextUrl, token) + if err != nil { + return nil, fmt.Errorf("could not download %s: %v", description, err) + } + contents = append(contents, body) + nextUrl = getNextUrl(headers) + } + + merged, err := merger(contents) if err != nil { - return nil, fmt.Errorf("could not download %s: %v", description, err) + return nil, fmt.Errorf("failed to merge %d chunks from %s: %v", len(contents), url, err) } - err = ioutil.WriteFile(cachePath, body, 0666) + err = ioutil.WriteFile(cachePath, merged, 0666) if err != nil { return nil, fmt.Errorf("could not create %s: %v", cachePath, err) } - return body, nil + return merged, nil +} + +func getNextUrl(headers http.Header) string { + links := headers["Link"] + if len(links) != 1 { + return "" + } + for _, m := range linkPattern.FindAllStringSubmatch(links[0], -1) { + if m[2] == "next" { + return m[1] + } + } + return "" } diff --git a/repositories/gcs.go b/repositories/gcs.go index 6f74b369..82e33ddc 100644 --- a/repositories/gcs.go +++ b/repositories/gcs.go @@ -76,7 +76,7 @@ func listDirectoriesInReleaseBucket(prefix string) ([]string, bool, error) { if nextPageToken != "" { url = fmt.Sprintf("%s&pageToken=%s", baseURL, nextPageToken) } - content, err := httputil.ReadRemoteFile(url, "") + content, _, err := httputil.ReadRemoteFile(url, "") if err != nil { return nil, false, fmt.Errorf("could not list GCS objects at %s: %v", url, err) } @@ -226,7 +226,7 @@ func (gcs *GCSRepo) DownloadCandidate(version, destDir, destFile string) (string // it's https://buildkite.com/bazel/bazel-bazel func (gcs *GCSRepo) GetLastGreenCommit(bazeliskHome string, downstreamGreen bool) (string, error) { pathSuffix := lastGreenCommitPathSuffixes[downstreamGreen] - content, err := httputil.ReadRemoteFile(lastGreenBaseURL+pathSuffix, "") + content, _, err := httputil.ReadRemoteFile(lastGreenBaseURL+pathSuffix, "") if err != nil { return "", fmt.Errorf("could not determine last green commit: %v", err) } diff --git a/repositories/github.go b/repositories/github.go index 4fab3519..e3134cc4 100644 --- a/repositories/github.go +++ b/repositories/github.go @@ -31,15 +31,38 @@ func (gh *GitHubRepo) GetVersions(bazeliskHome, bazelFork string) ([]string, err } func (gh *GitHubRepo) getFilteredVersions(bazeliskHome, bazelFork string, wantPrerelease bool) ([]string, error) { + parse := func(data []byte) ([]gitHubRelease, error) { + var releases []gitHubRelease + if err := json.Unmarshal(data, &releases); err != nil { + return nil, fmt.Errorf("could not parse JSON into list of releases: %v", err) + } + return releases, nil + } + + var releases []gitHubRelease + + merger := func(chunks [][]byte) ([]byte, error) { + for _, chunk := range chunks { + current, err := parse(chunk) + if err != nil { + return nil, err + } + releases = append(releases, current...) + } + return json.Marshal(releases) + } + url := fmt.Sprintf("https://api.github.com/repos/%s/bazel/releases", bazelFork) - releasesJSON, err := httputil.MaybeDownload(bazeliskHome, url, bazelFork+"-releases.json", "list of Bazel releases from github.com/"+bazelFork, gh.token) + releasesJSON, err := httputil.MaybeDownload(bazeliskHome, url, bazelFork+"-releases.json", "list of Bazel releases from github.com/"+bazelFork, gh.token, merger) if err != nil { return []string{}, fmt.Errorf("unable to dermine '%s' releases: %v", bazelFork, err) } - var releases []gitHubRelease - if err := json.Unmarshal(releasesJSON, &releases); err != nil { - return []string{}, fmt.Errorf("could not parse JSON into list of releases: %v", err) + if len(releases) == 0 { + releases, err = parse(releasesJSON) + if err != nil { + return nil, err + } } var tags []string