Skip to content

Commit

Permalink
Use gopkg.in/src-d/go-git.v4/ for Git indexing
Browse files Browse the repository at this point in the history
This library is written in pure Go, so:

* it simplifies compilation and deployment

* no more memory leaks due to forgotten Free() calls

* potential for lower memory use in large superprojects, since
  Repository objects don't have to be Free()d individually.

This comes at the cost of a 30% end-to-end slowdown when indexing the
Gerrit repo.

Change-Id: Id0e51e5cf9e6bc4cfe1bdbff0adb381a3814fab8
  • Loading branch information
hanwen committed Sep 6, 2017
1 parent 796ef0e commit fffb84d
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 192 deletions.
3 changes: 0 additions & 3 deletions cmd/zoekt-git-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ func main() {

gitRepos := map[string]string{}
for _, repoDir := range flag.Args() {
if _, err := os.Lstat(filepath.Join(repoDir, ".git")); err == nil {
repoDir = filepath.Join(repoDir, ".git")
}
repoDir, err := filepath.Abs(repoDir)
if err != nil {
log.Fatal(err)
Expand Down
55 changes: 41 additions & 14 deletions cmd/zoekt-repo-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"crypto/sha1"
"flag"
"fmt"
"io/ioutil"
"log"
"net/url"
"path"
Expand All @@ -44,7 +45,9 @@ import (
"github.com/google/zoekt"
"github.com/google/zoekt/build"
"github.com/google/zoekt/gitindex"
git "github.com/libgit2/git2go"

git "gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
)

var _ = log.Println
Expand All @@ -67,6 +70,7 @@ func parseBranches(manifestRepoURL, revPrefix string, cache *gitindex.RepoCache,
if err != nil {
return nil, err
}

for _, f := range args {
fs := strings.SplitN(f, ":", 2)
if len(fs) != 2 {
Expand All @@ -81,7 +85,7 @@ func parseBranches(manifestRepoURL, revPrefix string, cache *gitindex.RepoCache,
branch: fs[0],
file: fs[1],
mf: mf,
manifestPath: repo.Path(),
manifestPath: cache.Path(u),
})
}
} else {
Expand Down Expand Up @@ -169,7 +173,7 @@ func main() {
opts.SubRepositories = map[string]*zoekt.Repository{}

// branch => repo => version
versionMap := map[string]map[string]git.Oid{}
versionMap := map[string]map[string]plumbing.Hash{}
for _, br := range branches {
br.mf.Filter()
files, versions, err := iterateManifest(br.mf, *baseURL, *revPrefix, repoCache)
Expand Down Expand Up @@ -276,24 +280,46 @@ func main() {

// getManifest parses the manifest XML at the given branch/path inside a Git repository.
func getManifest(repo *git.Repository, branch, path string) (*manifest.Manifest, error) {
obj, err := repo.RevparseSingle(branch + ":" + path)
ref, err := repo.Reference(plumbing.ReferenceName("refs/heads/"+branch), true)
if err != nil {
return nil, err
}

commit, err := repo.CommitObject(ref.Hash())
if err != nil {
return nil, err
}

tree, err := repo.TreeObject(commit.TreeHash)
if err != nil {
return nil, err
}

entry, err := tree.FindEntry(path)
if err != nil {
return nil, err
}
defer obj.Free()
blob, err := obj.AsBlob()

blob, err := repo.BlobObject(entry.Hash)
if err != nil {
return nil, err
}
return manifest.Parse(blob.Contents())
r, err := blob.Reader()
if err != nil {
return nil, err
}
defer r.Close()

content, err := ioutil.ReadAll(r)
return manifest.Parse(content)
}

// iterateManifest constructs a complete tree from the given Manifest.
func iterateManifest(mf *manifest.Manifest,
baseURL url.URL, revPrefix string,
cache *gitindex.RepoCache) (map[gitindex.FileKey]gitindex.BlobLocation, map[string]git.Oid, error) {
cache *gitindex.RepoCache) (map[gitindex.FileKey]gitindex.BlobLocation, map[string]plumbing.Hash, error) {
allFiles := map[gitindex.FileKey]gitindex.BlobLocation{}
allVersions := map[string]git.Oid{}
allVersions := map[string]plumbing.Hash{}
for _, p := range mf.Project {
rev := mf.ProjectRevision(&p)

Expand All @@ -305,24 +331,25 @@ func iterateManifest(mf *manifest.Manifest,
return nil, nil, err
}

obj, err := topRepo.RevparseSingle(revPrefix + rev)
ref, err := topRepo.Reference(plumbing.ReferenceName(revPrefix+rev), true)
if err != nil {
return nil, nil, err
}
defer obj.Free()

commit, err := obj.AsCommit()
commit, err := topRepo.CommitObject(ref.Hash())
if err != nil {
return nil, nil, err
}
if err != nil {
return nil, nil, err
}

allVersions[p.GetPath()] = *commit.Id()
allVersions[p.GetPath()] = commit.Hash

tree, err := commit.Tree()
if err != nil {
return nil, nil, err
}
defer tree.Free()

files, versions, err := gitindex.TreeToFiles(topRepo, tree, projURL.String(), cache)
if err != nil {
Expand Down
16 changes: 9 additions & 7 deletions gitindex/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"strings"
"sync"

"github.com/libgit2/git2go"
git "gopkg.in/src-d/go-git.v4"
)

type RepoCache struct {
Expand All @@ -40,9 +40,6 @@ func NewRepoCache(dir string) *RepoCache {
func (rc *RepoCache) Close() {
rc.reposMu.Lock()
defer rc.reposMu.Unlock()
for _, v := range rc.repos {
v.Free()
}
}

func repoKey(u *url.URL) string {
Expand All @@ -59,21 +56,26 @@ func Path(baseDir string, u *url.URL) string {
return filepath.Join(baseDir, key)
}

func (rc *RepoCache) Path(u *url.URL) string {
key := repoKey(u)
return filepath.Join(rc.baseDir, key)
}

// Open opens a git repository. The cache retains a pointer to the
// repository, so it cannot be freed.
func (rc *RepoCache) Open(u *url.URL) (*git.Repository, error) {
key := repoKey(u)
dir := filepath.Join(rc.baseDir, key)

dir := rc.Path(u)
rc.reposMu.Lock()
defer rc.reposMu.Unlock()

key := repoKey(u)
r := rc.repos[key]
if r != nil {
return r, nil
}

repo, err := git.OpenRepository(dir)
repo, err := git.PlainOpen(dir)
if err == nil {
rc.repos[key] = repo
}
Expand Down
Loading

0 comments on commit fffb84d

Please sign in to comment.