Skip to content

Commit

Permalink
Add tests to cover GitTree resolver (sourcegraph#57671)
Browse files Browse the repository at this point in the history
I wanted to look into this resolver because it seemed to cause a lot of load on S2 and dotcom.
Looking at it, I realize that there are zero tests so I didn't even know what's expected to be returned here.
This PR adds a baseline for what should be returned and fixed a bunch of bugs:

- Improper argument validation that could cause panics
- Improperly counted results for `first` argument
- Removed arguments from GraphQL schema that weren't actually respected
- Fix line ending handling in content pagination to return indication if file ended with no newline
- Fix TotalLines for files without a final newline
- Improves performance of line parsing
- Fix ancestors option creating too many results by recursively including all children into the set again (was able to get 90000 entries returned from S2 for a relatively small folder structure)
- Make behavior of recursiveSingleChild match what's documented in GraphQL. Before we would only recurse if the looked at entry is a single child (or first:1 was set)

Ultimately, the dir traversal we do in here should IMO happen in gitserver directly where we already have it in tree structure, but that will have to be for later.
For now, this mostly adds some confidence to these resolvers.
  • Loading branch information
eseliger authored Oct 23, 2023
1 parent cb48b18 commit ababbb9
Show file tree
Hide file tree
Showing 7 changed files with 691 additions and 181 deletions.
7 changes: 1 addition & 6 deletions cmd/frontend/graphqlbackend/git_commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,7 @@ func (r *GitCommitResolver) ExternalURLs(ctx context.Context) ([]*externallink.R
}

func (r *GitCommitResolver) Tree(ctx context.Context, args *struct {
Path string
Recursive bool
Path string
}) (*GitTreeEntryResolver, error) {
treeEntry, err := r.path(ctx, args.Path, func(stat fs.FileInfo) error {
if !stat.Mode().IsDir() {
Expand All @@ -238,10 +237,6 @@ func (r *GitCommitResolver) Tree(ctx context.Context, args *struct {
return nil, err
}

// Note: args.Recursive is deprecated
if treeEntry != nil {
treeEntry.isRecursive = args.Recursive
}
return treeEntry, nil
}

Expand Down
17 changes: 8 additions & 9 deletions cmd/frontend/graphqlbackend/git_ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,21 +106,20 @@ func unmarshalGitRefID(id graphql.ID) (repoID graphql.ID, rev string, err error)
return spec.Repository, spec.Rev, err
}

type GitTarget interface {
OID(context.Context) (GitObjectID, error)
AbbreviatedOID(context.Context) (string, error)
Commit(context.Context) (*GitCommitResolver, error)
Type(context.Context) (GitObjectType, error)
}

func (r *GitRefResolver) ID() graphql.ID { return marshalGitRefID(r.repo.ID(), r.name) }
func (r *GitRefResolver) Name() string { return r.name }
func (r *GitRefResolver) AbbrevName() string { return strings.TrimPrefix(r.name, gitRefPrefix(r.name)) }
func (r *GitRefResolver) DisplayName() string { return gitRefDisplayName(r.name) }
func (r *GitRefResolver) Prefix() string { return gitRefPrefix(r.name) }
func (r *GitRefResolver) Type() string { return gitRefType(r.name) }
func (r *GitRefResolver) Target() interface {
OID(context.Context) (GitObjectID, error)
//lint:ignore U1000 is used by graphql via reflection
AbbreviatedOID(context.Context) (string, error)
//lint:ignore U1000 is used by graphql via reflection
Commit(context.Context) (*GitCommitResolver, error)
//lint:ignore U1000 is used by graphql via reflection
Type(context.Context) (GitObjectType, error)
} {
func (r *GitRefResolver) Target() GitTarget {
if r.target != "" {
return &gitObject{repo: r.repo, oid: r.target, typ: GitObjectTypeCommit}
}
Expand Down
159 changes: 127 additions & 32 deletions cmd/frontend/graphqlbackend/git_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import (
"context"
"io/fs"
"path"
"path/filepath"
"sort"
"strings"

"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/lib/errors"
)

func (r *GitTreeEntryResolver) IsRoot() bool {
Expand All @@ -22,6 +24,7 @@ type gitTreeEntryConnectionArgs struct {
Recursive bool
// If recurseSingleChild is true, we will return a flat list of every
// directory and file in a single-child nest.
// Only respected when Recursive is false.
RecursiveSingleChild bool
// If Ancestors is true and the tree is loaded from a subdirectory, we will
// return a flat list of all entries in all parent directories.
Expand All @@ -44,7 +47,19 @@ func (r *GitTreeEntryResolver) entries(ctx context.Context, args *gitTreeEntryCo
tr, ctx := trace.New(ctx, "GitTreeEntryResolver.entries")
defer tr.EndWithErr(&err)

entries, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), r.Path(), r.isRecursive || args.Recursive)
if args.First != nil && *args.First < 0 {
return nil, errors.Newf("invalid argument for first, must be non-negative")
}

if args.Recursive && args.RecursiveSingleChild {
// No extra work needed, recursive includes all these.
args.RecursiveSingleChild = false
}

// If RecursiveSingleChild is true, we also get all files recursively. Otherwise, we would
// have to do a readdir for every single directory to see if it has only one child (and nested)
// dirs, too.
entries, err := r.gitserverClient.ReadDir(ctx, r.commit.repoResolver.RepoName(), api.CommitID(r.commit.OID()), r.Path(), args.Recursive || args.RecursiveSingleChild)
if err != nil {
if strings.Contains(err.Error(), "file does not exist") { // TODO proper error value
// empty tree is not an error
Expand All @@ -53,57 +68,88 @@ func (r *GitTreeEntryResolver) entries(ctx context.Context, args *gitTreeEntryCo
}
}

sort.Sort(byDirectory(entries))
// If RecursiveSingleChild is true, we need to filter out non-single-childs
// again.
filteredEntries := entries
if args.RecursiveSingleChild {
// Reset filteredEntries.
filteredEntries = filteredEntries[:0]
// Convert the entries into a tree fs.
fs := entriesToFsTree(r.stat, entries)
// Keep all files in the current directory.
for _, entry := range entries {
if normalizePath(filepath.Dir(normalizePath(entry.Name()))) == normalizePath(r.Path()) {
filteredEntries = append(filteredEntries, entry)
}
}
// And now traverse all the children and check if there's at most 1 item
// in it.
var traverseFs func(*fsNode, int)
traverseFs = func(fs *fsNode, l int) {
if fs.IsDir() && len(fs.children) == 1 {
if l != 0 {
filteredEntries = append(filteredEntries, fs.node)
}
traverseFs(fs.children[0], l+1)
return
} else {
if !fs.IsDir() {
if l != 0 {
filteredEntries = append(filteredEntries, fs.node)
}
}
}
}
for _, c := range fs.children {
traverseFs(c, 0)
}
}

if args.First != nil && len(entries) > int(*args.First) {
entries = entries[:int(*args.First)]
maxResolvers := len(filteredEntries)
if args.First != nil && int(*args.First) < maxResolvers {
maxResolvers = int(*args.First)
}

l := make([]*GitTreeEntryResolver, 0, len(entries))
for _, entry := range entries {
// Apply any additional filtering
sort.Sort(byDirectory(filteredEntries))

resolvers := make([]*GitTreeEntryResolver, 0, maxResolvers)
seen := 0
for _, entry := range filteredEntries {
if seen == maxResolvers {
break
}

// Apply any additional filtering
if filter == nil || filter(entry) {
seen++
opts := GitTreeEntryResolverOpts{
Commit: r.Commit(),
Stat: entry,
}
l = append(l, NewGitTreeEntryResolver(r.db, r.gitserverClient, opts))
resolvers = append(resolvers, NewGitTreeEntryResolver(r.db, r.gitserverClient, opts))
}
}

// Update endLine filtering
hasSingleChild := len(l) == 1
for i := range l {
l[i].isSingleChild = &hasSingleChild
}

if !args.Recursive && args.RecursiveSingleChild && len(l) == 1 {
subEntries, err := l[0].entries(ctx, args, filter)
if err != nil {
return nil, err
}
l = append(l, subEntries...)
}

if args.Ancestors && !r.IsRoot() {
var parent *GitTreeEntryResolver
parent, err = r.parent(ctx)
p := r.Path()
p = strings.Trim(p, "/")
parent := NewGitTreeEntryResolver(r.db, r.gitserverClient, GitTreeEntryResolverOpts{
Commit: r.commit,
Stat: CreateFileInfo(filepath.Dir(p), true),
})
parentEntries, err := parent.entries(ctx, &gitTreeEntryConnectionArgs{
Ancestors: true,
}, nil)
if err != nil {
return nil, err
}
if parent != nil {
parentEntries, err := parent.Entries(ctx, args)
if err != nil {
return nil, err
}
l = append(parentEntries, l...)
}
resolvers = append(parentEntries, resolvers...)
}

return l, nil
return resolvers, nil
}

// byDirectory implements sort.Sortable and orders directories before files.
type byDirectory []fs.FileInfo

func (s byDirectory) Len() int {
Expand All @@ -125,3 +171,52 @@ func (s byDirectory) Less(i, j int) bool {

return s[i].Name() < s[j].Name()
}

type fsNode struct {
name string
node fs.FileInfo
children []*fsNode
}

func (n *fsNode) IsDir() bool {
return n.node.IsDir()
}

// entriesToFsTree takes a slice of fs.FileInfo entries and converts them into a
// tree structure like a file system. This makes it easy to traverse the tree
// for what would otherwise be a simple slice of strings basically.
func entriesToFsTree(root fs.FileInfo, entries []fs.FileInfo) *fsNode {
// Make sure we order by length, this means that dirs are always inserted before files.
sort.Slice(entries, func(i, j int) bool {
return len(entries[i].Name()) < len(entries[j].Name())
})
normRoot := normalizePath(root.Name())
tree := &fsNode{name: normRoot, node: root}
for _, entry := range entries {
path := normalizePath(entry.Name())
path = strings.TrimPrefix(path, normRoot)
segments := strings.Split(path, "/")
curTree := tree
for i, s := range segments {
if i == len(segments)-1 {
node := &fsNode{name: s, node: entry}
curTree.children = append(curTree.children, node)
} else {
for _, c := range curTree.children {
if c.name == s {
curTree = c
break
}
}
}
}
}
return tree
}

func normalizePath(path string) string {
if path == "." || path == "/" {
path = ""
}
return strings.Trim(path, "/")
}
Loading

0 comments on commit ababbb9

Please sign in to comment.