Skip to content
This repository has been archived by the owner on May 6, 2021. It is now read-only.

Commit

Permalink
Merge "Speed up globs with sharding"
Browse files Browse the repository at this point in the history
  • Loading branch information
colincross authored and Gerrit Code Review committed Apr 20, 2021
2 parents d04dcc8 + 2523698 commit 9021eef
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 114 deletions.
81 changes: 68 additions & 13 deletions bootstrap/bpglob/bpglob.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"strconv"
"time"

"github.com/google/blueprint/deptools"
"github.com/google/blueprint/pathtools"
)

Expand All @@ -38,13 +39,14 @@ var (

out = flagSet.String("o", "", "file to write list of files that match glob")

excludes multiArg
versionMatch versionArg
globs []globArg
)

func init() {
flagSet.Var(&versionMatch, "v", "version number the command line was generated for")
flagSet.Var(&excludes, "e", "pattern to exclude from results")
flagSet.Var((*patternsArgs)(&globs), "p", "pattern to include in results")
flagSet.Var((*excludeArgs)(&globs), "e", "pattern to exclude from results from the most recent pattern")
}

// bpglob is executed through the rules in build-globs.ninja to determine whether soong_build
Expand Down Expand Up @@ -90,23 +92,42 @@ func (v *versionArg) Set(s string) error {
return nil
}

type multiArg []string

func (m *multiArg) String() string {
return `""`
// globArg holds a single -p argument with zero or more following -e arguments.
type globArg struct {
// pattern is the value of the -p argument.
pattern string
// excludes collects the values of the -e arguments that followed this -p argument.
excludes []string
}

func (m *multiArg) Set(s string) error {
*m = append(*m, s)
// patternsArgs implements flag.Value to handle -p arguments by appending a new globArg to the
// slice it is a view of.
type patternsArgs []globArg

// String implements flag.Value. It always returns the two-character string `""`; the
// accumulated patterns are not rendered.
func (p *patternsArgs) String() string { return `""` }

// Set implements flag.Value. It appends a globArg for pattern s, with no excludes yet, to the
// receiver and never returns an error.
//
// The append goes through the receiver rather than a package-level variable so that it behaves
// the same way as excludeArgs.Set and works for any slice the flag is bound to.
func (p *patternsArgs) Set(s string) error {
	*p = append(*p, globArg{pattern: s})
	return nil
}

func (m *multiArg) Get() interface{} {
return m
// excludeArgs implements flag.Value for -e arguments. Each value is attached to the globArg
// that was added most recently to the underlying slice.
type excludeArgs []globArg

// String implements flag.Value and always returns the two-character string `""`.
func (e *excludeArgs) String() string {
	return `""`
}

// Set implements flag.Value. It appends s to the excludes of the last globArg in the list, and
// returns an error when the list is still empty, i.e. no -p argument has been seen yet.
func (e *excludeArgs) Set(s string) error {
	list := *e
	last := len(list) - 1
	if last < 0 {
		return fmt.Errorf("-p argument is required before the first -e argument")
	}

	// list shares its backing array with *e, so this updates the caller's element in place.
	list[last].excludes = append(list[last].excludes, s)
	return nil
}

func usage() {
fmt.Fprintln(os.Stderr, "usage: bpglob -o out -v version [-e excludes ...] glob")
fmt.Fprintln(os.Stderr, "usage: bpglob -o out -v version -p glob [-e excludes ...] [-p glob ...]")
flagSet.PrintDefaults()
os.Exit(2)
}
Expand Down Expand Up @@ -143,11 +164,11 @@ func main() {
usage()
}

if flagSet.NArg() != 1 {
if flagSet.NArg() > 0 {
usage()
}

_, err = pathtools.GlobWithDepFile(flagSet.Arg(0), *out, *out+".d", excludes)
err = globsWithDepFile(*out, *out+".d", globs)
if err != nil {
// Globs here were already run in the primary builder without error. The only errors here should be if the glob
// pattern was made invalid by a change in the pathtools glob implementation, in which case the primary builder
Expand All @@ -167,3 +188,37 @@ func writeErrorOutput(path string, globErr error) {
os.Exit(1)
}
}

// globsWithDepFile evaluates every entry of globs (its pattern together with its own excludes)
// with pathtools.Glob, following symlinks, and collects the results in order.
//
// The combined file list is written to fileListFile only if it differs from the current
// contents, and the combined dependencies of all the globs are written to depFile as
// dependencies of fileListFile.
//
// An error from evaluating a glob is returned as is; a failure to write either file is wrapped
// with the path that could not be written.
func globsWithDepFile(fileListFile, depFile string, globs []globArg) error {
	var combined pathtools.MultipleGlobResults
	for _, arg := range globs {
		matches, err := pathtools.Glob(arg.pattern, arg.excludes, pathtools.FollowSymlinks)
		if err != nil {
			return err
		}
		combined = append(combined, matches)
	}

	// Leave the output file alone when nothing changed.
	if err := pathtools.WriteFileIfChanged(fileListFile, combined.FileList(), 0666); err != nil {
		return fmt.Errorf("failed to write file list to %q: %w", fileListFile, err)
	}

	// Ninja's restat feature is not affected by the depfile's timestamp, so the depfile is
	// rewritten on every run.
	if err := deptools.WriteDepFile(depFile, fileListFile, combined.Deps()); err != nil {
		return fmt.Errorf("failed to write dep file to %q: %w", depFile, err)
	}

	return nil
}
4 changes: 2 additions & 2 deletions bootstrap/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func RunBlueprint(args Args, ctx *blueprint.Context, config interface{}) []strin
ctx.RegisterModuleType("blueprint_go_binary", newGoBinaryModuleFactory(bootstrapConfig, true))
ctx.RegisterSingletonType("bootstrap", newSingletonFactory(bootstrapConfig))

ctx.RegisterSingletonType("glob", globSingletonFactory(ctx))
ctx.RegisterSingletonType("glob", globSingletonFactory(bootstrapConfig, ctx))

blueprintFiles, errs := ctx.ParseFileList(filepath.Dir(args.TopFile), filesToParse, config)
if len(errs) > 0 {
Expand Down Expand Up @@ -289,7 +289,7 @@ func RunBlueprint(args Args, ctx *blueprint.Context, config interface{}) []strin
}

if args.GlobFile != "" {
buffer, errs := generateGlobNinjaFile(config, ctx.Globs)
buffer, errs := generateGlobNinjaFile(bootstrapConfig, config, ctx.Globs)
if len(errs) > 0 {
fatalErrors(errs)
}
Expand Down
121 changes: 109 additions & 12 deletions bootstrap/glob.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ package bootstrap
import (
"bytes"
"fmt"
"hash/fnv"
"io"
"path/filepath"
"strconv"
"strings"

"github.com/google/blueprint"
"github.com/google/blueprint/pathtools"
Expand Down Expand Up @@ -45,20 +49,21 @@ var (
// and writes it to $out if it has changed, and writes the directories to $out.d
GlobRule = pctx.StaticRule("GlobRule",
blueprint.RuleParams{
Command: fmt.Sprintf(`%s -o $out -v %d $excludes "$glob"`,
Command: fmt.Sprintf(`%s -o $out -v %d $args`,
globCmd, pathtools.BPGlobArgumentVersion),
CommandDeps: []string{globCmd},
Description: "glob $glob",
Description: "glob",

Restat: true,
Deps: blueprint.DepsGCC,
Depfile: "$out.d",
},
"glob", "excludes")
"args")
)

// GlobFileContext is the subset of ModuleContext and SingletonContext needed by GlobFile
type GlobFileContext interface {
Config() interface{}
Build(pctx blueprint.PackageContext, params blueprint.BuildParams)
}

Expand All @@ -67,13 +72,48 @@ type GlobFileContext interface {
// appropriate dependencies to regenerate the file if and only if the list of matching files has
// changed.
func GlobFile(ctx GlobFileContext, pattern string, excludes []string, fileListFile string) {
args := `-p "` + pattern + `"`
if len(excludes) > 0 {
args += " " + joinWithPrefixAndQuote(excludes, "-e ")
}
ctx.Build(pctx, blueprint.BuildParams{
Rule: GlobRule,
Outputs: []string{fileListFile},
Args: map[string]string{
"glob": pattern,
"excludes": joinWithPrefixAndQuote(excludes, "-e "),
"args": args,
},
Description: "glob " + pattern,
})
}

// multipleGlobFilesRule emits one GlobRule build statement whose output is fileListFile and
// whose "args" value lists every glob in globs: a `-p "<pattern>"` for each entry, followed by
// a `-e "<exclude>"` for each of that entry's excludes, all separated by single spaces.
//
// shard is only used in the build description, which reports it against numGlobBuckets.
//
// NOTE(review): patterns and excludes are placed between double quotes without any escaping.
func multipleGlobFilesRule(ctx GlobFileContext, fileListFile string, shard int, globs pathtools.MultipleGlobResults) {
	var cmdline strings.Builder

	for idx, entry := range globs {
		if idx > 0 {
			cmdline.WriteString(" ")
		}
		cmdline.WriteString(`-p "` + entry.Pattern + `"`)
		for _, excluded := range entry.Excludes {
			cmdline.WriteString(` -e "` + excluded + `"`)
		}
	}

	description := fmt.Sprintf("regenerate globs shard %d of %d", shard, numGlobBuckets)
	params := blueprint.BuildParams{
		Rule:        GlobRule,
		Outputs:     []string{fileListFile},
		Args:        map[string]string{"args": cmdline.String()},
		Description: description,
	}
	ctx.Build(pctx, params)
}

Expand Down Expand Up @@ -108,47 +148,70 @@ func joinWithPrefixAndQuote(strs []string, prefix string) string {
// re-evaluate them whenever the contents of the searched directories change, and retrigger the
// primary builder if the results change.
type globSingleton struct {
globLister func() []blueprint.GlobPath
config *Config
globLister func() pathtools.MultipleGlobResults
writeRule bool
}

func globSingletonFactory(ctx *blueprint.Context) func() blueprint.Singleton {
func globSingletonFactory(config *Config, ctx *blueprint.Context) func() blueprint.Singleton {
return func() blueprint.Singleton {
return &globSingleton{
config: config,
globLister: ctx.Globs,
}
}
}

func (s *globSingleton) GenerateBuildActions(ctx blueprint.SingletonContext) {
// Sort the list of globs into buckets. A hash function is used instead of sharding so that
// adding a new glob doesn't force rerunning all the buckets by shifting them all by 1.
globBuckets := make([]pathtools.MultipleGlobResults, numGlobBuckets)
for _, g := range s.globLister() {
fileListFile := g.FileListFile(ctx.Config().(BootstrapConfig).BuildDir())
bucket := globToBucket(g)
globBuckets[bucket] = append(globBuckets[bucket], g)
}

// The directory for the intermediates needs to be different for bootstrap and the primary
// builder.
globsDir := globsDir(ctx.Config().(BootstrapConfig), s.config.stage)

for i, globs := range globBuckets {
fileListFile := filepath.Join(globsDir, strconv.Itoa(i))

if s.writeRule {
// Called from generateGlobNinjaFile. Write out the file list to disk, and add a ninja
// rule to run bpglob if any of the dependencies (usually directories that contain
// globbed files) have changed. The file list produced by bpglob should match exactly
// with the file written here so that restat can prevent rerunning the primary builder.
//
// We need to write the file list here so that it has an older modified date
// than the build.ninja (otherwise we'd run the primary builder twice on
// every new glob)
//
// We don't need to write the depfile because we're guaranteed that ninja
// will run the command at least once (to record it into the ninja_log), so
// the depfile will be loaded from that execution.
err := pathtools.WriteFileIfChanged(absolutePath(fileListFile), g.FileList(), 0666)
err := pathtools.WriteFileIfChanged(absolutePath(fileListFile), globs.FileList(), 0666)
if err != nil {
panic(fmt.Errorf("error writing %s: %s", fileListFile, err))
}

GlobFile(ctx, g.Pattern, g.Excludes, fileListFile)
// Write out the ninja rule to run bpglob.
multipleGlobFilesRule(ctx, fileListFile, i, globs)
} else {
// Make build.ninja depend on the fileListFile
// Called from the main Context, make build.ninja depend on the fileListFile.
ctx.AddNinjaFileDeps(fileListFile)
}
}
}

func generateGlobNinjaFile(config interface{}, globLister func() []blueprint.GlobPath) ([]byte, []error) {
func generateGlobNinjaFile(bootstrapConfig *Config, config interface{},
globLister func() pathtools.MultipleGlobResults) ([]byte, []error) {

ctx := blueprint.NewContext()
ctx.RegisterSingletonType("glob", func() blueprint.Singleton {
return &globSingleton{
config: bootstrapConfig,
globLister: globLister,
writeRule: true,
}
Expand Down Expand Up @@ -178,3 +241,37 @@ func generateGlobNinjaFile(config interface{}, globLister func() []blueprint.Glo

return buf.Bytes(), nil
}

// globsDir returns the directory under the build directory that holds the glob intermediates
// for the given stage: the "globs" directory below mainSubDir for StageMain, and below
// bootstrapSubDir for every other stage, so that the bootstrap and primary builder executions
// do not share files.
func globsDir(config BootstrapConfig, stage Stage) string {
	root := config.BuildDir()
	if stage != StageMain {
		return filepath.Join(root, bootstrapSubDir, "globs")
	}
	return filepath.Join(root, mainSubDir, "globs")
}

// GlobFileListFiles returns the paths of all numGlobBuckets sharded glob file list files for
// the main stage. Entry i is the file named with the decimal form of i inside the main stage's
// globs directory.
func GlobFileListFiles(config BootstrapConfig) []string {
	dir := globsDir(config, StageMain)
	paths := make([]string, numGlobBuckets)
	for shard := range paths {
		paths[shard] = filepath.Join(dir, strconv.Itoa(shard))
	}
	return paths
}

// numGlobBuckets is the number of buckets (shards) that globs are hashed into. Each bucket has
// its own file list file, named after the bucket index.
const numGlobBuckets = 1024

// globToBucket maps a pathtools.GlobResult to a bucket number in the range
// [0, numGlobBuckets). The bucket is the 32-bit FNV-1a hash of the pattern followed by each of
// the excludes, in order, reduced modulo numGlobBuckets.
func globToBucket(g pathtools.GlobResult) int {
	hasher := fnv.New32a()

	// Writes to a hash.Hash never return an error, so the results are not checked.
	io.WriteString(hasher, g.Pattern)
	for i := range g.Excludes {
		io.WriteString(hasher, g.Excludes[i])
	}

	bucket := hasher.Sum32() % numGlobBuckets
	return int(bucket)
}
4 changes: 2 additions & 2 deletions context.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ type Context struct {
// cache deps modified to determine whether cachedSortedModuleGroups needs to be recalculated
cachedDepsModified bool

globs map[string]GlobPath
globs map[globKey]pathtools.GlobResult
globLock sync.Mutex

srcDir string
Expand Down Expand Up @@ -385,7 +385,7 @@ func newContext() *Context {
moduleFactories: make(map[string]ModuleFactory),
nameInterface: NewSimpleNameInterface(),
moduleInfo: make(map[Module]*moduleInfo),
globs: make(map[string]GlobPath),
globs: make(map[globKey]pathtools.GlobResult),
fs: pathtools.OsFs,
finishedMutators: make(map[*mutatorInfo]bool),
ninjaBuildDir: nil,
Expand Down
Loading

0 comments on commit 9021eef

Please sign in to comment.