diff --git a/bootstrap/bpglob/bpglob.go b/bootstrap/bpglob/bpglob.go index 58ddba0e..81c0dd04 100644 --- a/bootstrap/bpglob/bpglob.go +++ b/bootstrap/bpglob/bpglob.go @@ -28,6 +28,7 @@ import ( "strconv" "time" + "github.com/google/blueprint/deptools" "github.com/google/blueprint/pathtools" ) @@ -38,13 +39,14 @@ var ( out = flagSet.String("o", "", "file to write list of files that match glob") - excludes multiArg versionMatch versionArg + globs []globArg ) func init() { flagSet.Var(&versionMatch, "v", "version number the command line was generated for") - flagSet.Var(&excludes, "e", "pattern to exclude from results") + flagSet.Var((*patternsArgs)(&globs), "p", "pattern to include in results") + flagSet.Var((*excludeArgs)(&globs), "e", "pattern to exclude from results from the most recent pattern") } // bpglob is executed through the rules in build-globs.ninja to determine whether soong_build @@ -90,23 +92,42 @@ func (v *versionArg) Set(s string) error { return nil } -type multiArg []string - -func (m *multiArg) String() string { - return `""` +// A glob arg holds a single -p argument with zero or more following -e arguments. +type globArg struct { + pattern string + excludes []string } -func (m *multiArg) Set(s string) error { - *m = append(*m, s) +// patternsArgs implements flag.Value to handle -p arguments by adding a new globArg to the list. +type patternsArgs []globArg + +func (p *patternsArgs) String() string { return `""` } + +func (p *patternsArgs) Set(s string) error { + globs = append(globs, globArg{ + pattern: s, + }) return nil } -func (m *multiArg) Get() interface{} { - return m +// excludeArgs implements flag.Value to handle -e arguments by adding to the last globArg in the +// list. +type excludeArgs []globArg + +func (e *excludeArgs) String() string { return `""` } + +func (e *excludeArgs) Set(s string) error { + if len(*e) == 0 { + return fmt.Errorf("-p argument is required before the first -e argument") + } + + glob := &(*e)[len(*e)-1] + glob.excludes = append(glob.excludes, s) + return nil } func usage() { - fmt.Fprintln(os.Stderr, "usage: bpglob -o out -v version [-e excludes ...] glob") + fmt.Fprintln(os.Stderr, "usage: bpglob -o out -v version -p glob [-e excludes ...] [-p glob ...]") flagSet.PrintDefaults() os.Exit(2) } @@ -143,11 +164,11 @@ func main() { usage() } - if flagSet.NArg() != 1 { + if flagSet.NArg() > 0 { usage() } - _, err = pathtools.GlobWithDepFile(flagSet.Arg(0), *out, *out+".d", excludes) + err = globsWithDepFile(*out, *out+".d", globs) if err != nil { // Globs here were already run in the primary builder without error. The only errors here should be if the glob // pattern was made invalid by a change in the pathtools glob implementation, in which case the primary builder @@ -167,3 +188,37 @@ func writeErrorOutput(path string, globErr error) { os.Exit(1) } } + +// globsWithDepFile finds all files and directories that match glob. Directories +// will have a trailing '/'. It compares the list of matches against the +// contents of fileListFile, and rewrites fileListFile if it has changed. It +// also writes all of the directories it traversed as dependencies on fileListFile +// to depFile. +// +// The format of glob is either path/*.ext for a single directory glob, or +// path/**/*.ext for a recursive glob. +func globsWithDepFile(fileListFile, depFile string, globs []globArg) error { + var results pathtools.MultipleGlobResults + for _, glob := range globs { + result, err := pathtools.Glob(glob.pattern, glob.excludes, pathtools.FollowSymlinks) + if err != nil { + return err + } + results = append(results, result) + } + + // Only write the output file if it has changed. + err := pathtools.WriteFileIfChanged(fileListFile, results.FileList(), 0666) + if err != nil { + return fmt.Errorf("failed to write file list to %q: %w", fileListFile, err) + } + + // The depfile can be written unconditionally as its timestamp doesn't affect ninja's restat + // feature. + err = deptools.WriteDepFile(depFile, fileListFile, results.Deps()) + if err != nil { + return fmt.Errorf("failed to write dep file to %q: %w", depFile, err) + } + + return nil +} diff --git a/bootstrap/command.go b/bootstrap/command.go index 54eaa0ab..4a938db4 100644 --- a/bootstrap/command.go +++ b/bootstrap/command.go @@ -224,7 +224,7 @@ func RunBlueprint(args Args, ctx *blueprint.Context, config interface{}) []strin ctx.RegisterModuleType("blueprint_go_binary", newGoBinaryModuleFactory(bootstrapConfig, true)) ctx.RegisterSingletonType("bootstrap", newSingletonFactory(bootstrapConfig)) - ctx.RegisterSingletonType("glob", globSingletonFactory(ctx)) + ctx.RegisterSingletonType("glob", globSingletonFactory(bootstrapConfig, ctx)) blueprintFiles, errs := ctx.ParseFileList(filepath.Dir(args.TopFile), filesToParse, config) if len(errs) > 0 { @@ -289,7 +289,7 @@ func RunBlueprint(args Args, ctx *blueprint.Context, config interface{}) []strin } if args.GlobFile != "" { - buffer, errs := generateGlobNinjaFile(config, ctx.Globs) + buffer, errs := generateGlobNinjaFile(bootstrapConfig, config, ctx.Globs) if len(errs) > 0 { fatalErrors(errs) } diff --git a/bootstrap/glob.go b/bootstrap/glob.go index 9b507bb4..39c662b5 100644 --- a/bootstrap/glob.go +++ b/bootstrap/glob.go @@ -17,7 +17,11 @@ package bootstrap import ( "bytes" "fmt" + "hash/fnv" + "io" "path/filepath" + "strconv" + "strings" "github.com/google/blueprint" "github.com/google/blueprint/pathtools" @@ -45,20 +49,21 @@ var ( // and writes it to $out if it has changed, and writes the directories to $out.d GlobRule = pctx.StaticRule("GlobRule", blueprint.RuleParams{ - Command: fmt.Sprintf(`%s -o $out -v %d $excludes "$glob"`, + Command: fmt.Sprintf(`%s -o $out -v %d $args`, globCmd, pathtools.BPGlobArgumentVersion), CommandDeps: []string{globCmd}, - Description: "glob $glob", + Description: "glob", Restat: true, Deps: blueprint.DepsGCC, Depfile: "$out.d", }, - "glob", "excludes") + "args") ) // GlobFileContext is the subset of ModuleContext and SingletonContext needed by GlobFile type GlobFileContext interface { + Config() interface{} Build(pctx blueprint.PackageContext, params blueprint.BuildParams) } @@ -67,13 +72,48 @@ type GlobFileContext interface { // appropriate dependencies to regenerate the file if and only if the list of matching files has // changed. func GlobFile(ctx GlobFileContext, pattern string, excludes []string, fileListFile string) { + args := `-p "` + pattern + `"` + if len(excludes) > 0 { + args += " " + joinWithPrefixAndQuote(excludes, "-e ") + } ctx.Build(pctx, blueprint.BuildParams{ Rule: GlobRule, Outputs: []string{fileListFile}, Args: map[string]string{ - "glob": pattern, - "excludes": joinWithPrefixAndQuote(excludes, "-e "), + "args": args, }, + Description: "glob " + pattern, + }) +} + +// multipleGlobFilesRule creates a rule to write to fileListFile a list of the files that match the specified +// pattern but do not match any of the patterns specified in excludes. The file will include +// appropriate dependencies to regenerate the file if and only if the list of matching files has +// changed. +func multipleGlobFilesRule(ctx GlobFileContext, fileListFile string, shard int, globs pathtools.MultipleGlobResults) { + args := strings.Builder{} + + for i, glob := range globs { + if i != 0 { + args.WriteString(" ") + } + args.WriteString(`-p "`) + args.WriteString(glob.Pattern) + args.WriteString(`"`) + for _, exclude := range glob.Excludes { + args.WriteString(` -e "`) + args.WriteString(exclude) + args.WriteString(`"`) + } + } + + ctx.Build(pctx, blueprint.BuildParams{ + Rule: GlobRule, + Outputs: []string{fileListFile}, + Args: map[string]string{ + "args": args.String(), + }, + Description: fmt.Sprintf("regenerate globs shard %d of %d", shard, numGlobBuckets), }) } @@ -108,23 +148,42 @@ func joinWithPrefixAndQuote(strs []string, prefix string) string { // re-evaluate them whenever the contents of the searched directories change, and retrigger the // primary builder if the results change. type globSingleton struct { - globLister func() []blueprint.GlobPath + config *Config + globLister func() pathtools.MultipleGlobResults writeRule bool } -func globSingletonFactory(ctx *blueprint.Context) func() blueprint.Singleton { +func globSingletonFactory(config *Config, ctx *blueprint.Context) func() blueprint.Singleton { return func() blueprint.Singleton { return &globSingleton{ + config: config, globLister: ctx.Globs, } } } func (s *globSingleton) GenerateBuildActions(ctx blueprint.SingletonContext) { + // Sort the list of globs into buckets. A hash function is used instead of sharding so that + // adding a new glob doesn't force rerunning all the buckets by shifting them all by 1. + globBuckets := make([]pathtools.MultipleGlobResults, numGlobBuckets) for _, g := range s.globLister() { - fileListFile := g.FileListFile(ctx.Config().(BootstrapConfig).BuildDir()) + bucket := globToBucket(g) + globBuckets[bucket] = append(globBuckets[bucket], g) + } + + // The directory for the intermediates needs to be different for bootstrap and the primary + // builder. + globsDir := globsDir(ctx.Config().(BootstrapConfig), s.config.stage) + + for i, globs := range globBuckets { + fileListFile := filepath.Join(globsDir, strconv.Itoa(i)) if s.writeRule { + // Called from generateGlobNinjaFile. Write out the file list to disk, and add a ninja + // rule to run bpglob if any of the dependencies (usually directories that contain + // globbed files) have changed. The file list produced by bpglob should match exactly + // with the file written here so that restat can prevent rerunning the primary builder. + // // We need to write the file list here so that it has an older modified date // than the build.ninja (otherwise we'd run the primary builder twice on // every new glob) @@ -132,23 +191,27 @@ func (s *globSingleton) GenerateBuildActions(ctx blueprint.SingletonContext) { // We don't need to write the depfile because we're guaranteed that ninja // will run the command at least once (to record it into the ninja_log), so // the depfile will be loaded from that execution. - err := pathtools.WriteFileIfChanged(absolutePath(fileListFile), g.FileList(), 0666) + err := pathtools.WriteFileIfChanged(absolutePath(fileListFile), globs.FileList(), 0666) if err != nil { panic(fmt.Errorf("error writing %s: %s", fileListFile, err)) } - GlobFile(ctx, g.Pattern, g.Excludes, fileListFile) + // Write out the ninja rule to run bpglob. + multipleGlobFilesRule(ctx, fileListFile, i, globs) } else { - // Make build.ninja depend on the fileListFile + // Called from the main Context, make build.ninja depend on the fileListFile. ctx.AddNinjaFileDeps(fileListFile) } } } -func generateGlobNinjaFile(config interface{}, globLister func() []blueprint.GlobPath) ([]byte, []error) { +func generateGlobNinjaFile(bootstrapConfig *Config, config interface{}, + globLister func() pathtools.MultipleGlobResults) ([]byte, []error) { + ctx := blueprint.NewContext() ctx.RegisterSingletonType("glob", func() blueprint.Singleton { return &globSingleton{ + config: bootstrapConfig, globLister: globLister, writeRule: true, } @@ -178,3 +241,37 @@ func generateGlobNinjaFile(config interface{}, globLister func() []blueprint.Glo return buf.Bytes(), nil } + +// globsDir returns a different directory to store glob intermediates for the bootstrap and +// primary builder executions. +func globsDir(config BootstrapConfig, stage Stage) string { + buildDir := config.BuildDir() + if stage == StageMain { + return filepath.Join(buildDir, mainSubDir, "globs") + } else { + return filepath.Join(buildDir, bootstrapSubDir, "globs") + } +} + +// GlobFileListFiles returns the list of sharded glob file list files for the main stage. +func GlobFileListFiles(config BootstrapConfig) []string { + globsDir := globsDir(config, StageMain) + var fileListFiles []string + for i := 0; i < numGlobBuckets; i++ { + fileListFiles = append(fileListFiles, filepath.Join(globsDir, strconv.Itoa(i))) + } + return fileListFiles +} + +const numGlobBuckets = 1024 + +// globToBucket converts a pathtools.GlobResult into a hashed bucket number in the range +// [0, numGlobBuckets). +func globToBucket(g pathtools.GlobResult) int { + hash := fnv.New32a() + io.WriteString(hash, g.Pattern) + for _, e := range g.Excludes { + io.WriteString(hash, e) + } + return int(hash.Sum32() % numGlobBuckets) +} diff --git a/context.go b/context.go index c900e28b..f5f095b8 100644 --- a/context.go +++ b/context.go @@ -114,7 +114,7 @@ type Context struct { // cache deps modified to determine whether cachedSortedModuleGroups needs to be recalculated cachedDepsModified bool - globs map[string]GlobPath + globs map[globKey]pathtools.GlobResult globLock sync.Mutex srcDir string @@ -385,7 +385,7 @@ func newContext() *Context { moduleFactories: make(map[string]ModuleFactory), nameInterface: NewSimpleNameInterface(), moduleInfo: make(map[Module]*moduleInfo), - globs: make(map[string]GlobPath), + globs: make(map[globKey]pathtools.GlobResult), fs: pathtools.OsFs, finishedMutators: make(map[*mutatorInfo]bool), ninjaBuildDir: nil, diff --git a/glob.go b/glob.go index 67d060e5..91ae7232 100644 --- a/glob.go +++ b/glob.go @@ -15,50 +15,44 @@ package blueprint import ( - "crypto/md5" "fmt" - "path/filepath" "sort" "strings" "github.com/google/blueprint/pathtools" ) -type GlobPath struct { - pathtools.GlobResult - Name string -} - -func (g *GlobPath) FileListFile(buildDir string) string { - return filepath.Join(buildDir, ".glob", g.Name) -} - -func verifyGlob(fileName, pattern string, excludes []string, g GlobPath) { +func verifyGlob(key globKey, pattern string, excludes []string, g pathtools.GlobResult) { if pattern != g.Pattern { - panic(fmt.Errorf("Mismatched patterns %q and %q for glob file %q", pattern, g.Pattern, fileName)) + panic(fmt.Errorf("Mismatched patterns %q and %q for glob key %q", pattern, g.Pattern, key)) } if len(excludes) != len(g.Excludes) { - panic(fmt.Errorf("Mismatched excludes %v and %v for glob file %q", excludes, g.Excludes, fileName)) + panic(fmt.Errorf("Mismatched excludes %v and %v for glob key %q", excludes, g.Excludes, key)) } for i := range excludes { if g.Excludes[i] != excludes[i] { - panic(fmt.Errorf("Mismatched excludes %v and %v for glob file %q", excludes, g.Excludes, fileName)) + panic(fmt.Errorf("Mismatched excludes %v and %v for glob key %q", excludes, g.Excludes, key)) } } } func (c *Context) glob(pattern string, excludes []string) ([]string, error) { - fileName := globToFileName(pattern, excludes) + // Sort excludes so that two globs with the same excludes in a different order reuse the same + // key. Make a copy first to avoid modifying the caller's version. + excludes = append([]string(nil), excludes...) + sort.Strings(excludes) + + key := globToKey(pattern, excludes) // Try to get existing glob from the stored results c.globLock.Lock() - g, exists := c.globs[fileName] + g, exists := c.globs[key] c.globLock.Unlock() if exists { // Glob has already been done, double check it is identical - verifyGlob(fileName, pattern, excludes, g) + verifyGlob(key, pattern, excludes, g) // Return a copy so that modifications don't affect the cached value. return append([]string(nil), g.Matches...), nil } @@ -71,14 +65,14 @@ func (c *Context) glob(pattern string, excludes []string) ([]string, error) { // Store the results c.globLock.Lock() - if g, exists = c.globs[fileName]; !exists { - c.globs[fileName] = GlobPath{result, fileName} + if g, exists = c.globs[key]; !exists { + c.globs[key] = result } c.globLock.Unlock() if exists { // Getting the list raced with another goroutine, throw away the results and use theirs - verifyGlob(fileName, pattern, excludes, g) + verifyGlob(key, pattern, excludes, g) // Return a copy so that modifications don't affect the cached value. return append([]string(nil), g.Matches...), nil } @@ -87,49 +81,36 @@ func (c *Context) glob(pattern string, excludes []string) ([]string, error) { return append([]string(nil), result.Matches...), nil } -func (c *Context) Globs() []GlobPath { - fileNames := make([]string, 0, len(c.globs)) +func (c *Context) Globs() pathtools.MultipleGlobResults { + keys := make([]globKey, 0, len(c.globs)) for k := range c.globs { - fileNames = append(fileNames, k) + keys = append(keys, k) } - sort.Strings(fileNames) - globs := make([]GlobPath, len(fileNames)) - for i, fileName := range fileNames { - globs[i] = c.globs[fileName] + sort.Slice(keys, func(i, j int) bool { + if keys[i].pattern != keys[j].pattern { + return keys[i].pattern < keys[j].pattern + } + return keys[i].excludes < keys[j].excludes + }) + + globs := make(pathtools.MultipleGlobResults, len(keys)) + for i, key := range keys { + globs[i] = c.globs[key] } return globs } -func globToString(pattern string) string { - ret := "" - for _, c := range pattern { - switch { - case c >= 'a' && c <= 'z', - c >= 'A' && c <= 'Z', - c >= '0' && c <= '9', - c == '_', c == '-', c == '/': - ret += string(c) - default: - ret += "_" - } - } - - return ret +// globKey combines a pattern and a list of excludes into a hashable struct to be used as a key in +// a map. +type globKey struct { + pattern string + excludes string } -func globToFileName(pattern string, excludes []string) string { - name := globToString(pattern) - excludeName := "" - for _, e := range excludes { - excludeName += "__" + globToString(e) - } - - // Prevent file names from reaching ninja's path component limit - if strings.Count(name, "/")+strings.Count(excludeName, "/") > 30 { - excludeName = fmt.Sprintf("___%x", md5.Sum([]byte(excludeName))) - } - - return name + excludeName + ".glob" +// globToKey converts a pattern and an excludes list into a globKey struct that is hashable and +// usable as a key in a map. +func globToKey(pattern string, excludes []string) globKey { + return globKey{pattern, strings.Join(excludes, "|")} } diff --git a/pathtools/glob.go b/pathtools/glob.go index 374770d2..14cdacfc 100644 --- a/pathtools/glob.go +++ b/pathtools/glob.go @@ -15,21 +15,20 @@ package pathtools import ( + "encoding/json" "errors" "fmt" "io/ioutil" "os" "path/filepath" "strings" - - "github.com/google/blueprint/deptools" ) // BPGlobArgumentVersion is used to abort argument parsing early when the bpglob argument format // has changed but soong_build hasn't had a chance to rerun yet to update build-globs.ninja. // Increment it manually when changing the bpglob argument format. It is located here because // pathtools is the only package that is shared between bpglob and bootstrap. -const BPGlobArgumentVersion = 1 +const BPGlobArgumentVersion = 2 var GlobMultipleRecursiveErr = errors.New("pattern contains multiple '**'") var GlobLastRecursiveErr = errors.New("pattern has '**' as last path element") @@ -54,6 +53,31 @@ func (result GlobResult) FileList() []byte { return []byte(strings.Join(result.Matches, "\n") + "\n") } +// MultipleGlobResults is a list of GlobResult structs. +type MultipleGlobResults []GlobResult + +// FileList returns the list of files matched by a list of multiple globs for writing to an output file. +func (results MultipleGlobResults) FileList() []byte { + multipleMatches := make([][]string, len(results)) + for i, result := range results { + multipleMatches[i] = result.Matches + } + buf, err := json.Marshal(multipleMatches) + if err != nil { + panic(fmt.Errorf("failed to marshal glob results to json: %w", err)) + } + return buf +} + +// Deps returns the deps from all of the GlobResults. +func (results MultipleGlobResults) Deps() []string { + var deps []string + for _, result := range results { + deps = append(deps, result.Deps...) + } + return deps +} + // Glob returns the list of files and directories that match the given pattern // but do not match the given exclude patterns, along with the list of // directories and other dependencies that were searched to construct the file @@ -344,32 +368,6 @@ func HasGlob(in []string) bool { return false } -// GlobWithDepFile finds all files and directories that match glob. Directories -// will have a trailing '/'. It compares the list of matches against the -// contents of fileListFile, and rewrites fileListFile if it has changed. It -// also writes all of the the directories it traversed as dependencies on -// fileListFile to depFile. -// -// The format of glob is either path/*.ext for a single directory glob, or -// path/**/*.ext for a recursive glob. -// -// Returns a list of file paths, and an error. -// -// In general ModuleContext.GlobWithDeps or SingletonContext.GlobWithDeps -// should be used instead, as they will automatically set up dependencies -// to rerun the primary builder when the list of matching files changes. -func GlobWithDepFile(glob, fileListFile, depFile string, excludes []string) ([]string, error) { - result, err := Glob(glob, excludes, FollowSymlinks) - if err != nil { - return nil, err - } - - WriteFileIfChanged(fileListFile, result.FileList(), 0666) - deptools.WriteDepFile(depFile, fileListFile, result.Deps) - - return result.Matches, nil -} - // WriteFileIfChanged wraps ioutil.WriteFile, but only writes the file if // the files does not already exist with identical contents. This can be used // along with ninja restat rules to skip rebuilding downstream rules if no