Skip to content

Commit

Permalink
detect repositories for each source root subdirectory in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
Vladimir Kotal committed Oct 6, 2020
1 parent d5999cd commit 8df660a
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ public final class Configuration {
private int indexingParallelism;
private int historyParallelism;
private int historyRenamedParallelism;
private int repositorySearchParallelism;
private boolean tagsEnabled;
private int hitsPerPage;
private int cachePages;
Expand Down Expand Up @@ -238,9 +239,9 @@ public final class Configuration {
* The directory hierarchy depth to limit the scanning for repositories.
* E.g. if the /mercurial/ directory (relative to source root) is a repository
* and /mercurial/usr/closed/ is sub-repository, the latter will be discovered
* only if the depth is set to 3 or greater.
* only if the depth is set to 2 or greater.
*/
public static final int defaultScanningDepth = 3;
public static final int defaultScanningDepth = 2;

/**
* The name of the eftar file relative to the <var>DATA_ROOT</var>, which
Expand Down Expand Up @@ -1145,6 +1146,14 @@ public void setHistoryRenamedParallelism(int value) {
this.historyRenamedParallelism = value > 0 ? value : 0;
}

public int getRepositorySearchParallelism() {
return repositorySearchParallelism;
}

public void setRepositorySearchParallelism(int value) {
this.repositorySearchParallelism = value > 0 ? value : 0;
}

public boolean isTagsEnabled() {
return this.tagsEnabled;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,17 @@ public int getHistoryRenamedParallelism() {
parallelism;
}

/**
* Gets the value of {@link Configuration#getRepositorySearchParallelism()} -- or
* if zero, then as a default gets the number of available processors.
* @return a natural number &gt;= 1
*/
public int getRepositorySearchParallelism() {
int parallelism = syncReadConfiguration(Configuration::getRepositorySearchParallelism);
return parallelism < 1 ? Runtime.getRuntime().availableProcessors() :
parallelism;
}

public boolean isTagsEnabled() {
return syncReadConfiguration(Configuration::isTagsEnabled);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
*/
package org.opengrok.indexer.history;
Expand All @@ -41,6 +41,7 @@
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -88,15 +89,12 @@ public final class HistoryGuru {
*/
private final Map<String, String> repositoryRoots = new ConcurrentHashMap<>();

private final int scanningDepth;

/**
* Creates a new instance of HistoryGuru, and try to set the default source
* control system.
*/
private HistoryGuru() {
env = RuntimeEnvironment.getInstance();
scanningDepth = env.getScanningDepth();

HistoryCache cache = null;
if (env.useHistoryCache()) {
Expand Down Expand Up @@ -435,7 +433,7 @@ private Collection<RepositoryInfo> addRepositories(File[] files,
"Failed to get sub directories for ''{0}'', " +
"check access permissions.",
file.getAbsolutePath());
} else if (depth <= scanningDepth) {
} else if (depth <= env.getScanningDepth()) {
repoList.addAll(addRepositories(subFiles,
allowedNesting, depth + 1, isNested));
}
Expand All @@ -453,7 +451,7 @@ private Collection<RepositoryInfo> addRepositories(File[] files,
LOGGER.log(Level.WARNING,
"Failed to get sub directories for ''{0}'', check access permissions.",
file.getAbsolutePath());
} else if (depth <= scanningDepth) {
} else if (depth <= env.getScanningDepth()) {
// Search down to a limit -- if not: too much
// stat'ing for huge Mercurial repositories
repoList.addAll(addRepositories(subFiles,
Expand All @@ -480,7 +478,25 @@ private Collection<RepositoryInfo> addRepositories(File[] files,
* @return collection of added repositories
*/
public Collection<RepositoryInfo> addRepositories(File[] files) {
return addRepositories(files, env.getNestingMaximum(), 0, false);
ExecutorService executor = env.getIndexerParallelizer().getRepositorySearchExecutor();
List<Future<Collection<RepositoryInfo>>> futures = new ArrayList<>();
for (File file: files) {
futures.add(executor.submit(() -> addRepositories(new File[]{file},
env.getNestingMaximum(), 0, false)));
}

List<RepositoryInfo> repoList = new ArrayList<>();
futures.forEach(future -> {
try {
repoList.addAll(future.get());
} catch (Exception e) {
LOGGER.log(Level.WARNING, "failed to get results of repository scan");
}
});

env.getIndexerParallelizer().bounceRepositorySearchExecutor();

return repoList;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,12 +336,16 @@ public static void main(String[] argv) {
// Create history cache first.
if (searchRepositories) {
if (searchPaths.isEmpty()) {
searchPaths.add(env.getSourceRootPath());
} else {
searchPaths = searchPaths.stream().
map(t -> Paths.get(env.getSourceRootPath(), t).toString()).
collect(Collectors.toSet());
String[] dirs = env.getSourceRootFile().
list((f, name) -> f.isDirectory() && env.getPathAccepter().accept(f));
if (dirs != null) {
searchPaths.addAll(Arrays.asList(dirs));
}
}

searchPaths = searchPaths.stream().
map(t -> Paths.get(env.getSourceRootPath(), t).toString()).
collect(Collectors.toSet());
}
getInstance().prepareIndexer(env, searchPaths, addProjects,
createDict, runIndex, subFiles, new ArrayList<>(repositories));
Expand Down Expand Up @@ -609,7 +613,7 @@ public static String[] parseOptions(String[] argv) throws ParseException {
runIndex = false);

parser.on("--nestingMaximum", "=number",
"Maximum of nested repositories. Default is 1.").execute(v ->
"Maximum depth of nested repositories. Default is 1.").execute(v ->
cfg.setNestingMaximum((Integer) v));

parser.on("-O", "--optimize", "=on|off", ON_OFF, Boolean.class,
Expand Down Expand Up @@ -739,10 +743,13 @@ public static String[] parseOptions(String[] argv) throws ParseException {
cfg.setWebappLAF((String) stylePath));

parser.on("-T", "--threads", "=number", Integer.class,
"The number of threads to use for index generation. By default the number",
"of threads will be set to the number of available CPUs. This influences the number",
"of spawned ctags processes as well.").execute(threadCount ->
cfg.setIndexingParallelism((Integer) threadCount));
"The number of threads to use for index generation and repository scan.",
"By default the number of threads will be set to the number of available",
"CPUs. This influences the number of spawned ctags processes as well.").
execute(threadCount -> {
cfg.setIndexingParallelism((Integer) threadCount);
cfg.setRepositorySearchParallelism((Integer) threadCount);
});

parser.on("-t", "--tabSize", "=number", Integer.class,
"Default tab size to use (number of spaces per tab character).")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

/*
* Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
*/

package org.opengrok.indexer.index;
Expand Down Expand Up @@ -61,6 +62,7 @@ public class IndexerParallelizer implements AutoCloseable {
private LazilyInstantiate<ExecutorService> lzHistoryExecutor;
private LazilyInstantiate<ExecutorService> lzHistoryRenamedExecutor;
private LazilyInstantiate<ExecutorService> lzCtagsWatcherExecutor;
private LazilyInstantiate<ExecutorService> lzRepositorySearchExecutor;

/**
* Initializes a new instance using settings from the specified environment
Expand All @@ -84,6 +86,7 @@ public IndexerParallelizer(RuntimeEnvironment env) {
createLazyHistoryExecutor();
createLazyHistoryRenamedExecutor();
createLazyCtagsWatcherExecutor();
createLazyRepositorySearchExecutor();
}

/**
Expand Down Expand Up @@ -128,6 +131,13 @@ public ExecutorService getCtagsWatcherExecutor() {
return lzCtagsWatcherExecutor.get();
}

/**
* @return the ExecutorService used for repository scan
*/
public ExecutorService getRepositorySearchExecutor() {
return lzRepositorySearchExecutor.get();
}

/**
* Calls {@link #bounce()}, which prepares for -- but does not start -- new
* pools.
Expand All @@ -154,43 +164,75 @@ public void close() {
* call this method satisfactorily too.
*/
public void bounce() {
bounceForkJoinPool();
bounceFixedExecutor();
bounceCtagsPool();
bounceHistoryExecutor();
bounceHistoryRenamedExecutor();
bounceCtagsWatcherExecutor();
bounceRepositorySearchExecutor();
}

private void bounceForkJoinPool() {
if (lzForkJoinPool.isActive()) {
ForkJoinPool formerForkJoinPool = lzForkJoinPool.get();
createLazyForkJoinPool();
formerForkJoinPool.shutdown();
}
}

private void bounceFixedExecutor() {
if (lzFixedExecutor.isActive()) {
ExecutorService formerFixedExecutor = lzFixedExecutor.get();
createLazyFixedExecutor();
formerFixedExecutor.shutdown();
}
}

private void bounceCtagsPool() {
if (lzCtagsPool.isActive()) {
ObjectPool<Ctags> formerCtagsPool = lzCtagsPool.get();
createLazyCtagsPool();
formerCtagsPool.shutdown();
}
}

private void bounceHistoryExecutor() {
if (lzHistoryExecutor.isActive()) {
ExecutorService formerHistoryExecutor = lzHistoryExecutor.get();
createLazyHistoryExecutor();
formerHistoryExecutor.shutdown();
}
}

private void bounceHistoryRenamedExecutor() {
if (lzHistoryRenamedExecutor.isActive()) {
ExecutorService formerHistoryRenamedExecutor = lzHistoryRenamedExecutor.get();
createLazyHistoryRenamedExecutor();
formerHistoryRenamedExecutor.shutdown();
}
}

private void bounceCtagsWatcherExecutor() {
if (lzCtagsWatcherExecutor.isActive()) {
ExecutorService formerCtagsWatcherExecutor = lzCtagsWatcherExecutor.get();
createLazyCtagsWatcherExecutor();
formerCtagsWatcherExecutor.shutdown();
}
}

/**
* Shutdown the executor used for repository search.
* @see #bounce()
*/
public void bounceRepositorySearchExecutor() {
if (lzRepositorySearchExecutor.isActive()) {
ExecutorService formerRepositorySearchExecutor = lzRepositorySearchExecutor.get();
createLazyRepositorySearchExecutor();
formerRepositorySearchExecutor.shutdown();
}
}

private void createLazyForkJoinPool() {
lzForkJoinPool = LazilyInstantiate.using(() ->
new ForkJoinPool(indexingParallelism));
Expand Down Expand Up @@ -226,6 +268,11 @@ private void createLazyHistoryRenamedExecutor() {
Executors.newFixedThreadPool(env.getHistoryRenamedParallelism()));
}

private void createLazyRepositorySearchExecutor() {
lzRepositorySearchExecutor = LazilyInstantiate.using(() ->
Executors.newFixedThreadPool(env.getRepositorySearchParallelism()));
}

private class CtagsObjectFactory implements ObjectFactory<Ctags> {

public Ctags createNew() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import org.junit.After;
import org.junit.AfterClass;
Expand Down Expand Up @@ -252,4 +253,37 @@ private static void certainlyMkdirs(File file) throws IOException {
throw new IOException("Couldn't mkdirs " + file);
}
}

@Test
@ConditionalRun(RepositoryInstalled.GitInstalled.class)
public void testScanningDepth() throws IOException {
String topLevelDirName = "scanDepthTest";
File repoRoot = new File(repository.getSourceRoot(), topLevelDirName);
certainlyMkdirs(repoRoot);
File repo0 = new File(repoRoot, ".git");
certainlyMkdirs(repo0);
File sub1 = new File(repoRoot, "sub1");
certainlyMkdirs(sub1);
File sub2 = new File(sub1, "sub2");
certainlyMkdirs(sub2);
File sub3 = new File(sub2, ".git");
certainlyMkdirs(sub3);

int originalScanDepth = env.getScanningDepth();
env.setScanningDepth(0);

HistoryGuru instance = HistoryGuru.getInstance();
Collection<RepositoryInfo> addedRepos = instance.addRepositories(
Collections.singleton(Paths.get(repository.getSourceRoot(),topLevelDirName).toString()));
assertEquals("should add to max depth", 1, addedRepos.size());

env.setScanningDepth(1);
List<String> repoDirs = addedRepos.stream().map(RepositoryInfo::getDirectoryName).collect(Collectors.toList());
instance.removeRepositories(repoDirs);
addedRepos = instance.addRepositories(
Collections.singleton(Paths.get(repository.getSourceRoot(),topLevelDirName).toString()));
assertEquals("should add to increased max depth", 2, addedRepos.size());

env.setScanningDepth(originalScanDepth);
}
}

0 comments on commit 8df660a

Please sign in to comment.