Skip to content

Commit

Permalink
Add a setting to configure the behavior of the crawler when the queue is
empty. When set to true, the crawler shuts down. When set to false, it
keeps running, which can be useful when building a daemon that waits for
more work.
Browse files Browse the repository at this point in the history
  • Loading branch information
EgbertW committed May 26, 2015
1 parent c51fa77 commit 9687c8e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
16 changes: 16 additions & 0 deletions src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ public class CrawlConfig {
*/
private boolean onlineTldListUpdate = false;

/**
 * Should the crawler stop running when the queue is empty?
 * Defaults to {@code true} (shut down). Set to {@code false} to keep the
 * crawler running, e.g. when it is used as a daemon waiting for more work.
 */
private boolean shutdownOnEmptyQueue = true;

/**
* If crawler should run behind a proxy, this parameter can be used for
* specifying the proxy host.
Expand Down Expand Up @@ -378,6 +383,17 @@ public boolean isFollowRedirects() {
/**
 * Sets the {@code followRedirects} flag of this configuration.
 *
 * @param followRedirects the new value of the flag
 */
public void setFollowRedirects(boolean followRedirects) {
this.followRedirects = followRedirects;
}

/**
 * Tells whether the crawler should stop running when the queue is empty.
 *
 * @return {@code true} if the crawler shuts down on an empty queue,
 *         {@code false} if it keeps running
 */
public boolean isShutdownOnEmptyQueue() {
    return this.shutdownOnEmptyQueue;
}

/**
 * Configures whether the crawler should stop running when the queue is
 * empty.
 *
 * @param shutdown {@code true} to shut down once the queue is empty,
 *                 {@code false} to keep the crawler running
 */
public void setShutdownOnEmptyQueue(boolean shutdown) {
    this.shutdownOnEmptyQueue = shutdown;
}

public boolean isOnlineTldListUpdate() {
return onlineTldListUpdate;
Expand Down
16 changes: 13 additions & 3 deletions src/main/java/edu/uci/ics/crawler4j/crawler/CrawlController.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@
import edu.uci.ics.crawler4j.url.WebURL;
import edu.uci.ics.crawler4j.util.IO;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
* The controller that manages a crawling session. This class creates the
* crawler threads and monitors their progress.
Expand Down Expand Up @@ -254,8 +261,11 @@ public void run() {
someoneIsWorking = true;
}
}
if (!someoneIsWorking) {
// Make sure again that none of the threads are alive.
boolean shut_on_empty = config.isShutdownOnEmptyQueue();
if (!someoneIsWorking && shut_on_empty) {
// Make sure again that none of the threads
// are
// alive.
logger.info("It looks like no thread is working, waiting for 10 seconds to make sure...");
sleep(10);

Expand Down Expand Up @@ -511,4 +521,4 @@ public void shutdown() {
pageFetcher.shutDown();
frontier.finish();
}
}
}

0 comments on commit 9687c8e

Please sign in to comment.