Skip to content

Commit 7ed6385

Browse files
committed
Merge pull request yasserg#65 from MadEgg/shutdown-empty
Allow crawler to keep running with an empty queue
2 parents ce856f4 + 9687c8e commit 7ed6385

File tree

2 files changed

+29
-3
lines changed

2 files changed

+29
-3
lines changed

src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java

+16
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ public class CrawlConfig {
123123
*/
124124
private boolean onlineTldListUpdate = false;
125125

126+
/**
127+
* Whether the crawler should stop running when the queue is empty. Defaults to true.
128+
*/
129+
private boolean shutdownOnEmptyQueue = true;
130+
126131
/**
127132
* If crawler should run behind a proxy, this parameter can be used for
128133
* specifying the proxy host.
@@ -378,6 +383,17 @@ public boolean isFollowRedirects() {
378383
public void setFollowRedirects(boolean followRedirects) {
379384
this.followRedirects = followRedirects;
380385
}
386+
387+
public boolean isShutdownOnEmptyQueue() {
388+
return shutdownOnEmptyQueue;
389+
}
390+
391+
/**
392+
* Sets whether the crawler should stop running when the queue is empty.
393+
*/
394+
public void setShutdownOnEmptyQueue(boolean shutdown) {
395+
shutdownOnEmptyQueue = shutdown;
396+
}
381397

382398
public boolean isOnlineTldListUpdate() {
383399
return onlineTldListUpdate;

src/main/java/edu/uci/ics/crawler4j/crawler/CrawlController.java

+13 -3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@
3636
import edu.uci.ics.crawler4j.url.WebURL;
3737
import edu.uci.ics.crawler4j.util.IO;
3838

39+
import org.slf4j.Logger;
40+
import org.slf4j.LoggerFactory;
41+
42+
import java.io.File;
43+
import java.util.ArrayList;
44+
import java.util.List;
45+
3946
/**
4047
* The controller that manages a crawling session. This class creates the
4148
* crawler threads and monitors their progress.
@@ -254,8 +261,11 @@ public void run() {
254261
someoneIsWorking = true;
255262
}
256263
}
257-
if (!someoneIsWorking) {
258-
// Make sure again that none of the threads are alive.
264+
boolean shut_on_empty = config.isShutdownOnEmptyQueue();
265+
if (!someoneIsWorking && shut_on_empty) {
266+
// Make sure again that none of the threads
267+
// are
268+
// alive.
259269
logger.info("It looks like no thread is working, waiting for 10 seconds to make sure...");
260270
sleep(10);
261271

@@ -511,4 +521,4 @@ public void shutdown() {
511521
pageFetcher.shutDown();
512522
frontier.finish();
513523
}
514-
}
524+
}

0 commit comments

Comments
 (0)