Skip to content

Commit 95be562

Browse files
committed
Merge pull request yasserg#48 from igor-sokolov/master
Added a factory for instantiation of WebCrawler in CrawlController
2 parents ceaeaf1 + c836dcb commit 95be562

File tree

1 file changed

+56
-5
lines changed

1 file changed

+56
-5
lines changed

src/main/java/edu/uci/ics/crawler4j/crawler/CrawlController.java

+56-5
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,30 @@ public CrawlController(CrawlConfig config, PageFetcher pageFetcher, RobotstxtSer
123123
shuttingDown = false;
124124
}
125125

126+
/**
 * Factory that supplies the {@link WebCrawler} instances run by this controller.
 *
 * @param <T> the concrete crawler type produced by the factory
 */
public interface WebCrawlerFactory<T extends WebCrawler> {
127+
/**
 * Supplies a crawler instance. The controller's start logic calls this once per
 * crawler thread, and again when it recreates a thread that has died.
 *
 * @return a crawler of type {@code T}
 * @throws Exception if the crawler cannot be created
 */
T newInstance() throws Exception;
128+
}
129+
130+
private static class DefaultWebCrawlerFactory<T extends WebCrawler> implements WebCrawlerFactory<T> {
131+
final Class<T> _c;
132+
133+
DefaultWebCrawlerFactory(Class<T> _c) {
134+
this._c = _c;
135+
}
136+
137+
@Override
138+
public T newInstance() throws Exception {
139+
try {
140+
return _c.newInstance();
141+
} catch (ReflectiveOperationException e) {
142+
throw e;
143+
}
144+
}
145+
}
146+
126147
/**
127148
* Start the crawling session and wait for it to finish.
149+
* This method uses the default crawler factory, which creates each new crawler via Java reflection.
128150
*
129151
* @param _c
130152
* the class that implements the logic for crawler threads
@@ -134,11 +156,40 @@ public CrawlController(CrawlConfig config, PageFetcher pageFetcher, RobotstxtSer
134156
* @param <T> Your class extending WebCrawler
135157
*/
136158
public <T extends WebCrawler> void start(final Class<T> _c, final int numberOfCrawlers) {
// Blocking entry point: the crawler class is wrapped in DefaultWebCrawlerFactory and
// handed to the factory-based start(...) with isBlocking = true. The line marked
// "137-" below is the pre-change call that passed the class directly.
137-
this.start(_c, numberOfCrawlers, true);
159+
this.start(new DefaultWebCrawlerFactory<>(_c), numberOfCrawlers, true);
160+
}
161+
162+
/**
163+
* Start the crawling session and wait for it to finish.
164+
*
165+
* @param crawlerFactory
166+
* factory to create crawlers on demand for each thread
167+
* @param numberOfCrawlers
168+
* the number of concurrent threads that will be contributing to
169+
* this crawling session.
170+
* @param <T> Your class extending WebCrawler
171+
*/
172+
public <T extends WebCrawler> void start(final WebCrawlerFactory<T> crawlerFactory, final int numberOfCrawlers) {
173+
// Delegates to the protected three-argument start(...) with isBlocking = true.
this.start(crawlerFactory, numberOfCrawlers, true);
174+
}
175+
176+
/**
177+
* Start the crawling session and return immediately.
178+
*
179+
* @param crawlerFactory
180+
* factory to create crawlers on demand for each thread
181+
* @param numberOfCrawlers
182+
* the number of concurrent threads that will be contributing to
183+
* this crawling session.
184+
* @param <T> Your class extending WebCrawler
185+
*/
186+
public <T extends WebCrawler> void startNonBlocking(WebCrawlerFactory<T> crawlerFactory, final int numberOfCrawlers) {
187+
// Delegates to the protected three-argument start(...) with isBlocking = false.
this.start(crawlerFactory, numberOfCrawlers, false);
138188
}
139189

140190
/**
141191
* Start the crawling session and return immediately.
192+
* This method uses the default crawler factory, which creates each new crawler via Java reflection.
142193
*
143194
* @param _c
144195
* the class that implements the logic for crawler threads
@@ -148,18 +199,18 @@ public <T extends WebCrawler> void start(final Class<T> _c, final int numberOfCr
148199
* @param <T> Your class extending WebCrawler
149200
*/
150201
public <T extends WebCrawler> void startNonBlocking(final Class<T> _c, final int numberOfCrawlers) {
// Non-blocking entry point: the crawler class is wrapped in DefaultWebCrawlerFactory
// and handed to the factory-based start(...) with isBlocking = false. The line marked
// "151-" below is the pre-change call that passed the class directly.
151-
this.start(_c, numberOfCrawlers, false);
202+
this.start(new DefaultWebCrawlerFactory<>(_c), numberOfCrawlers, false);
152203
}
153204

154-
protected <T extends WebCrawler> void start(final Class<T> _c, final int numberOfCrawlers, boolean isBlocking) {
205+
protected <T extends WebCrawler> void start(final WebCrawlerFactory<T> crawlerFactory, final int numberOfCrawlers, boolean isBlocking) {
155206
try {
156207
finished = false;
157208
crawlersLocalData.clear();
158209
final List<Thread> threads = new ArrayList<>();
159210
final List<T> crawlers = new ArrayList<>();
160211

161212
for (int i = 1; i <= numberOfCrawlers; i++) {
162-
T crawler = _c.newInstance();
213+
T crawler = crawlerFactory.newInstance();
163214
Thread thread = new Thread(crawler, "Crawler " + i);
164215
crawler.setThread(thread);
165216
crawler.init(i, this);
@@ -186,7 +237,7 @@ public void run() {
186237
if (!thread.isAlive()) {
187238
if (!shuttingDown) {
188239
logger.info("Thread {} was dead, I'll recreate it", i);
189-
T crawler = _c.newInstance();
240+
T crawler = crawlerFactory.newInstance();
190241
thread = new Thread(crawler, "Crawler " + (i + 1));
191242
threads.remove(i);
192243
threads.add(i, thread);

0 commit comments

Comments
 (0)