@@ -123,8 +123,30 @@ public CrawlController(CrawlConfig config, PageFetcher pageFetcher, RobotstxtSer
123
123
shuttingDown = false ;
124
124
}
125
125
126
/**
 * Factory that supplies the crawler instances used by a crawling session.
 * The controller's start logic calls {@link #newInstance()} once for every
 * crawler thread it creates, and again whenever it recreates a crawler
 * for a thread that has died.
 *
 * @param <T> the crawler type produced by this factory
 */
+ public interface WebCrawlerFactory <T extends WebCrawler > {
127
/**
 * Creates a crawler instance.
 *
 * @return the crawler to run on a crawler thread
 * @throws Exception if the crawler cannot be created
 */
+ T newInstance () throws Exception ;
128
+ }
129
+
130
+ private static class DefaultWebCrawlerFactory <T extends WebCrawler > implements WebCrawlerFactory <T > {
131
+ final Class <T > _c ;
132
+
133
+ DefaultWebCrawlerFactory (Class <T > _c ) {
134
+ this ._c = _c ;
135
+ }
136
+
137
+ @ Override
138
+ public T newInstance () throws Exception {
139
+ try {
140
+ return _c .newInstance ();
141
+ } catch (ReflectiveOperationException e ) {
142
+ throw e ;
143
+ }
144
+ }
145
+ }
146
+
126
147
/**
127
148
* Start the crawling session and wait for it to finish.
149
+ * This method uses the default crawler factory, which creates each new crawler via Java reflection.
128
150
*
129
151
* @param _c
130
152
* the class that implements the logic for crawler threads
@@ -134,11 +156,40 @@ public CrawlController(CrawlConfig config, PageFetcher pageFetcher, RobotstxtSer
134
156
* @param <T> Your class extending WebCrawler
135
157
*/
136
158
public <T extends WebCrawler > void start (final Class <T > _c , final int numberOfCrawlers ) {
137
- this .start (_c , numberOfCrawlers , true );
159
+ this .start (new DefaultWebCrawlerFactory <>(_c ), numberOfCrawlers , true );
160
+ }
161
+
162
+ /**
163
+ * Start the crawling session and wait for it to finish.
164
+ *
165
+ * @param crawlerFactory
166
+ * factory to create crawlers on demand for each thread
167
+ * @param numberOfCrawlers
168
+ * the number of concurrent threads that will be contributing in
169
+ * this crawling session.
170
+ * @param <T> Your class extending WebCrawler
171
+ */
172
+ public <T extends WebCrawler > void start (final WebCrawlerFactory <T > crawlerFactory , final int numberOfCrawlers ) {
173
+ this .start (crawlerFactory , numberOfCrawlers , true );
174
+ }
175
+
176
+ /**
177
+ * Start the crawling session and return immediately.
178
+ *
179
+ * @param crawlerFactory
180
+ * factory to create crawlers on demand for each thread
181
+ * @param numberOfCrawlers
182
+ * the number of concurrent threads that will be contributing in
183
+ * this crawling session.
184
+ * @param <T> Your class extending WebCrawler
185
+ */
186
+ public <T extends WebCrawler > void startNonBlocking (WebCrawlerFactory <T > crawlerFactory , final int numberOfCrawlers ) {
187
+ this .start (crawlerFactory , numberOfCrawlers , false );
138
188
}
139
189
140
190
/**
141
191
* Start the crawling session and return immediately.
192
+ * This method uses the default crawler factory, which creates each new crawler via Java reflection.
142
193
*
143
194
* @param _c
144
195
* the class that implements the logic for crawler threads
@@ -148,18 +199,18 @@ public <T extends WebCrawler> void start(final Class<T> _c, final int numberOfCr
148
199
* @param <T> Your class extending WebCrawler
149
200
*/
150
201
public <T extends WebCrawler > void startNonBlocking (final Class <T > _c , final int numberOfCrawlers ) {
151
- this .start (_c , numberOfCrawlers , false );
202
+ this .start (new DefaultWebCrawlerFactory <>( _c ) , numberOfCrawlers , false );
152
203
}
153
204
154
- protected <T extends WebCrawler > void start (final Class <T > _c , final int numberOfCrawlers , boolean isBlocking ) {
205
+ protected <T extends WebCrawler > void start (final WebCrawlerFactory <T > crawlerFactory , final int numberOfCrawlers , boolean isBlocking ) {
155
206
try {
156
207
finished = false ;
157
208
crawlersLocalData .clear ();
158
209
final List <Thread > threads = new ArrayList <>();
159
210
final List <T > crawlers = new ArrayList <>();
160
211
161
212
for (int i = 1 ; i <= numberOfCrawlers ; i ++) {
162
- T crawler = _c .newInstance ();
213
+ T crawler = crawlerFactory .newInstance ();
163
214
Thread thread = new Thread (crawler , "Crawler " + i );
164
215
crawler .setThread (thread );
165
216
crawler .init (i , this );
@@ -186,7 +237,7 @@ public void run() {
186
237
if (!thread .isAlive ()) {
187
238
if (!shuttingDown ) {
188
239
logger .info ("Thread {} was dead, I'll recreate it" , i );
189
- T crawler = _c .newInstance ();
240
+ T crawler = crawlerFactory .newInstance ();
190
241
thread = new Thread (crawler , "Crawler " + (i + 1 ));
191
242
threads .remove (i );
192
243
threads .add (i , thread );
0 commit comments