Commit 810f705

Merge pull request yasserg#70 from MadEgg/fetch-errors
Add onUnhandledException callback to handle unhandled exceptions during fetching
2 parents: bdbdc3e + f9c82fe

File tree

1 file changed: +15 −4 lines changed


src/main/java/edu/uci/ics/crawler4j/crawler/WebCrawler.java

+15 −4
@@ -212,7 +212,20 @@ protected void onContentFetchError(WebURL webUrl) {
     // Do nothing by default (except basic logging)
     // Sub-classed can override this to add their custom functionality
   }
-
+
+  /**
+   * This function is called when an unhandled exception is encountered during fetching.
+   *
+   * @param webUrl URL where the unhandled exception occurred
+   */
+  protected void onUnhandledException(WebURL webUrl, Throwable e) {
+    String urlStr = (webUrl == null ? "NULL" : webUrl.getURL());
+    logger.warn("Unhandled exception while fetching {}: {}", urlStr, e.getMessage());
+    logger.info("Stacktrace: ", e);
+    // Do nothing by default (except basic logging)
+    // Sub-classed can override this to add their custom functionality
+  }
+
   /**
    * This function is called if there has been an error in parsing the content.
    *
@@ -418,9 +431,7 @@ private void processPage(WebURL curURL) {
     } catch (NotAllowedContentException nace) {
       logger.debug("Skipping: {} as it contains binary content which you configured not to crawl", curURL.getURL());
     } catch (Exception e) {
-      String urlStr = (curURL == null ? "NULL" : curURL.getURL());
-      logger.error("{}, while processing: {}", e.getMessage(), urlStr);
-      logger.debug("Stacktrace", e);
+      onUnhandledException(curURL, e);
     } finally {
       if (fetchResult != null) {
         fetchResult.discardContentIfNotConsumed();
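
For subclasses that want more than the default logging, a minimal sketch of an override is shown below. MyCrawler and the per-domain bookkeeping are illustrative assumptions, not part of this commit; only onUnhandledException(WebURL, Throwable), WebURL#getURL(), and WebURL#getDomain() come from crawler4j itself.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.url.WebURL;

// Hypothetical subclass; illustrates the new callback, not part of this commit.
public class MyCrawler extends WebCrawler {

  private static final Logger customLogger = LoggerFactory.getLogger(MyCrawler.class);

  @Override
  protected void onUnhandledException(WebURL webUrl, Throwable e) {
    // Keep the default warn/info logging added by this commit.
    super.onUnhandledException(webUrl, e);

    // Assumed custom handling: flag the failing domain for later inspection.
    if (webUrl != null) {
      customLogger.warn("Marking domain {} after unhandled exception", webUrl.getDomain());
    }
  }
}

Funneling the catch-all in processPage through a protected callback preserves the old behavior for existing crawlers (the base implementation still logs) while giving subclasses a single point to record failures or schedule retries.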
