
Commit 810f705
Merge pull request yasserg#70 from MadEgg/fetch-errors
Add onUnhandledException callback to handle unhandled exceptions during fetching
yasserg committed Jul 21, 2015
2 parents bdbdc3e + f9c82fe commit 810f705
Showing 1 changed file with 15 additions and 4 deletions.
src/main/java/edu/uci/ics/crawler4j/crawler/WebCrawler.java
@@ -212,7 +212,20 @@ protected void onContentFetchError(WebURL webUrl) {
     // Do nothing by default (except basic logging)
     // Subclasses can override this to add their custom functionality
   }
+
+  /**
+   * This function is called when an unhandled exception is encountered during fetching.
+   *
+   * @param webUrl URL where an unhandled exception occurred
+   */
+  protected void onUnhandledException(WebURL webUrl, Throwable e) {
+    String urlStr = (webUrl == null ? "NULL" : webUrl.getURL());
+    logger.warn("Unhandled exception while fetching {}: {}", urlStr, e.getMessage());
+    logger.info("Stacktrace: ", e);
+    // Do nothing by default (except basic logging)
+    // Subclasses can override this to add their custom functionality
+  }
 
   /**
    * This function is called if there has been an error in parsing the content.
    *
@@ -418,9 +431,7 @@ private void processPage(WebURL curURL) {
     } catch (NotAllowedContentException nace) {
       logger.debug("Skipping: {} as it contains binary content which you configured not to crawl", curURL.getURL());
     } catch (Exception e) {
-      String urlStr = (curURL == null ? "NULL" : curURL.getURL());
-      logger.error("{}, while processing: {}", e.getMessage(), urlStr);
-      logger.debug("Stacktrace", e);
+      onUnhandledException(curURL, e);
     } finally {
       if (fetchResult != null) {
         fetchResult.discardContentIfNotConsumed();
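Since processPage now routes any unhandled Exception through the new callback, subclasses get a single hook for failures that were previously only written to the log. Below is a minimal sketch of an override; MyCrawler and the failedUrls set are illustrative and not part of this commit:

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.url.WebURL;

public class MyCrawler extends WebCrawler {

  // Hypothetical shared set of URLs that hit unhandled exceptions,
  // e.g. for reporting or a later retry pass.
  private static final Set<String> failedUrls =
      Collections.synchronizedSet(new HashSet<String>());

  @Override
  protected void onUnhandledException(WebURL webUrl, Throwable e) {
    // Keep the default warn/info logging from WebCrawler.
    super.onUnhandledException(webUrl, e);
    if (webUrl != null) {
      failedUrls.add(webUrl.getURL());
    }
  }
}

Overriding the callback rather than patching processPage keeps the default logging intact while still letting the crawler react to fetch failures.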
