Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
s17t committed Dec 11, 2018
1 parent 1a0df4d commit f9f2ba2
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,21 @@ public interface WebCrawlerFactory<T extends WebCrawler> {
T newInstance() throws Exception;
}

/**
 * Factory that always hands out the one crawler instance it was constructed with.
 * Every call to {@link #newInstance()} returns that same object; no new crawler is created.
 *
 * @param <T> the concrete crawler type
 */
private static class SingleInstanceFactory<T extends WebCrawler>
    implements WebCrawlerFactory<T> {

    private final T instance;

    /**
     * @param instance the crawler to return from {@link #newInstance()}; must not be null
     * @throws NullPointerException if {@code instance} is null
     */
    SingleInstanceFactory(T instance) {
        // Fail fast here rather than handing a null crawler to the caller of newInstance().
        this.instance = java.util.Objects.requireNonNull(instance, "instance");
    }

    /**
     * @return the instance supplied at construction time (the same object on every call)
     */
    @Override
    public T newInstance() {
        return instance;
    }
}

private static class DefaultWebCrawlerFactory<T extends WebCrawler>
implements WebCrawlerFactory<T> {
final Class<T> clazz;
Expand Down Expand Up @@ -188,6 +203,18 @@ public <T extends WebCrawler> void start(Class<T> clazz, int numberOfCrawlers) {
this.start(new DefaultWebCrawlerFactory<>(clazz), numberOfCrawlers, true);
}

/**
 * Start the crawling session with one pre-built crawler and wait for it to finish.
 * The supplied object is wrapped in a {@code SingleInstanceFactory} and the session is
 * started with a crawler count of 1, so only that instance is used for crawling.
 *
 * @param instance
 *            the crawler object that implements the logic for the crawler thread
 * @param <T> Your class extending WebCrawler
 */
public <T extends WebCrawler> void start(T instance) {
    final WebCrawlerFactory<T> singleCrawlerFactory = new SingleInstanceFactory<>(instance);
    this.start(singleCrawlerFactory, 1, true);
}

/**
* Start the crawling session and wait for it to finish.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ protected void onRedirectedStatusCode(Page page) {
//Subclasses can override this to add their custom functionality
}

/**
 * Emitted when the crawler is redirected to an invalid Location.
 * This implementation only logs a warning containing the URL of the given page;
 * subclasses can override it to add their custom functionality.
 *
 * @param page the page whose redirect did not yield a target URL
 */
protected void onRedirectedToInvalidUrl(Page page) {
    final String redirectedFrom = page.url.getURL();
    logger.warn("Unexpected error, URL: {} is redirected to NOTHING", redirectedFrom);
}

/**
* This function is called if the crawler encountered an unexpected http status code ( a
* status code other than 3xx)
Expand Down Expand Up @@ -405,8 +414,7 @@ private void processPage(WebURL curURL) {

String movedToUrl = fetchResult.getMovedToUrl();
if (movedToUrl == null) {
logger.warn("Unexpected error, URL: {} is redirected to NOTHING",
curURL);
onRedirectedToInvalidUrl(page);
return;
}
page.setRedirectedToUrl(movedToUrl);
Expand Down

0 comments on commit f9f2ba2

Please sign in to comment.