Skip to content

Commit

Permalink
Merge pull request yasserg#36 from Veritando/master
Browse files Browse the repository at this point in the history
Provide a factory method for creating the HttpUriRequest (default: HttpGet)
  • Loading branch information
yasserg committed Mar 4, 2015
2 parents 6bd194a + e01b206 commit 70fe6f1
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
Expand Down Expand Up @@ -197,9 +198,9 @@ public PageFetchResult fetchPage(WebURL webUrl)
// Getting URL, setting headers & content
PageFetchResult fetchResult = new PageFetchResult();
String toFetchURL = webUrl.getURL();
HttpGet get = null;
HttpUriRequest request = null;
try {
get = new HttpGet(toFetchURL);
request = newHttpUriRequest(toFetchURL);
// Applying Politeness delay
synchronized (mutex) {
long now = (new Date()).getTime();
Expand All @@ -209,7 +210,7 @@ public PageFetchResult fetchPage(WebURL webUrl)
lastFetchTime = (new Date()).getTime();
}

HttpResponse response = httpClient.execute(get);
HttpResponse response = httpClient.execute(request);
fetchResult.setEntity(response.getEntity());
fetchResult.setResponseHeaders(response.getAllHeaders());

Expand All @@ -229,7 +230,7 @@ public PageFetchResult fetchPage(WebURL webUrl)
}
} else if (statusCode == HttpStatus.SC_OK) { // is 200, everything looks ok
fetchResult.setFetchedUrl(toFetchURL);
String uri = get.getURI().toString();
String uri = request.getURI().toString();
if (!uri.equals(toFetchURL)) {
if (!URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL)) {
fetchResult.setFetchedUrl(uri);
Expand Down Expand Up @@ -258,8 +259,8 @@ public PageFetchResult fetchPage(WebURL webUrl)
return fetchResult;

} finally { // occurs also with thrown exceptions
if ((fetchResult.getEntity() == null) && (get != null)) {
get.abort();
if ((fetchResult.getEntity() == null) && (request != null)) {
request.abort();
}
}
}
Expand All @@ -270,4 +271,16 @@ public synchronized void shutDown() {
connectionMonitorThread.shutdown();
}
}

/**
 * Factory hook that builds the request object used to fetch the given url. Unless overridden,
 * the result is a plain {@link HttpGet} for that url with no additional configuration applied.
 * Subclasses can override this method to supply a different or pre-configured request.
 *
 * @param url the url to be fetched
 * @return the HttpUriRequest to execute for the given url
 */
protected HttpUriRequest newHttpUriRequest(String url) {
    // Default behaviour: an unconfigured GET request for the url.
    HttpUriRequest defaultRequest = new HttpGet(url);
    return defaultRequest;
}

}

0 comments on commit 70fe6f1

Please sign in to comment.