Skip to content

Commit e01b206

Browse files
committed
Provide factory method for creating the HttpUriRequest (default: HttpGet), fixes yasserg#35
1 parent 6bd194a commit e01b206

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java

+19-6
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.apache.http.client.entity.UrlEncodedFormEntity;
4141
import org.apache.http.client.methods.HttpGet;
4242
import org.apache.http.client.methods.HttpPost;
43+
import org.apache.http.client.methods.HttpUriRequest;
4344
import org.apache.http.config.Registry;
4445
import org.apache.http.config.RegistryBuilder;
4546
import org.apache.http.conn.socket.ConnectionSocketFactory;
@@ -197,9 +198,9 @@ public PageFetchResult fetchPage(WebURL webUrl)
197198
// Getting URL, setting headers & content
198199
PageFetchResult fetchResult = new PageFetchResult();
199200
String toFetchURL = webUrl.getURL();
200-
HttpGet get = null;
201+
HttpUriRequest request = null;
201202
try {
202-
get = new HttpGet(toFetchURL);
203+
request = newHttpUriRequest(toFetchURL);
203204
// Applying Politeness delay
204205
synchronized (mutex) {
205206
long now = (new Date()).getTime();
@@ -209,7 +210,7 @@ public PageFetchResult fetchPage(WebURL webUrl)
209210
lastFetchTime = (new Date()).getTime();
210211
}
211212

212-
HttpResponse response = httpClient.execute(get);
213+
HttpResponse response = httpClient.execute(request);
213214
fetchResult.setEntity(response.getEntity());
214215
fetchResult.setResponseHeaders(response.getAllHeaders());
215216

@@ -229,7 +230,7 @@ public PageFetchResult fetchPage(WebURL webUrl)
229230
}
230231
} else if (statusCode == HttpStatus.SC_OK) { // is 200, everything looks ok
231232
fetchResult.setFetchedUrl(toFetchURL);
232-
String uri = get.getURI().toString();
233+
String uri = request.getURI().toString();
233234
if (!uri.equals(toFetchURL)) {
234235
if (!URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL)) {
235236
fetchResult.setFetchedUrl(uri);
@@ -258,8 +259,8 @@ public PageFetchResult fetchPage(WebURL webUrl)
258259
return fetchResult;
259260

260261
} finally { // occurs also with thrown exceptions
261-
if ((fetchResult.getEntity() == null) && (get != null)) {
262-
get.abort();
262+
if ((fetchResult.getEntity() == null) && (request != null)) {
263+
request.abort();
263264
}
264265
}
265266
}
@@ -270,4 +271,16 @@ public synchronized void shutDown() {
270271
connectionMonitorThread.shutdown();
271272
}
272273
}
274+
275+
/**
276+
* Creates the HttpUriRequest used to fetch the given url. The default implementation returns an
277+
* HttpGet built from the url without any further configuration. Subclasses may override this
278+
* method to supply their own request.
279+
* @param url the url to be fetched; passed unchanged to the HttpGet constructor
280+
* @return a new HttpGet for the given url by default; overrides may return any HttpUriRequest
281+
*/
282+
protected HttpUriRequest newHttpUriRequest(String url) {
283+
return new HttpGet(url);
284+
}
285+
273286
}

0 commit comments

Comments
 (0)