40
40
import org .apache .http .client .entity .UrlEncodedFormEntity ;
41
41
import org .apache .http .client .methods .HttpGet ;
42
42
import org .apache .http .client .methods .HttpPost ;
43
+ import org .apache .http .client .methods .HttpUriRequest ;
43
44
import org .apache .http .config .Registry ;
44
45
import org .apache .http .config .RegistryBuilder ;
45
46
import org .apache .http .conn .socket .ConnectionSocketFactory ;
@@ -197,9 +198,9 @@ public PageFetchResult fetchPage(WebURL webUrl)
197
198
// Getting URL, setting headers & content
198
199
PageFetchResult fetchResult = new PageFetchResult ();
199
200
String toFetchURL = webUrl .getURL ();
200
- HttpGet get = null ;
201
+ HttpUriRequest request = null ;
201
202
try {
202
- get = new HttpGet (toFetchURL );
203
+ request = newHttpUriRequest (toFetchURL );
203
204
// Applying Politeness delay
204
205
synchronized (mutex ) {
205
206
long now = (new Date ()).getTime ();
@@ -209,7 +210,7 @@ public PageFetchResult fetchPage(WebURL webUrl)
209
210
lastFetchTime = (new Date ()).getTime ();
210
211
}
211
212
212
- HttpResponse response = httpClient .execute (get );
213
+ HttpResponse response = httpClient .execute (request );
213
214
fetchResult .setEntity (response .getEntity ());
214
215
fetchResult .setResponseHeaders (response .getAllHeaders ());
215
216
@@ -229,7 +230,7 @@ public PageFetchResult fetchPage(WebURL webUrl)
229
230
}
230
231
} else if (statusCode == HttpStatus .SC_OK ) { // is 200, everything looks ok
231
232
fetchResult .setFetchedUrl (toFetchURL );
232
- String uri = get .getURI ().toString ();
233
+ String uri = request .getURI ().toString ();
233
234
if (!uri .equals (toFetchURL )) {
234
235
if (!URLCanonicalizer .getCanonicalURL (uri ).equals (toFetchURL )) {
235
236
fetchResult .setFetchedUrl (uri );
@@ -258,8 +259,8 @@ public PageFetchResult fetchPage(WebURL webUrl)
258
259
return fetchResult ;
259
260
260
261
} finally { // occurs also with thrown exceptions
261
- if ((fetchResult .getEntity () == null ) && (get != null )) {
262
- get .abort ();
262
+ if ((fetchResult .getEntity () == null ) && (request != null )) {
263
+ request .abort ();
263
264
}
264
265
}
265
266
}
@@ -270,4 +271,16 @@ public synchronized void shutDown() {
270
271
connectionMonitorThread .shutdown ();
271
272
}
272
273
}
274
+
275
+ /**
276
+ * Creates the HttpUriRequest used to fetch the given URL. The default implementation returns a
277
+ * plain HttpGet with no further configuration; subclasses may override this method to supply their own request.
278
+ *
279
+ * @param url the URL to be fetched
280
+ * @return the HttpUriRequest for the given URL; the default implementation returns a new HttpGet
281
+ */
282
+ protected HttpUriRequest newHttpUriRequest (String url ) {
283
+ return new HttpGet (url );
284
+ }
285
+
273
286
}
0 commit comments