@@ -187,6 +187,15 @@ protected void onPageBiggerThanMaxSize(String urlStr, long pageSize) {
187
187
pageSize );
188
188
}
189
189
190
+ /**
191
+ * This function is called if the crawler encounters a page with a 3xx (redirect) status code. The default implementation does nothing. When called from processPage it runs before the follow-redirects setting is checked.
192
+ *
193
+ * @param page Partial page object; when called from processPage its redirect flag and redirected-to URL have already been set
194
+ */
195
+ protected void onRedirectedStatusCode (Page page ) {
196
+ // Intentionally empty: subclasses can override this to add their custom functionality.
197
+ }
198
+
190
199
/**
191
200
* This function is called if the crawler encountered an unexpected HTTP status code (a
192
201
* status code other than 3xx)
@@ -361,15 +370,17 @@ private void processPage(WebURL curURL) {
361
370
// follow https://issues.apache.org/jira/browse/HTTPCORE-389
362
371
363
372
page .setRedirect (true );
364
- if (myController .getConfig ().isFollowRedirects ()) {
365
- String movedToUrl = fetchResult .getMovedToUrl ();
366
- if (movedToUrl == null ) {
367
- logger .warn ("Unexpected error, URL: {} is redirected to NOTHING" ,
368
- curURL );
369
- return ;
370
- }
371
- page .setRedirectedToUrl (movedToUrl );
372
373
374
+ String movedToUrl = fetchResult .getMovedToUrl ();
375
+ if (movedToUrl == null ) {
376
+ logger .warn ("Unexpected error, URL: {} is redirected to NOTHING" ,
377
+ curURL );
378
+ return ;
379
+ }
380
+ page .setRedirectedToUrl (movedToUrl );
381
+ onRedirectedStatusCode (page );
382
+
383
+ if (myController .getConfig ().isFollowRedirects ()) {
373
384
int newDocId = docIdServer .getDocId (movedToUrl );
374
385
if (newDocId > 0 ) {
375
386
logger .debug ("Redirect page: {} is already seen" , curURL );
0 commit comments