|
18 | 18 | package edu.uci.ics.crawler4j.crawler;
|
19 | 19 |
|
20 | 20 | import java.util.ArrayList;
|
| 21 | +import java.util.Collection; |
| 22 | +import java.util.HashSet; |
21 | 23 | import java.util.List;
|
22 | 24 |
|
| 25 | +import org.apache.http.Header; |
| 26 | +import org.apache.http.message.BasicHeader; |
| 27 | + |
23 | 28 | import edu.uci.ics.crawler4j.crawler.authentication.AuthInfo;
|
24 | 29 |
|
25 | 30 | public class CrawlConfig {
|
@@ -54,6 +59,11 @@ public class CrawlConfig {
|
54 | 59 | */
|
55 | 60 | private String userAgentString = "crawler4j (http://code.google.com/p/crawler4j/)";
|
56 | 61 |
|
| 62 | + /** |
| 63 | + * Default request header values. |
| 64 | + */ |
| 65 | + private Collection<BasicHeader> defaultHeaders = new HashSet<BasicHeader>(); |
| 66 | + |
57 | 67 | /**
|
58 | 68 | * Politeness delay in milliseconds (delay between sending two requests to
|
59 | 69 | * the same host).
|
@@ -229,6 +239,24 @@ public void setUserAgentString(String userAgentString) {
|
229 | 239 | this.userAgentString = userAgentString;
|
230 | 240 | }
|
231 | 241 |
|
| 242 | + /** |
| 243 | + * Return a copy of the default header collection. |
| 244 | + */ |
| 245 | + public Collection<BasicHeader> getDefaultHeaders() { |
| 246 | + return new HashSet<>(defaultHeaders); |
| 247 | + } |
| 248 | + |
| 249 | + /** |
| 250 | + * Set the default header collection (creating copies of the provided headers). |
| 251 | + */ |
| 252 | + public void setDefaultHeaders(Collection<? extends Header> defaultHeaders) { |
| 253 | + Collection<BasicHeader> copiedHeaders = new HashSet<>(); |
| 254 | + for (Header header : defaultHeaders) { |
| 255 | + copiedHeaders.add(new BasicHeader(header.getName(), header.getValue())); |
| 256 | + } |
| 257 | + this.defaultHeaders = copiedHeaders; |
| 258 | + } |
| 259 | + |
232 | 260 | public int getPolitenessDelay() {
|
233 | 261 | return politenessDelay;
|
234 | 262 | }
|
|
0 commit comments