Skip to content

Commit bebe4b1

Browse files
committed
Merge pull request yasserg#46 from yasserg/pr/42
Added default request header configuration
2 parents 981712c + 93d7c29 commit bebe4b1

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

src/main/java/edu/uci/ics/crawler4j/crawler/CrawlConfig.java

+28
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,13 @@
1818
package edu.uci.ics.crawler4j.crawler;
1919

2020
import java.util.ArrayList;
21+
import java.util.Collection;
22+
import java.util.HashSet;
2123
import java.util.List;
2224

25+
import org.apache.http.Header;
26+
import org.apache.http.message.BasicHeader;
27+
2328
import edu.uci.ics.crawler4j.crawler.authentication.AuthInfo;
2429

2530
public class CrawlConfig {
@@ -54,6 +59,11 @@ public class CrawlConfig {
5459
*/
5560
private String userAgentString = "crawler4j (http://code.google.com/p/crawler4j/)";
5661

62+
/**
63+
* Default request header values.
64+
*/
65+
private Collection<BasicHeader> defaultHeaders = new HashSet<BasicHeader>();
66+
5767
/**
5868
* Politeness delay in milliseconds (delay between sending two requests to
5969
* the same host).
@@ -229,6 +239,24 @@ public void setUserAgentString(String userAgentString) {
229239
this.userAgentString = userAgentString;
230240
}
231241

242+
/**
243+
* Return a copy of the default header collection.
244+
*/
245+
public Collection<BasicHeader> getDefaultHeaders() {
246+
return new HashSet<>(defaultHeaders);
247+
}
248+
249+
/**
250+
* Set the default header collection (creating copies of the provided headers).
251+
*/
252+
public void setDefaultHeaders(Collection<? extends Header> defaultHeaders) {
253+
Collection<BasicHeader> copiedHeaders = new HashSet<>();
254+
for (Header header : defaultHeaders) {
255+
copiedHeaders.add(new BasicHeader(header.getName(), header.getValue()));
256+
}
257+
this.defaultHeaders = copiedHeaders;
258+
}
259+
232260
public int getPolitenessDelay() {
233261
return politenessDelay;
234262
}

src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java

+1
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ public boolean isTrusted(final X509Certificate[] chain, String authType) {
119119
clientBuilder.setDefaultRequestConfig(requestConfig);
120120
clientBuilder.setConnectionManager(connectionManager);
121121
clientBuilder.setUserAgent(config.getUserAgentString());
122+
clientBuilder.setDefaultHeaders(config.getDefaultHeaders());
122123

123124
if (config.getProxyHost() != null) {
124125
if (config.getProxyUsername() != null) {

0 commit comments

Comments
 (0)