Skip to content

Commit 22597ea

Browse files
committed
Added support for NT authentication to allow crawling sites protected by Active Directory
1 parent 70fe6f1 commit 22597ea

File tree

4 files changed

+49
-3
lines changed

4 files changed

+49
-3
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
target/
2+
.DS_STORE
23
.idea/*
34
*.iml
45
.settings/*
56
javadocs/*
67
.classpath
7-
.project
8+
.project
9+
logs/

src/main/java/edu/uci/ics/crawler4j/crawler/authentication/AuthInfo.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
public abstract class AuthInfo {
1818
public enum AuthenticationType {
1919
BASIC_AUTHENTICATION,
20-
FORM_AUTHENTICATION
20+
FORM_AUTHENTICATION,
21+
NT_AUTHENTICATION
2122
}
2223

2324
protected AuthenticationType authenticationType;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package edu.uci.ics.crawler4j.crawler.authentication;
2+
3+
import javax.swing.text.html.FormSubmitEvent.MethodType;
4+
import java.net.MalformedURLException;
5+
6+
/**
7+
* Authentication information for Microsoft Active Directory
8+
*/
9+
public class NtAuthInfo extends AuthInfo {
10+
private String domain;
11+
12+
public NtAuthInfo(String username, String password, String loginUrl, String domain) throws MalformedURLException {
13+
super(AuthenticationType.NT_AUTHENTICATION, MethodType.GET, loginUrl, username, password);
14+
this.domain = domain;
15+
}
16+
17+
public String getDomain() {
18+
return domain;
19+
}
20+
21+
public void setDomain(String domain) {
22+
this.domain = domain;
23+
}
24+
}

src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java

+20-1
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,23 @@
1919

2020
import java.io.IOException;
2121
import java.io.UnsupportedEncodingException;
22+
import java.net.InetAddress;
23+
import java.net.UnknownHostException;
2224
import java.security.cert.X509Certificate;
2325
import java.util.ArrayList;
2426
import java.util.Date;
2527
import java.util.List;
2628

2729
import javax.net.ssl.SSLContext;
2830

31+
import edu.uci.ics.crawler4j.crawler.authentication.NtAuthInfo;
2932
import org.apache.http.Header;
3033
import org.apache.http.HttpHost;
3134
import org.apache.http.HttpResponse;
3235
import org.apache.http.HttpStatus;
3336
import org.apache.http.NameValuePair;
3437
import org.apache.http.auth.AuthScope;
38+
import org.apache.http.auth.NTCredentials;
3539
import org.apache.http.auth.UsernamePasswordCredentials;
3640
import org.apache.http.client.ClientProtocolException;
3741
import org.apache.http.client.CredentialsProvider;
@@ -145,12 +149,27 @@ private void doAuthetication(List<AuthInfo> authInfos) {
145149
for (AuthInfo authInfo : authInfos) {
146150
if (authInfo.getAuthenticationType() == AuthInfo.AuthenticationType.BASIC_AUTHENTICATION) {
147151
doBasicLogin((BasicAuthInfo) authInfo);
148-
} else {
152+
} else if (authInfo.getAuthenticationType() == AuthInfo.AuthenticationType.NT_AUTHENTICATION) {
153+
doNtLogin((NtAuthInfo)authInfo);
154+
}else {
149155
doFormLogin((FormAuthInfo) authInfo);
150156
}
151157
}
152158
}
153159

160+
private void doNtLogin(NtAuthInfo authInfo) {
161+
logger.info("NT authentication for: " + authInfo.getLoginTarget());
162+
HttpHost targetHost = new HttpHost(authInfo.getHost(), authInfo.getPort(), authInfo.getProtocol());
163+
CredentialsProvider credsProvider = new BasicCredentialsProvider();
164+
try {
165+
credsProvider.setCredentials(new AuthScope(targetHost.getHostName(), targetHost.getPort()),
166+
new NTCredentials(authInfo.getUsername(), authInfo.getPassword(), InetAddress.getLocalHost().getHostName(), authInfo.getDomain()));
167+
} catch (UnknownHostException e) {
168+
logger.error("Error creating NT credentials", e);
169+
}
170+
httpClient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build();
171+
}
172+
154173
/**
155174
* BASIC authentication<br/>
156175
* Official Example: https://hc.apache.org/httpcomponents-client-ga/httpclient/examples/org/apache/http/examples

0 commit comments

Comments
 (0)