Skip to content

Commit

Permalink
Added support for NT authentication to support crawling sites protect…
Browse files Browse the repository at this point in the history
…ed by active directory
  • Loading branch information
jcarlile committed Apr 15, 2015
1 parent 70fe6f1 commit 22597ea
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 3 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
target/
.DS_STORE
.idea/*
*.iml
.settings/*
javadocs/*
.classpath
.project
.project
logs/
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
public abstract class AuthInfo {
/**
* Authentication schemes that an authentication-info object can be tagged with.
*/
public enum AuthenticationType {
BASIC_AUTHENTICATION,
// NOTE(review): the constant without a trailing comma looks like the pre-change line that the
// diff view shows next to its replacement — confirm against the real file.
FORM_AUTHENTICATION
FORM_AUTHENTICATION,
// Added by this commit for NT authentication (sites protected by Active Directory).
NT_AUTHENTICATION
}

protected AuthenticationType authenticationType;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package edu.uci.ics.crawler4j.crawler.authentication;

import javax.swing.text.html.FormSubmitEvent.MethodType;
import java.net.MalformedURLException;

/**
 * Login details for a site protected by NT authentication (Microsoft Active Directory).
 *
 * <p>In addition to the values handed to the {@link AuthInfo} constructor, this class keeps
 * the domain that accompanies the username and password.
 */
public class NtAuthInfo extends AuthInfo {

  /** Domain that accompanies the username and password. */
  private String domain;

  /**
   * Creates an NT authentication descriptor tagged as
   * {@code AuthenticationType.NT_AUTHENTICATION} with the {@code GET} method.
   *
   * @param username account name passed on to the superclass
   * @param password account password passed on to the superclass
   * @param loginUrl login URL passed on to the superclass
   * @param domain domain stored on this instance
   * @throws MalformedURLException declared by this constructor; presumably raised by the
   *     superclass when {@code loginUrl} cannot be parsed (verify against {@code AuthInfo})
   */
  public NtAuthInfo(String username, String password, String loginUrl, String domain)
      throws MalformedURLException {
    super(AuthenticationType.NT_AUTHENTICATION, MethodType.GET, loginUrl, username, password);
    this.domain = domain;
  }

  /**
   * @return the domain currently stored on this instance
   */
  public String getDomain() {
    return this.domain;
  }

  /**
   * Replaces the stored domain.
   *
   * @param domain the new domain value
   */
  public void setDomain(String domain) {
    this.domain = domain;
  }
}
21 changes: 20 additions & 1 deletion src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,23 @@

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import javax.net.ssl.SSLContext;

import edu.uci.ics.crawler4j.crawler.authentication.NtAuthInfo;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.NTCredentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
Expand Down Expand Up @@ -145,12 +149,27 @@ private void doAuthetication(List<AuthInfo> authInfos) {
// Hand each configured login to the handler that matches its authentication type.
for (AuthInfo authInfo : authInfos) {
if (authInfo.getAuthenticationType() == AuthInfo.AuthenticationType.BASIC_AUTHENTICATION) {
doBasicLogin((BasicAuthInfo) authInfo);
// NOTE(review): the bare "} else {" on the next line looks like the line this commit removed;
// the diff view shows it next to the "else if" that replaces it — confirm against the real file.
} else {
} else if (authInfo.getAuthenticationType() == AuthInfo.AuthenticationType.NT_AUTHENTICATION) {
// NT_AUTHENTICATION entries are cast to NtAuthInfo and handled by doNtLogin.
doNtLogin((NtAuthInfo)authInfo);
}else {
// Every remaining type is cast to FormAuthInfo and handled as a form login.
doFormLogin((FormAuthInfo) authInfo);
}
}
}

/**
 * NT authentication: registers {@link NTCredentials} for the login host and port of
 * {@code authInfo} and rebuilds {@code httpClient} around them.
 *
 * <p>The local host name is sent as the workstation name. If it cannot be resolved, the
 * error is logged and the method returns without touching {@code httpClient}, so the
 * current client is not swapped for one that carries no credentials at all.
 *
 * @param authInfo NT login details (username, password, domain and login target)
 */
private void doNtLogin(NtAuthInfo authInfo) {
  logger.info("NT authentication for: " + authInfo.getLoginTarget());
  HttpHost targetHost = new HttpHost(authInfo.getHost(), authInfo.getPort(), authInfo.getProtocol());

  // Resolve the workstation name first: it is the only step that can fail with a checked
  // exception, and nothing should be replaced when it does.
  final String workstation;
  try {
    workstation = InetAddress.getLocalHost().getHostName();
  } catch (UnknownHostException e) {
    logger.error("Error creating NT credentials", e);
    return;
  }

  CredentialsProvider credsProvider = new BasicCredentialsProvider();
  credsProvider.setCredentials(
      new AuthScope(targetHost.getHostName(), targetHost.getPort()),
      new NTCredentials(authInfo.getUsername(), authInfo.getPassword(), workstation, authInfo.getDomain()));

  // NOTE(review): this builds a brand-new client from defaults plus these credentials; any
  // settings applied to the previous httpClient elsewhere in the class would not carry over —
  // confirm against the rest of PageFetcher.
  httpClient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build();
}

/**
* BASIC authentication<br/>
* Official Example: https://hc.apache.org/httpcomponents-client-ga/httpclient/examples/org/apache/http/examples
Expand Down

0 comments on commit 22597ea

Please sign in to comment.