diff --git a/.gitignore b/.gitignore
index 31996f117..0a6ada8e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,9 @@
 target/
+.DS_Store
 .idea/*
 *.iml
 .settings/*
 javadocs/*
 .classpath
-.project
\ No newline at end of file
+.project
+logs/
diff --git a/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/AuthInfo.java b/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/AuthInfo.java
index b03fbbef3..568693bc0 100644
--- a/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/AuthInfo.java
+++ b/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/AuthInfo.java
@@ -17,7 +17,8 @@
 public abstract class AuthInfo {
   public enum AuthenticationType {
     BASIC_AUTHENTICATION,
-    FORM_AUTHENTICATION
+    FORM_AUTHENTICATION,
+    NT_AUTHENTICATION
   }
 
   protected AuthenticationType authenticationType;
diff --git a/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/NtAuthInfo.java b/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/NtAuthInfo.java
new file mode 100644
index 000000000..220538296
--- /dev/null
+++ b/src/main/java/edu/uci/ics/crawler4j/crawler/authentication/NtAuthInfo.java
@@ -0,0 +1,36 @@
+package edu.uci.ics.crawler4j.crawler.authentication;
+
+import javax.swing.text.html.FormSubmitEvent.MethodType;
+import java.net.MalformedURLException;
+
+/**
+ * Authentication information for Microsoft Active Directory
+ */
+public class NtAuthInfo extends AuthInfo {
+  /** Domain passed along with the username and password for NT authentication. */
+  private String domain;
+
+  /**
+   * Creates NT authentication information; the request method handed to the superclass is always GET.
+   *
+   * @param username account name
+   * @param password account password
+   * @param loginUrl URL of the login target
+   * @param domain domain the account belongs to
+   * @throws MalformedURLException propagated from the superclass constructor (presumably for an unparsable loginUrl)
+   */
+  public NtAuthInfo(String username, String password, String loginUrl, String domain) throws MalformedURLException {
+    super(AuthenticationType.NT_AUTHENTICATION, MethodType.GET, loginUrl, username, password);
+    this.domain = domain;
+  }
+
+  /** @return the domain used for NT authentication */
+  public String getDomain() {
+    return domain;
+  }
+
+  /** @param domain the domain to use for NT authentication */
+  public void setDomain(String domain) {
+    this.domain = domain;
+  }
+}
diff --git a/src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java b/src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java
index bf3a1fcdc..381efeea5 
100644
--- a/src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java
+++ b/src/main/java/edu/uci/ics/crawler4j/fetcher/PageFetcher.java
@@ -19,6 +19,8 @@
 
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
 import java.security.cert.X509Certificate;
 import java.util.ArrayList;
 import java.util.Date;
@@ -26,12 +28,14 @@
 
 import javax.net.ssl.SSLContext;
 
+import edu.uci.ics.crawler4j.crawler.authentication.NtAuthInfo;
 import org.apache.http.Header;
 import org.apache.http.HttpHost;
 import org.apache.http.HttpResponse;
 import org.apache.http.HttpStatus;
 import org.apache.http.NameValuePair;
 import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.NTCredentials;
 import org.apache.http.auth.UsernamePasswordCredentials;
 import org.apache.http.client.ClientProtocolException;
 import org.apache.http.client.CredentialsProvider;
@@ -145,12 +149,37 @@ private void doAuthetication(List authInfos) {
     for (AuthInfo authInfo : authInfos) {
       if (authInfo.getAuthenticationType() == AuthInfo.AuthenticationType.BASIC_AUTHENTICATION) {
         doBasicLogin((BasicAuthInfo) authInfo);
-      } else {
+      } else if (authInfo.getAuthenticationType() == AuthInfo.AuthenticationType.NT_AUTHENTICATION) {
+        doNtLogin((NtAuthInfo) authInfo);
+      } else {
         doFormLogin((FormAuthInfo) authInfo);
       }
     }
   }
 
+  /**
+   * NT authentication: registers NT credentials for the login target and rebuilds the HTTP client with them.
+   *
+   * @param authInfo NT login details (target host, port and protocol, username, password, domain)
+   */
+  private void doNtLogin(NtAuthInfo authInfo) {
+    logger.info("NT authentication for: " + authInfo.getLoginTarget());
+    String workstation;
+    try {
+      workstation = InetAddress.getLocalHost().getHostName();
+    } catch (UnknownHostException e) {
+      // The local host name could not be resolved, so no NT credentials can be built here;
+      // keep the current client instead of replacing it with one that has no credentials.
+      logger.error("Error creating NT credentials", e);
+      return;
+    }
+    HttpHost targetHost = new HttpHost(authInfo.getHost(), authInfo.getPort(), authInfo.getProtocol());
+    CredentialsProvider credsProvider = new BasicCredentialsProvider();
+    credsProvider.setCredentials(new AuthScope(targetHost.getHostName(), targetHost.getPort()),
+        new NTCredentials(authInfo.getUsername(), authInfo.getPassword(), workstation, authInfo.getDomain()));
+    httpClient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build();
+  }
+
   /**
    * BASIC authentication
* Official Example: https://hc.apache.org/httpcomponents-client-ga/httpclient/examples/org/apache/http/examples