title | description | services | documentationcenter | author | manager | editor | tags | ms.assetid | ms.service | ms.custom | ms.devlang | ms.topic | ms.tgt_pltfrm | ms.workload | ms.date | ms.author |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Create Hadoop clusters using .NET - Azure HDInsight | Microsoft Docs |
Learn how to create Hadoop, HBase, Storm, or Spark clusters on Linux for HDInsight using the HDInsight .NET SDK. |
hdinsight |
mumian |
jhubbard |
cgronlun |
azure-portal |
9c74e3dc-837f-4c90-bbb1-489bc7124a3d |
hdinsight |
hdinsightactive |
na |
article |
na |
big-data |
11/06/2017 |
jgao |
[!INCLUDE selector]
Learn how to create a Hadoop cluster in Azure HDInsight using the .NET SDK.
Important
The steps in this document create a cluster with one worker node. If you plan on more than 32 worker nodes, either at cluster creation or by scaling the cluster after creation, you need to select a head node size with at least 8 cores and 14 GB of RAM.
For more information on node sizes and associated costs, see HDInsight pricing.
[!INCLUDE delete-cluster-warning]
- An Azure subscription. See Get Azure free trial.
- An Azure storage account. See Create a storage account.
- Visual Studio 2013, Visual Studio 2015 or Visual Studio 2017.
-
Open Visual Studio 2017.
-
Create a new Visual C# console application.
-
From the Tools menu, click NuGet Package Manager, and then click Package Manager Console.
-
Run the following command in the console to install the packages:
# Run each command separately in the Package Manager Console.
# The first two packages are installed as prerelease versions (-Pre).
Install-Package Microsoft.Rest.ClientRuntime.Azure.Authentication -Pre
Install-Package Microsoft.Azure.Management.ResourceManager -Pre
Install-Package Microsoft.Azure.Management.HDInsight
These commands add .NET libraries and references to them to the current Visual Studio project.
-
From Solution Explorer, double-click Program.cs to open it, paste the following code, and provide values for the variables:
using System;
using System.Collections.Generic;
using Microsoft.Rest;
using Microsoft.Rest.Azure.Authentication;
using Microsoft.Azure;
using Microsoft.Azure.Management.HDInsight;
using Microsoft.Azure.Management.HDInsight.Models;
using Microsoft.Azure.Management.ResourceManager;
using Microsoft.IdentityModel.Clients.ActiveDirectory;

namespace CreateHDInsightCluster
{
    /// <summary>
    /// Console application that signs in interactively, registers the HDInsight
    /// resource provider on the subscription, and creates an HDInsight cluster
    /// from the constant values declared below.
    /// </summary>
    class Program
    {
        private static HDInsightManagementClient _hdiManagementClient;

        private const string SubscriptionId = "<Your Azure Subscription ID>";

        // Replace with your AAD tenant ID if necessary
        private const string TenantId = UserTokenProvider.CommonTenantId;

        // This is the GUID for the PowerShell client. Used for interactive logins in this example.
        private const string ClientId = "1950a258-227b-4e31-a9cf-717495945fc2";

        private const string ExistingResourceGroupName = "<Enter Resource Group Name>";
        private const string ExistingStorageName = "<Enter Default Storage Account Name>.blob.core.windows.net";
        private const string ExistingStorageKey = "<Enter Default Storage Account Key>";
        private const string ExistingBlobContainer = "<Enter Default Blob Container Name>";

        private const string NewClusterName = "<Enter HDInsight Cluster Name>";
        private const int NewClusterNumNodes = 2;
        private const string NewClusterLocation = "EAST US 2";     // Must be the same as the default Storage account
        private const OSType NewClusterOSType = OSType.Linux;
        private const string NewClusterType = "Hadoop";
        private const string NewClusterVersion = "3.6";
        private const string NewClusterUsername = "admin";
        private const string NewClusterPassword = "<Enter HTTP User Password>";
        private const string NewClusterSshUserName = "sshuser";

        // You can use either password or public key.
        // See https://docs.microsoft.com/azure/hdinsight/hdinsight-hadoop-linux-use-ssh-unix
        private const string NewClusterSshPassword = "<Enter SSH User Password>";
        private const string NewClusterSshPublicKey = @"---- BEGIN SSH2 PUBLIC KEY ---- Comment: ""rsa-key-20150731"" AAAAB3NzaC1yc2EAAAABJQAAAQEA4QiCRLqT7fnmUA5OhYWZNlZo6lLaY1c+IRsp gmPCsJVGQLu6O1wqcxRqiKk7keYq8bP5s30v6bIljsLZYTnyReNUa5LtFw7eauGr yVt3Pve6ejfWELhbVpi0iq8uJNFA9VvRkz8IP1JmjC5jsdnJhzQZtgkIrdn3w0e6 WVfu15kKyY8YAiynVbdV51EB0SZaSLdMZkZQ81xi4DDtCZD7qvdtWEFwLa+EHdkd pzO36Mtev5XvseLQqzXzZ6aVBdlXoppGHXkoGHAMNOtEWRXpAUtEccjpATsaZhQR zZdZlzHduhM10ofS4YOYBADt9JohporbQVHM5w6qUhIgyiPo7w== ---- END SSH2 PUBLIC KEY ----"; //replace the public key with your own

        /// <summary>
        /// Authenticates, registers the HDInsight provider, and creates the cluster.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            System.Console.WriteLine("Creating a cluster.  The process takes 10 to 20 minutes ...");

            // Authenticate and get a token
            var authToken = GetTokenCloudCredentials(TenantId, ClientId, SubscriptionId);

            // Flag subscription for HDInsight, if it isn't already.
            EnableHDInsight(authToken);

            // Get an HDInsight management client
            _hdiManagementClient = new HDInsightManagementClient(authToken);

            // Set parameters for the new cluster
            var parameters = new ClusterCreateParameters
            {
                ClusterSizeInNodes = NewClusterNumNodes,
                UserName = NewClusterUsername,
                ClusterType = NewClusterType,
                OSType = NewClusterOSType,
                Version = NewClusterVersion,

                // Use an Azure storage account as the default storage
                DefaultStorageInfo = new AzureStorageInfo(ExistingStorageName, ExistingStorageKey, ExistingBlobContainer),

                // Is the cluster type RServer? If so, you can set the EdgeNodeSize.
                // Otherwise, the default VM size is used.
                //EdgeNodeSize = "Standard_D12_v2",

                Password = NewClusterPassword,
                Location = NewClusterLocation,

                SshUserName = NewClusterSshUserName,
                SshPassword = NewClusterSshPassword,
                //SshPublicKey = NewClusterSshPublicKey
            };

            // Is the cluster type RServer? If so, add the RStudio configuration option.
            /*
            parameters.Configurations.Add(
                "rserver",
                new Dictionary<string, string>()
                {
                    { "rstudio", "true" }
                }
            );
            */

            // Create the cluster
            _hdiManagementClient.Clusters.Create(ExistingResourceGroupName, NewClusterName, parameters);

            System.Console.WriteLine("The cluster has been created. Press ENTER to continue ...");
            System.Console.ReadLine();
        }

        /// <summary>
        /// Authenticate to an Azure subscription and retrieve an authentication token
        /// </summary>
        /// <param name="TenantId">Tenant appended to the login authority URL.</param>
        /// <param name="ClientId">Client (application) ID presented when acquiring the token.</param>
        /// <param name="SubscriptionId">Subscription ID stored in the returned credentials.</param>
        /// <returns>Credentials wrapping the subscription ID and the acquired access token.</returns>
        static TokenCloudCredentials GetTokenCloudCredentials(string TenantId, string ClientId, string SubscriptionId)
        {
            var authContext = new AuthenticationContext("https://login.microsoftonline.com/" + TenantId);

            // PromptBehavior.Always requests a credential prompt on every run.
            var tokenAuthResult = authContext.AcquireToken(
                "https://management.core.windows.net/",
                ClientId,
                new Uri("urn:ietf:wg:oauth:2.0:oob"),
                PromptBehavior.Always,
                UserIdentifier.AnyUser);

            return new TokenCloudCredentials(SubscriptionId, tokenAuthResult.AccessToken);
        }

        /// <summary>
        /// Marks your subscription as one that can use HDInsight, if it has not already been marked as such.
        /// </summary>
        /// <remarks>This is essentially a one-time action; if you have already done something with HDInsight
        /// on your subscription, then this isn't needed at all and will do nothing.</remarks>
        /// <param name="authToken">An authentication token for your Azure subscription</param>
        static void EnableHDInsight(TokenCloudCredentials authToken)
        {
            // Create a client for the Resource manager and set the subscription ID
            var resourceManagementClient = new ResourceManagementClient(new TokenCredentials(authToken.Token));
            resourceManagementClient.SubscriptionId = SubscriptionId;

            // Register the HDInsight provider; the result is not needed here.
            resourceManagementClient.Providers.Register("Microsoft.HDInsight");
        }
    }
}
-
Replace the class member values.
-
Press F5 to run the application. A console window should open and display the status of the application. You are prompted to enter your Azure account credentials. It can take several minutes to create an HDInsight cluster, normally around 15.
Using bootstrap, you can configure additional settings during cluster creation. For more information, see Customize HDInsight clusters using Bootstrap.
Modify the sample in Create clusters to configure a Hive setting:
/// <summary>
/// Creates the cluster with explicit core-site, gateway and hive-site
/// configurations, so that a Hive setting is applied at creation time (bootstrap).
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    System.Console.WriteLine("Creating a cluster.  The process takes 10 to 20 minutes ...");

    // Authenticate and get a token
    var authToken = GetTokenCloudCredentials(TenantId, ClientId, SubscriptionId);

    // Flag subscription for HDInsight, if it isn't already.
    EnableHDInsight(authToken);

    // Get an HDInsight management client
    _hdiManagementClient = new HDInsightManagementClient(authToken);

    // Set parameters for the new cluster
    var extendedParameters = new ClusterCreateParametersExtended
    {
        Location = NewClusterLocation,
        Properties = new ClusterCreateProperties
        {
            ClusterDefinition = new ClusterDefinition
            {
                ClusterType = NewClusterType
            },
            ClusterVersion = NewClusterVersion,
            OperatingSystemType = NewClusterOSType
        }
    };

    // Collection and JSON types are fully qualified so that this method can be
    // pasted into Program.cs without adding using directives.

    // Default storage: the file system URI and the key for the storage account.
    var coreConfigs = new System.Collections.Generic.Dictionary<string, string>
    {
        {"fs.defaultFS", string.Format("wasb://{0}@{1}", ExistingBlobContainer, ExistingStorageName)},
        {
            string.Format("fs.azure.account.key.{0}", ExistingStorageName),
            ExistingStorageKey
        }
    };

    // bootstrap: the Hive setting applied while the cluster is created
    var hiveConfigs = new System.Collections.Generic.Dictionary<string, string>
    {
        { "hive.metastore.client.socket.timeout", "90"}
    };

    // HTTP (cluster login) credentials
    var gatewayConfigs = new System.Collections.Generic.Dictionary<string, string>
    {
        {"restAuthCredential.isEnabled", "true"},
        {"restAuthCredential.username", NewClusterUsername},
        {"restAuthCredential.password", NewClusterPassword}
    };

    var configurations = new System.Collections.Generic.Dictionary<string, System.Collections.Generic.Dictionary<string, string>>
    {
        {"core-site", coreConfigs},
        {"gateway", gatewayConfigs},
        {"hive-site", hiveConfigs}
    };

    // The cluster definition takes the configurations as a serialized JSON string.
    var serializedConfig = Newtonsoft.Json.JsonConvert.SerializeObject(configurations);
    extendedParameters.Properties.ClusterDefinition.Configurations = serializedConfig;

    // Only consumed by the commented-out SshProfile sections below.
    var sshPublicKeys = new System.Collections.Generic.List<SshPublicKey>();
    var sshPublicKey = new SshPublicKey
    {
        CertificateData =
            string.Format("ssh-rsa {0}", NewClusterSshPublicKey)
    };
    sshPublicKeys.Add(sshPublicKey);

    var headNode = new Role
    {
        Name = "headnode",
        TargetInstanceCount = 2,
        HardwareProfile = new HardwareProfile
        {
            VmSize = "Large"
        },
        OsProfile = new OsProfile
        {
            LinuxOperatingSystemProfile = new LinuxOperatingSystemProfile
            {
                UserName = NewClusterSshUserName,
                Password = NewClusterSshPassword //,
                // When you use an SSH public key, make sure to remove comments, headers and trailers, and concatenate the key into one line
                //SshProfile = new SshProfile
                //{
                //    SshPublicKeys = sshPublicKeys
                //}
            }
        }
    };

    var workerNode = new Role
    {
        Name = "workernode",
        TargetInstanceCount = NewClusterNumNodes,
        HardwareProfile = new HardwareProfile
        {
            VmSize = "Large"
        },
        OsProfile = new OsProfile
        {
            LinuxOperatingSystemProfile = new LinuxOperatingSystemProfile
            {
                UserName = NewClusterSshUserName,
                Password = NewClusterSshPassword //,
                //SshProfile = new SshProfile
                //{
                //    SshPublicKeys = sshPublicKeys
                //}
            }
        }
    };

    extendedParameters.Properties.ComputeProfile = new ComputeProfile();
    extendedParameters.Properties.ComputeProfile.Roles.Add(headNode);
    extendedParameters.Properties.ComputeProfile.Roles.Add(workerNode);

    // Create the cluster
    _hdiManagementClient.Clusters.Create(ExistingResourceGroupName, NewClusterName, extendedParameters);

    System.Console.WriteLine("The cluster has been created. Press ENTER to continue ...");
    System.Console.ReadLine();
}
Using Script Action, you can configure additional settings during cluster creation. For more information, see Customize Linux-based HDInsight clusters using Script Action.
Modify the sample in Create clusters to call a Script Action to install R:
/// <summary>
/// Creates the cluster and registers a Script Action that runs the R installer
/// script on both the head nodes and the worker nodes.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    System.Console.WriteLine("Creating a cluster.  The process takes 10 to 20 minutes ...");

    // Sign in, then make sure the subscription is registered for HDInsight.
    var credentials = GetTokenCloudCredentials(TenantId, ClientId, SubscriptionId);
    EnableHDInsight(credentials);

    // Management client used for the create call below.
    _hdiManagementClient = new HDInsightManagementClient(credentials);

    // Describe the new cluster, one setting at a time.
    var clusterParameters = new ClusterCreateParameters();
    clusterParameters.ClusterSizeInNodes = NewClusterNumNodes;
    clusterParameters.Location = NewClusterLocation;
    clusterParameters.ClusterType = NewClusterType;
    clusterParameters.OSType = NewClusterOSType;
    clusterParameters.Version = NewClusterVersion;
    clusterParameters.DefaultStorageInfo =
        new AzureStorageInfo(ExistingStorageName, ExistingStorageKey, ExistingBlobContainer);
    clusterParameters.UserName = NewClusterUsername;
    clusterParameters.Password = NewClusterPassword;
    clusterParameters.SshUserName = NewClusterSshUserName;
    clusterParameters.SshPublicKey = NewClusterSshPublicKey;

    // The same script action is attached to each node type, each in its own list.
    var installerUri = new Uri("https://hdiconfigactions.blob.core.windows.net/linuxrconfigactionv01/r-installer-v01.sh");
    var installRAction = new ScriptAction("Install R", installerUri, "");

    var headNodeActions = new System.Collections.Generic.List<ScriptAction>();
    headNodeActions.Add(installRAction);
    clusterParameters.ScriptActions.Add(ClusterNodeType.HeadNode, headNodeActions);

    var workerNodeActions = new System.Collections.Generic.List<ScriptAction>();
    workerNodeActions.Add(installRAction);
    clusterParameters.ScriptActions.Add(ClusterNodeType.WorkerNode, workerNodeActions);

    // Create the cluster
    _hdiManagementClient.Clusters.Create(ExistingResourceGroupName, NewClusterName, clusterParameters);

    System.Console.WriteLine("The cluster has been created. Press ENTER to continue ...");
    System.Console.ReadLine();
}
If you run into issues with creating HDInsight clusters, see access control requirements.
Now that you have successfully created an HDInsight cluster, use the following to learn how to work with your cluster.
- Develop Java topologies for Storm on HDInsight
- Use Python components in Storm on HDInsight
- Deploy and monitor topologies with Storm on HDInsight
- Create a standalone application using Scala
- Run jobs remotely on a Spark cluster using Livy
- Spark with BI: Perform interactive data analysis using Spark in HDInsight with BI tools
- Spark with Machine Learning: Use Spark in HDInsight to predict food inspection results
- Spark Streaming: Use Spark in HDInsight for building real-time streaming applications