Skip to content

Commit

Permalink
Add data prep project
Browse files Browse the repository at this point in the history
  • Loading branch information
jwood803 committed Aug 24, 2019
1 parent 938f2dd commit c2d8f54
Show file tree
Hide file tree
Showing 7 changed files with 20,828 additions and 2 deletions.
51 changes: 50 additions & 1 deletion MLNetExamples/FeatureImportance/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,57 @@ static void Main(string[] args)
.Append(context.Regression.Trainers.LbfgsPoissonRegression());

var model = pipeline.Fit(data);
var transformedData = model.Transform(data);

//context.
// Get weights of model
var linearModel = model.LastTransformer.Model;

var weights = linearModel.Weights;

Console.WriteLine("Weights:");
var weightsResult = "";

foreach (var weight in weights)
{
weightsResult += $"{weight} ";
}

Console.WriteLine(weightsResult);
Console.WriteLine(Environment.NewLine);

// Get global feature importance
var lastTransformer = model.LastTransformer;

var featureImportance = context.Regression.PermutationFeatureImportance(lastTransformer, transformedData);

Console.WriteLine("Global feature importance:");
for (int i = 0; i < featureImportance.Count(); i++)
{
Console.WriteLine($"Feature - {features[i]}: Difference in RMS - {featureImportance[i].RootMeanSquaredError.Mean}");
}

Console.WriteLine(Environment.NewLine);

// Get feature importance for each row
var featureContribution = context.Transforms.CalculateFeatureContribution(lastTransformer, normalize: false);

var featureContributionResults = featureContribution.Fit(transformedData).Transform(transformedData);

var topTenRows = context.Data.TakeRows(featureContributionResults, 10);

var scoringEnumerator = context.Data.CreateEnumerable<HousingData>(topTenRows, reuseRowObject: true);

Console.WriteLine("Row feature importance:");
Console.WriteLine("Households - Housing Median Age - Median Income - Ocean Proximity");
var globalResults = "";
foreach (var row in scoringEnumerator)
{
globalResults += $"{row.Households} - {row.HousingMedianAge} = {row.MedianIncome} - {row.OceanProximity}\n";
}

Console.WriteLine(globalResults);

Console.ReadLine();
}
}
}
8 changes: 7 additions & 1 deletion MLNetExamples/MLNetExamples.sln
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DatabaseLoader", "DatabaseL
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunction", "AzureFunction\AzureFunction.csproj", "{0E54A183-8D61-4BA6-A6EA-BF24D55661DC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FeatureImportance", "FeatureImportance\FeatureImportance.csproj", "{D8DF42E5-501B-458F-9289-813875217AD4}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FeatureImportance", "FeatureImportance\FeatureImportance.csproj", "{D8DF42E5-501B-458F-9289-813875217AD4}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SelectAndShuffle", "SelectAndShuffle\SelectAndShuffle.csproj", "{414D7AD9-C9F9-4485-AC5B-15FB8BDF416B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -93,6 +95,10 @@ Global
{D8DF42E5-501B-458F-9289-813875217AD4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D8DF42E5-501B-458F-9289-813875217AD4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D8DF42E5-501B-458F-9289-813875217AD4}.Release|Any CPU.Build.0 = Release|Any CPU
{414D7AD9-C9F9-4485-AC5B-15FB8BDF416B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{414D7AD9-C9F9-4485-AC5B-15FB8BDF416B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{414D7AD9-C9F9-4485-AC5B-15FB8BDF416B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{414D7AD9-C9F9-4485-AC5B-15FB8BDF416B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
37 changes: 37 additions & 0 deletions MLNetExamples/SelectAndShuffle/HousingData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using Microsoft.ML.Data;

namespace SelectAndShuffle
{
/// <summary>
/// One input row of the housing data set. Every property is bound to a
/// source column through its zero-based <c>LoadColumn</c> index.
/// </summary>
public class HousingData
{
    /// <summary>Value loaded from source column 0.</summary>
    [LoadColumn(0)] public float Longitude { get; set; }

    /// <summary>Value loaded from source column 1.</summary>
    [LoadColumn(1)] public float Latitude { get; set; }

    /// <summary>Value loaded from source column 2.</summary>
    [LoadColumn(2)] public float HousingMedianAge { get; set; }

    /// <summary>Value loaded from source column 3.</summary>
    [LoadColumn(3)] public float TotalRooms { get; set; }

    /// <summary>Value loaded from source column 4.</summary>
    [LoadColumn(4)] public float TotalBedrooms { get; set; }

    /// <summary>Value loaded from source column 5.</summary>
    [LoadColumn(5)] public float Population { get; set; }

    /// <summary>Value loaded from source column 6.</summary>
    [LoadColumn(6)] public float Households { get; set; }

    /// <summary>Value loaded from source column 7.</summary>
    [LoadColumn(7)] public float MedianIncome { get; set; }

    /// <summary>
    /// Value loaded from source column 8 and surfaced under the column
    /// name "Label".
    /// </summary>
    [LoadColumn(8)]
    [ColumnName("Label")]
    public float MedianHouseValue { get; set; }

    /// <summary>Text value loaded from source column 9.</summary>
    [LoadColumn(9)] public string OceanProximity { get; set; }
}
}
10 changes: 10 additions & 0 deletions MLNetExamples/SelectAndShuffle/HousingPrediction.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using Microsoft.ML.Data;

namespace SelectAndShuffle
{
/// <summary>
/// Output row for a prediction; the single property is bound to the
/// "Score" column.
/// </summary>
public class HousingPrediction
{
    /// <summary>Value read from the "Score" column.</summary>
    [ColumnName("Score")] public float PredictedPrice { get; set; }
}
}
65 changes: 65 additions & 0 deletions MLNetExamples/SelectAndShuffle/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
using Microsoft.ML;
using System;

namespace SelectAndShuffle
{
/// <summary>
/// Console sample that loads the housing CSV and runs several data-prep
/// operations on it (select/drop columns, shuffle, take and filter rows),
/// then prints a preview of the filtered rows.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: builds each data-prep view in turn and displays the
    /// filtered one. Waits for a line on standard input before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        var mlContext = new MLContext();

        var housing = mlContext.Data.LoadFromTextFile<HousingData>("./housing.csv", hasHeader: true, separatorChar: ',');

        // Keep only the two named columns.
        // Pass the result to DisplayColumns to inspect it.
        var columnSelector = mlContext.Transforms.SelectColumns("HousingMedianAge", "TotalBedrooms");
        var fittedSelector = columnSelector.Fit(housing);
        var selectedView = fittedSelector.Transform(housing);

        // Remove the two named columns.
        // Pass the result to DisplayColumns to inspect it.
        var columnDropper = mlContext.Transforms.DropColumns("Latitude", "Longitude");
        var fittedDropper = columnDropper.Fit(housing);
        var droppedView = fittedDropper.Transform(housing);

        // Shuffle the rows with a fixed seed.
        // Pass `housing` and then the result to DisplayColumns to compare them.
        var shuffledView = mlContext.Data.ShuffleRows(housing, seed: 42);

        // Take the first two rows.
        // Pass the result to DisplayColumns to inspect it.
        var firstRowsView = mlContext.Data.TakeRows(housing, 2);

        // Filter rows on the "Population" column using the given bounds.
        var filteredView = mlContext.Data.FilterRowsByColumn(housing, "Population", lowerBound: 0, upperBound: 1000);

        DisplayColumns(filteredView);

        Console.ReadLine();
    }

    /// <summary>
    /// Prints a preview of at most five rows of <paramref name="data"/>.
    /// Each row is written as a separator line followed by one line of
    /// "name: value " pairs.
    /// </summary>
    /// <param name="data">The data view to preview.</param>
    private static void DisplayColumns(IDataView data)
    {
        var previewRows = data.Preview(maxRows: 5).RowView;

        foreach (var previewRow in previewRows)
        {
            var line = new System.Text.StringBuilder();

            foreach (var column in previewRow.Values)
            {
                line.Append($"{column.Key}: {column.Value} ");
            }

            Console.WriteLine("----------------------------------");
            Console.WriteLine(line.ToString());
        }
    }
}
}
18 changes: 18 additions & 0 deletions MLNetExamples/SelectAndShuffle/SelectAndShuffle.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build an executable targeting netcoreapp2.2. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.2</TargetFramework>
</PropertyGroup>

<!-- NuGet dependency: Microsoft.ML, pinned to version 1.3.1. -->
<ItemGroup>
<PackageReference Include="Microsoft.ML" Version="1.3.1" />
</ItemGroup>

<!-- Copy housing.csv to the output directory (PreserveNewest) so it sits next to the built program. -->
<ItemGroup>
<None Update="housing.csv">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Loading

0 comments on commit c2d8f54

Please sign in to comment.