Skip to content

Commit

Permalink
complete DBSCAN in unit test.
Browse files Browse the repository at this point in the history
  • Loading branch information
xin-pu committed Jun 16, 2022
1 parent 90c97c1 commit 243a024
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 50 deletions.
1 change: 1 addition & 0 deletions MLSharp.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:Boolean x:Key="/Default/UserDictionary/Words/=Dataset/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=DBSCAN/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Logogram/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Nesterov/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=pred/@EntryIndexedValue">True</s:Boolean>
Expand Down
109 changes: 59 additions & 50 deletions src/ML.Core.Test/KMeansTest.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
using System.IO;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using MathNet.Numerics.Random;
using ML.Core.Data.DataStructs;
using ML.Core.Data.Loader;
using ML.Core.Models;
using ML.Core.Transform;
using Numpy;
using Numpy.Models;
using Xunit;
using Xunit.Abstractions;

Expand Down Expand Up @@ -59,62 +60,70 @@ public void TestSpectral()
}

[Fact]
public void TestSpectral2()
public void TestDe()
{
// Load the comma-separated sample file and convert it to a feature NDarray.
var path = Path.Combine(dataFolder, "data_cluster.txt");
var data = TextLoader.LoadDataSet<LinearData>(path, new[] {','}, false);
var input = data.ToFeatureNDarray();

var (W, D) = getAdjacentMatrix(input);
print(W);
print(D);
print(np.eye(4));
print(np.eye(input.shape[0]) - np.linalg.inv(D) * W);

var Lrw = np.dot(np.linalg.inv(D), W);
print(Lrw);


var (Lamda, V) = np.linalg.eig(Lrw);
print(Lamda);
print(V);
}


/// <summary>
/// Builds the pair (W, D) from an epsilon distance threshold: W holds
/// <paramref name="epsilon"/> wherever the computed distance is below
/// <paramref name="epsilon"/> and 0 elsewhere; D is np.eye(batch) multiplied by np.sum(W, 0).
/// </summary>
/// <param name="input">Feature array; shape[0] is read as the batch size and shape[1] as the feature count.</param>
/// <param name="epsilon">Distance threshold compared against the computed distances.</param>
private (NDarray, NDarray) getAdjacentEpsilon(NDarray input, double epsilon)
{
var batch = input.shape[0];
var features = input.shape[1];
// NOTE(review): inputH / inputV are presumably the two tilings needed for row-to-row
// differences - confirm the tile/reshape layout really yields pairwise distances.
var inputH = np.reshape(np.tile(input, np.array(batch)), new Shape(batch, batch, features));

var inputV = np.reshape(np.tile(input, np.array(batch, 1)), new Shape(batch, batch, features));

// 2-norm over the last axis of the difference.
var dis = np.linalg.norm(inputV - inputH, axis: -1, ord: 2);

var W = np.where(dis < epsilon, np.array(epsilon), np.array(0));
//np.fill_diagonal(W, 0);

var D = np.eye(batch) * np.sum(W, 0);

return (W, D);
// DBSCAN parameters: e is the distance threshold handed to getDirectly,
// minPoints is the neighbor count a point must exceed to count as a core object.
var e = 1.5;
var minPoints = 40;
// ou collects the core objects; all collects every point not yet assigned to a cluster.
var ou = new List<NDarray>();
var all = new List<NDarray>();

// Compute the core objects: rows for which getDirectly returns more than minPoints neighbors.
foreach (var i in Enumerable.Range(0, batch))
{
var x = input[i];
all.Add(x);
var ner = getDirectly(x, input, e);
if (ner.Length > minPoints)
ou.Add(x);
}


// Grow one cluster per iteration until no core object is left.
while (ou.Count > 0)
{
// Snapshot of the unassigned points, used below to work out what this iteration removed.
var allTemp = new List<NDarray>(all);

// Randomly pick one core object o.
var o = ou[SystemRandomSource.Default.Next(0, ou.Count)];
all.Remove(o);
var Q = new Queue<NDarray>();
Q.Enqueue(o);


// Breadth-first expansion: only points with more than minPoints neighbors spread the cluster further.
while (Q.Count > 0)
{
var q = Q.Dequeue();
var n = getDirectly(q, input, e);
if (n.Length > minPoints)
{
// NOTE(review): Contains / Remove / Equals rely on NDarray equality semantics, and
// getDirectly returns freshly indexed input[i] objects - confirm they match the entries stored in all.
var delta = n.Where(arr => all.Contains(arr) && !Equals(arr, q)).ToArray();
delta.ToList().ForEach(d =>
{
Q.Enqueue(d);
all.Remove(d);
});
}
}

// The cluster is everything that left the unassigned list during this iteration.
var a = allTemp.Where(arr => !all.Contains(arr)).ToList();
print(np.vstack(a.ToArray()));

// Drop the core objects that were absorbed into this cluster.
ou.RemoveAll(arr => a.Contains(arr));
}
}


/// <summary>
/// Computes the adjacency matrix using the fully-connected method.
/// </summary>
/// <remarks>
/// NOTE(review): this summary describes getAdjacentMatrix (Gaussian kernel W and matrix D).
/// getDirectly instead returns the rows of input whose 2-norm distance to x is
/// below e and not exactly zero - the summary should be updated to match.
/// </remarks>
/// <param name="input">Feature array whose rows are compared.</param>
/// <returns>See remarks.</returns>
internal (NDarray, NDarray) getAdjacentMatrix(NDarray input)
private NDarray[] getDirectly(NDarray x, NDarray input, double e)
{
var W = new Gaussian().Call(input);
var D = np.eye(input.shape[0]) * np.sum(W, 0);
return (W, D);
// 2-norm of (input - x) over the last axis, read out as a flat array of doubles.
var dis = np.linalg.norm(input - x, axis: -1, ord: 2).GetData<double>();
return dis
// Pair every distance with its index so the matching row can be fetched afterwards.
.Select((d, i) => (d, i))
// Keep distances below e; the exact-zero check presumably drops x itself
// (it also drops any exact duplicates of x - confirm that is intended).
.Where(p => p.d < e && p.d != 0)
.Select(p => input[p.i])
.ToArray();
}
}
}
1 change: 1 addition & 0 deletions src/ML.Core/ML.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@
<Compile Include="Metrics\Regression\MeanSquaredLogarithmicError.cs" />
<Compile Include="Metrics\Regression\RSquared.cs" />
<Compile Include="Models\Cluster\Cluster.cs" />
<Compile Include="Models\Cluster\DBSCAN.cs" />
<Compile Include="Models\Cluster\KMeans.cs" />
<Compile Include="Models\Cluster\Spectral.cs" />
<Compile Include="Models\Supervised\BinaryLogicClassify.cs" />
Expand Down
22 changes: 22 additions & 0 deletions src/ML.Core/Models/Cluster/DBSCAN.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using System;
using Numpy;

namespace ML.Core.Models
{
    /// <summary>
    /// Density-based clustering (DBSCAN). The clustering routine itself is not implemented yet.
    /// </summary>
    public class DBSCAN : Cluster
    {
        /// <summary>
        /// Creates the model, handing <paramref name="k"/> straight to the <see cref="Cluster"/> base constructor.
        /// </summary>
        /// <param name="k">Value forwarded unchanged to the base class constructor.</param>
        public DBSCAN(int k) : base(k)
        {
        }

        /// <summary>
        /// Placeholder for the clustering call; currently always throws.
        /// </summary>
        /// <param name="input">Not used by the current implementation.</param>
        /// <returns>Never returns normally.</returns>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public override NDarray Call(NDarray input) => throw new NotImplementedException();
    }
}

0 comments on commit 243a024

Please sign in to comment.