-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpace.m
122 lines (114 loc) · 4.02 KB
/
Space.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
classdef Space < handle
properties (Hidden)
Subsets % the same as clusters. [Array of Subset] (1 x N)
Clustering % Array of numbers, which line belongs to which cluster. (1 x n)
ClustersCount
ObjectsCount
Distances % Matrix of distances. (n x n)
Proximities % Matrix of proximities. (N x N)
Dispersion % In-cluster dispersies criterion J(K). [Number]
Proximity % External Dispersions criterion I(K). [Number]
Weight % Weight of Proximity. [Number]
end
methods
% Class initializer.
% @param Distances [Matrix] matrix of distances;
% @param ClustersCount [Integer] count of clusters;
% @param InitialClustering [Array] initial clustering. Default -- random.
%
function S = Space(Distances, ClustersCount, Weight, InitialClustering)
S.Distances = Distances;
S.Proximities = Distances2Proximities(S, Distances);
S.ClustersCount = ClustersCount;
S.ObjectsCount = size(Distances, 1);
S.Subsets = [];
if nargin < 3 % if Weight is absent
Weight = 0;
end
S.Weight = Weight;
if nargin < 4 % if InitialClustering is absent
InitialClustering = randi([1 ClustersCount], 1, S.ObjectsCount);
end
SetSubsets(S, InitialClustering);
end
function Result = SetSubsets(S, Clustering)
Result = [];
for i = 1:max(Clustering)
Numbers = find(Clustering == i);
Result = [Result, Subset(S.Distances, S.Proximities, Numbers)];
end
S.Clustering = Clustering;
S.Subsets = Result;
end
function p = Distances2Proximities(S, Distances)
N = size(Distances, 1);
p = zeros(N, N);
eta = sum(Distances(:) .^ 2) / (2 * N ^ 2);
Dist2Origin = @(x) (sum(Distances(:, x) .^ 2) / N) - eta;
for i = 1:N
for j = 1:N
p(i, j) = (Dist2Origin(i) + Dist2Origin(j) - Distances(i, j) ^ 2) / 2;
end
end
end
% Clusterization iteration.
% It seems to be sane to have only iteration here,
% because in data wrapper we could to some debug things between iterations.
%
function ClusterizeIteration(S)
for Number = 1:S.ObjectsCount
S.Dispersion = CalculateDispersion(S);
S.Proximity = CalculateProximity(S);
OldSubset = SubsetOf(S, Number);
OtherSubsets = S.Subsets(S.Subsets ~= OldSubset);
for NewSubset = OtherSubsets
Move(S, OldSubset, NewSubset, Number);
NewDispersion = CalculateDispersion(S);
NewProximity = CalculateProximity(S);
DispersionDiff = NewDispersion - S.Dispersion;
ProximityDiff = NewProximity - S.Proximity;
Diff = (- (1 - S.Weight) * DispersionDiff + S.Weight * ProximityDiff);
% Diff = (- DispersionDiff + S.Weight * ProximityDiff);
Result = Diff > 0;
if ~ Result
Move(S, NewSubset, OldSubset, Number); % Move back
else
S.Dispersion = NewDispersion;
S.Proximity = NewProximity;
end
end
end
end
function Result = SubsetOf(S, Number)
Result = S.Subsets(S.Clustering(Number));
end
function Move(S, OldSubset, NewSubset, Number)
S.Clustering(Number) = find(S.Subsets == NewSubset);
Move(OldSubset, NewSubset, Number);
end
% Average weighted dispersion of clusters.
% J(K) = 1 / N * (sum_{k = 1}^{K} N_k * \eta_k^2)
%
function result = CalculateDispersion(S)
Ns = zeros(1, S.ClustersCount);
etas = zeros(1, S.ClustersCount);
for i = 1:S.ClustersCount
Ns(i) = S.Subsets(i).Size;
etas(i) = Eta(S.Subsets(i));
end
result = sum(Ns .* etas) / S.ObjectsCount;
end
% Compactness of set of clusters.
% \delta(K) = 1 / K^2 * sum(sum(proximities))
%
function result = CalculateProximity(S)
result = 0;
for first = S.Subsets
for second = S.Subsets
result = result + Proximity(first, second);
end
end
result = result / S.ClustersCount ^ 2;
end
end % methods
end