Skip to content

Commit

Permalink
add clustering algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
ayrat555 committed May 27, 2017
1 parent 2166f29 commit d1090cf
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 8 deletions.
45 changes: 43 additions & 2 deletions lib/rock/algorithm.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,16 @@ defmodule Rock.Algorithm do
neighbour_criterion = theta |> NeighbourCriterion.new
link_matrix = points |> Links.matrix(neighbour_criterion)
initial_clusters = points |> initialize_clusters
current_number_of_clusters = points |> Enum.count
local_heaps = initial_clusters |> Heaps.initialize(link_matrix, theta)

local_heaps
|> optimize_clusters(initial_clusters, number_of_clusters, theta)
|> optimize_clusters(
initial_clusters,
theta,
number_of_clusters,
current_number_of_clusters
)
end

defp initialize_clusters(points) do
Expand All @@ -24,6 +30,41 @@ defmodule Rock.Algorithm do
end)
end

# Repeatedly merges the pair of clusters found at the head of the global heap
# until the requested number of clusters remains.
#
# Arguments:
#   * `local_heaps`      - heaps as returned by `Heaps.initialize/3` or
#                          `Heaps.update/4`
#   * `clusters`         - list of the clusters that currently exist
#   * `theta`            - passed through unchanged to `Heaps.update/4`
#   * `necessary_number` - number of clusters requested by the caller
#   * `current_number`   - running cluster count, decremented on every merge
#
# Returns the list of remaining clusters.
#
# The guard is `<=` rather than `==`: when the caller asks for at least as
# many clusters as already exist there is nothing to merge, and an equality
# check would never match once the counter is below the target.
defp optimize_clusters(_, clusters, _, necessary_number, current_number)
     when current_number <= necessary_number do
  clusters
end

defp optimize_clusters(local_heaps, clusters, theta, necessary_number, current_number) do
  case local_heaps |> Heaps.global_heap |> Enum.at(0) do
    nil ->
      # The global heap has no entries, so there is no pair left to merge.
      # Return what we have instead of failing on the tuple match below.
      clusters

    {_, _, v_uuid, u_uuid} ->
      v_cluster = clusters |> find_cluster(v_uuid)
      u_cluster = clusters |> find_cluster(u_uuid)

      {new_local_heaps, merged_cluster} =
        local_heaps |> Heaps.update(v_cluster, u_cluster, theta)

      # Both merged clusters are replaced by the single cluster built from them.
      remaining_clusters =
        clusters
        |> List.delete(v_cluster)
        |> List.delete(u_cluster)

      optimize_clusters(
        new_local_heaps,
        [merged_cluster | remaining_clusters],
        theta,
        necessary_number,
        current_number - 1
      )
  end
end

# Returns the first cluster in `clusters` whose `uuid` field equals `uuid`,
# or `nil` when none does.
defp find_cluster(clusters, uuid) do
  Enum.find(clusters, fn %Cluster{uuid: candidate_uuid} ->
    candidate_uuid == uuid
  end)
end
end
6 changes: 3 additions & 3 deletions lib/rock/heaps.ex
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ defmodule Rock.Heaps do
heap =
heap
|> Heap.remove_item(v_uuid)
|> Heap.remove_item(u_item)
|> Heap.remove_item(u_uuid)

if cross_link_count == 0 do
heap
Expand All @@ -55,10 +55,10 @@ defmodule Rock.Heaps do
|> remove_heap(v_uuid)
|> remove_heap(u_uuid)

#need optimization, move to to heaps update ^
#need optimization, move to heaps update ^
w_heap = new_heaps |> construct_w_heap(w_cluster)

{[w_heap | new_heaps], w_heap}
{[w_heap | new_heaps], w_cluster}
end

def global_heap(heaps) do
Expand Down
3 changes: 2 additions & 1 deletion lib/rock/struct/heap.ex
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ defmodule Rock.Struct.Heap do
if uuid |> exists_in_items?(items),
do: raise ArgumentError, message: "cluster is already member of the heap"

new_item = heap_cluster |> calculate_item(cluster, theta, cross_link_count)
new_item = heap_cluster |> calculate_item(cluster, cross_link_count, theta)
new_items = [new_item | items] |> sort

%Heap{cluster: heap_cluster, items: new_items}
Expand Down Expand Up @@ -92,6 +92,7 @@ defmodule Rock.Struct.Heap do
other_cluster = %Cluster{uuid: uuid},
cross_link_count,
theta) when is_number(cross_link_count) do

measure =
ClusterMergeCriterion.measure(
cluster,
Expand Down
4 changes: 4 additions & 0 deletions lib/rock/struct/point.ex
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,8 @@ defmodule Rock.Struct.Point do
# Returns how many elements the point's `attributes` collection holds.
def attribute_size(%Point{attributes: attrs}), do: Enum.count(attrs)

# Converts the point's `attributes` into a plain list via `MapSet.to_list/1`.
def to_list(%Point{attributes: attrs}), do: MapSet.to_list(attrs)
end
8 changes: 8 additions & 0 deletions lib/rock/utils.ex
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
defmodule Rock.Utils do
alias Rock.Struct.Point
alias Rock.Struct.Cluster

def internalize_points(points) when is_list(points) do
points
Expand All @@ -8,4 +9,11 @@ defmodule Rock.Utils do
Point.new(name, attributes, index)
end)
end

# Turns a list of clusters into nested lists: one inner list per cluster,
# holding the `Point.to_list/1` form of each of that cluster's points.
def externalize_clusters(clusters) when is_list(clusters) do
  Enum.map(clusters, fn %Cluster{points: members} ->
    Enum.map(members, &Point.to_list/1)
  end)
end
end
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ defmodule Rock.Mixfile do
# Project dependencies. `credo` and `apex` are restricted to the `:dev` and
# `:test` environments; `uuid` is available in every environment.
defp deps do
  [
    {:credo, "~> 0.7", only: [:dev, :test]},
    {:uuid, "~> 1.1"},
    {:apex, "~>1.0.0", only: [:dev, :test]}
  ]
end

Expand Down
3 changes: 2 additions & 1 deletion mix.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
%{"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"},
%{"apex": {:hex, :apex, "1.0.0", "abf230314d35ca4c48a902f693247f190ad42fc14862b9c4f7dbb7077b21c20a", [:mix], [], "hexpm"},
"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"},
"credo": {:hex, :credo, "0.7.3", "9827ab04002186af1aec014a811839a06f72aaae6cd5eed3919b248c8767dbf3", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}], "hexpm"},
"uuid": {:hex, :uuid, "1.1.7", "007afd58273bc0bc7f849c3bdc763e2f8124e83b957e515368c498b641f7ab69", [:mix], [], "hexpm"}}
130 changes: 130 additions & 0 deletions test/rock/algorithm_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
defmodule Rock.AlgorithmTest do
  use ExUnit.Case

  alias Rock.Algorithm
  alias Rock.Utils

  # Fourteen `{name, attributes}` fixtures, each with three attribute values,
  # converted once at compile time with `Utils.internalize_points/1`.
  @points [
            {"point1", ["1", "2", "3"]},
            {"point2", ["1", "2", "4"]},
            {"point3", ["1", "2", "5"]},
            {"point4", ["1", "3", "4"]},
            {"point5", ["1", "3", "5"]},
            {"point6", ["1", "4", "5"]},
            {"point7", ["2", "3", "4"]},
            {"point8", ["2", "3", "5"]},
            {"point9", ["2", "4", "5"]},
            {"point10", ["3", "4", "5"]},
            {"point11", ["1", "2", "6"]},
            {"point12", ["1", "2", "7"]},
            {"point13", ["1", "6", "7"]},
            {"point14", ["2", "6", "7"]}
          ]
          |> Utils.internalize_points

  # Every test asks for the same number of clusters; only theta varies.
  @number_of_clusters 5

  test "clusterizes points with theta = 0.1" do
    expected = [
      [
        ["1", "3", "5"],
        ["1", "4", "5"],
        ["3", "4", "5"]
      ],
      [
        ["1", "2", "6"],
        ["1", "2", "7"],
        ["1", "6", "7"]
      ],
      [
        ["1", "2", "5"],
        ["1", "3", "4"],
        ["1", "2", "3"],
        ["1", "2", "4"]
      ],
      [
        ["2", "3", "4"],
        ["2", "3", "5"],
        ["2", "4", "5"]
      ],
      [
        ["2", "6", "7"]
      ]
    ]

    assert clusterize_with(0.1) == expected
  end

  test "clusterizes points with theta = 0.2" do
    expected = [
      [
        ["1", "2", "5"],
        ["1", "3", "4"],
        ["1", "2", "3"],
        ["1", "2", "4"],
        ["2", "3", "4"],
        ["2", "3", "5"],
        ["2", "4", "5"]
      ],
      [
        ["1", "2", "6"],
        ["1", "2", "7"],
        ["1", "3", "5"],
        ["1", "4", "5"]
      ],
      [
        ["3", "4", "5"]
      ],
      [
        ["1", "6", "7"]
      ],
      [
        ["2", "6", "7"]
      ]
    ]

    assert clusterize_with(0.2) == expected
  end

  test "clusterizes points with theta = 0.3" do
    expected = [
      [
        ["2", "3", "4"],
        ["2", "3", "5"],
        ["3", "4", "5"],
        ["1", "4", "5"],
        ["2", "4", "5"]
      ],
      [
        ["1", "2", "3"],
        ["1", "2", "4"],
        ["1", "2", "5"],
        ["1", "2", "6"],
        ["1", "2", "7"]
      ],
      [
        ["1", "3", "4"],
        ["1", "3", "5"]
      ],
      [
        ["1", "6", "7"]
      ],
      [
        ["2", "6", "7"]
      ]
    ]

    assert clusterize_with(0.3) == expected
  end

  # Runs the shared fixture through the algorithm for the given `theta` and
  # converts the resulting clusters to nested lists so they can be compared
  # against literals with `assert ... == ...`, which reports a readable diff
  # on failure instead of a bare MatchError.
  defp clusterize_with(theta) do
    @points
    |> Algorithm.clusterize(@number_of_clusters, theta)
    |> Utils.externalize_clusters
  end
end

0 comments on commit d1090cf

Please sign in to comment.