Skip to content

Commit

Permalink
allow to use custom similarity function
Browse files Browse the repository at this point in the history
  • Loading branch information
ayrat555 committed May 27, 2017
1 parent d1090cf commit e9d51fc
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 11 deletions.
15 changes: 12 additions & 3 deletions lib/rock.ex
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
defmodule Rock do
alias Rock.Utils
alias Rock.Algorithm
@moduledoc """
Documentation for Rock.
ROCK: A Robust Clustering Algorithm for Categorical Attributes
"""

@doc """
Expand All @@ -12,7 +14,14 @@ defmodule Rock do
:world
"""
def hello do
:world

# Clusters `points` into `number_of_clusters` groups.
#
# The points are converted with `Utils.internalize_points/1`, clustered by
# `Algorithm.clusterize/4`, and the resulting clusters are converted back with
# `Utils.externalize_clusters/1`.
#
#   * `points`              - list of points to cluster
#   * `number_of_clusters`  - number of clusters requested
#   * `theta`               - numeric threshold forwarded to the algorithm
#   * `similarity_function` - optional custom similarity function; `nil`
#                             (the default) is forwarded unchanged
#
# Raises `FunctionClauseError` unless `points` is a list and both
# `number_of_clusters` and `theta` are numbers.
def clusterize(points, number_of_clusters, theta, similarity_function \\ nil)
    # The checks are joined with `and` on purpose: several `when` clauses on
    # one head are alternatives (OR), so chaining them would accept the call
    # as soon as any single check passed.
    when is_list(points) and is_number(number_of_clusters) and is_number(theta) do
  points
  |> Utils.internalize_points()
  |> Algorithm.clusterize(number_of_clusters, theta, similarity_function)
  |> Utils.externalize_clusters()
end
end
18 changes: 14 additions & 4 deletions lib/rock/algorithm.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ defmodule Rock.Algorithm do
alias Rock.Struct.Cluster
alias Rock.NeighbourCriterion
alias Rock.Links
alias Rock.ClusterMergeCriterion
alias Rock.Heaps

def clusterize(points, number_of_clusters, theta) when is_list(points) do
neighbour_criterion = theta |> NeighbourCriterion.new
def clusterize(points, number_of_clusters, theta, similarity_function \\ nil) when is_list(points) do
neighbour_criterion = if is_nil(similarity_function) do
theta |> NeighbourCriterion.new
else
theta |> NeighbourCriterion.new(similarity_function)
end

link_matrix = points |> Links.matrix(neighbour_criterion)
initial_clusters = points |> initialize_clusters
current_number_of_clusters = points |> Enum.count
Expand All @@ -30,6 +34,12 @@ defmodule Rock.Algorithm do
end)
end

# Rejects a request for more clusters than `current_number` allows by raising
# `ArgumentError`. Only the last two arguments are inspected; the first three
# are ignored by this clause.
# NOTE(review): `current_number` presumably starts as the number of points -
# confirm against the caller.
defp optimize_clusters(_, _, _, necessary_number, current_number)
     when current_number < necessary_number do
  raise(ArgumentError, "Needed number of clusters must be smaller than number of points")
end

defp optimize_clusters(_, clusters, _, necessary_number, current_number)
when necessary_number == current_number do
clusters
Expand All @@ -39,7 +49,7 @@ defmodule Rock.Algorithm do
global_heap = local_heaps |> Heaps.global_heap
{_, _, v_uuid, u_uuid} = global_heap |> Enum.at(0)
v_cluster = clusters |> find_cluster(v_uuid)
u_cluster = clusters |> find_cluster(u_uuid)
u_cluster = clusters |> find_cluster(u_uuid)

{new_local_heap, new_cluster} =
local_heaps
Expand Down
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ defmodule Rock.Mixfile do
[
{:credo, "~> 0.7", only: [:dev, :test]},
{:uuid, "~> 1.1"},
{:apex, "~>1.0.0", only: [:dev, :test]}
{:apex, "~> 1.0.0", only: [:dev, :test]}
]
end

# Source directories for the given environment: `:test` also gets
# "test/support" in addition to "lib"; every other value gets only "lib".
defp elixirc_paths(env) do
  case env do
    :test -> ["lib", "test/support"]
    _ -> ["lib"]
  end
end
end

65 changes: 62 additions & 3 deletions test/rock_test.exs
Original file line number Diff line number Diff line change
@@ -1,8 +1,67 @@
defmodule RockTest do
use ExUnit.Case
doctest Rock

test "the truth" do
assert 1 + 1 == 2
alias Rock.Struct.Point

@points [
{"point1", ["1", "2", "3"]},
{"point2", ["1", "2", "4"]},
{"point3", ["1", "2", "5"]},
{"point4", ["1", "3", "4"]},
{"point5", ["1", "3", "5"]},
{"point6", ["1", "4", "5"]},
{"point7", ["2", "3", "4"]},
{"point8", ["2", "3", "5"]},
{"point9", ["2", "4", "5"]},
{"point10", ["3", "4", "5"]},
{"point11", ["1", "2", "6"]},
{"point12", ["1", "2", "7"]},
{"point13", ["1", "6", "7"]},
{"point14", ["2", "6", "7"]}
]

test "clusterizes points" do
  theta = 0.15
  number_of_clusters = 2

  result = Rock.clusterize(@points, number_of_clusters, theta)

  # Compared with `assert ==` rather than a bare match so that a mismatch is
  # reported as an ExUnit assertion failure with a diff instead of a
  # `MatchError`.
  expected = [
    [
      ["1", "2", "6"],
      ["1", "2", "7"],
      ["1", "3", "5"],
      ["1", "4", "5"],
      ["1", "2", "5"],
      ["1", "3", "4"],
      ["1", "2", "3"],
      ["1", "2", "4"],
      ["2", "3", "4"],
      ["2", "3", "5"],
      ["2", "4", "5"],
      ["3", "4", "5"]
    ],
    [
      ["1", "6", "7"],
      ["2", "6", "7"]
    ]
  ]

  assert result == expected
end

test "clusterizes points with custom similarity function" do
  theta = 0.5
  number_of_clusters = 2

  # Similarity here is the ratio of the smaller attribute count to the
  # larger one; it looks only at how many attributes each point has.
  similarity_function = fn %Point{attributes: attributes1},
                           %Point{attributes: attributes2} ->
    count1 = Enum.count(attributes1)
    count2 = Enum.count(attributes2)

    if count1 >= count2, do: count2 / count1, else: count1 / count2
  end

  result = Rock.clusterize(@points, number_of_clusters, theta, similarity_function)

  # `assert` instead of a pinned bare match, so a wrong cluster count is
  # reported as an ExUnit assertion failure rather than a `MatchError`.
  assert Enum.count(result) == number_of_clusters
end
end

0 comments on commit e9d51fc

Please sign in to comment.