diff --git a/lib/rock.ex b/lib/rock.ex
index 4e033fc..320ed59 100644
--- a/lib/rock.ex
+++ b/lib/rock.ex
@@ -1,6 +1,8 @@
 defmodule Rock do
+  alias Rock.Utils
+  alias Rock.Algorithm
   @moduledoc """
-  Documentation for Rock.
+  ROCK: A Robust Clustering Algorithm for Categorical Attributes
   """
 
   @doc """
@@ -12,7 +14,16 @@ defmodule Rock do
       :world
 
   """
-  def hello do
-    :world
+
+  # The checks are joined with `and` on purpose: several `when` clauses on one
+  # function head are OR-ed by Elixir, so any single valid argument would pass.
+  def clusterize(points, number_of_clusters, theta, similarity_function \\ nil)
+      when is_list(points) and
+             is_number(number_of_clusters) and
+             is_number(theta) do
+    points
+    |> Utils.internalize_points
+    |> Algorithm.clusterize(number_of_clusters, theta, similarity_function)
+    |> Utils.externalize_clusters
   end
 end
diff --git a/lib/rock/algorithm.ex b/lib/rock/algorithm.ex
index b2ee185..a3e5692 100644
--- a/lib/rock/algorithm.ex
+++ b/lib/rock/algorithm.ex
@@ -2,11 +2,15 @@ defmodule Rock.Algorithm do
   alias Rock.Struct.Cluster
   alias Rock.NeighbourCriterion
   alias Rock.Links
-  alias Rock.ClusterMergeCriterion
   alias Rock.Heaps
 
-  def clusterize(points, number_of_clusters, theta) when is_list(points) do
-    neighbour_criterion = theta |> NeighbourCriterion.new
+  def clusterize(points, number_of_clusters, theta, similarity_function \\ nil) when is_list(points) do
+    neighbour_criterion = if is_nil(similarity_function) do
+      theta |> NeighbourCriterion.new
+    else
+      theta |> NeighbourCriterion.new(similarity_function)
+    end
+
     link_matrix = points |> Links.matrix(neighbour_criterion)
     initial_clusters = points |> initialize_clusters
     current_number_of_clusters = points |> Enum.count
@@ -30,6 +34,12 @@ defmodule Rock.Algorithm do
     end)
   end
 
+  defp optimize_clusters(_, _, _, necessary_number, current_number)
+      when necessary_number > current_number do
+
+    raise ArgumentError, message: "Needed number of clusters must be smaller than number of points"
+  end
+
   defp optimize_clusters(_, clusters, _, necessary_number, current_number)
       when necessary_number == current_number do
     clusters
@@ -39,7 +49,7 @@ defmodule Rock.Algorithm do
     global_heap = local_heaps |> Heaps.global_heap
     {_, _, v_uuid, u_uuid} = global_heap |> Enum.at(0)
     v_cluster = clusters |> find_cluster(v_uuid)
-    u_cluster = clusters |> find_cluster(u_uuid) 
+    u_cluster = clusters |> find_cluster(u_uuid)
 
     {new_local_heap, new_cluster} =
       local_heaps
diff --git a/mix.exs b/mix.exs
index 6ec095c..1aadb6b 100644
--- a/mix.exs
+++ b/mix.exs
@@ -20,10 +20,11 @@ defmodule Rock.Mixfile do
     [
       {:credo, "~> 0.7", only: [:dev, :test]},
       {:uuid, "~> 1.1"},
-      {:apex, "~>1.0.0", only: [:dev, :test]}
+      {:apex, "~> 1.0.0", only: [:dev, :test]}
     ]
   end
 
   defp elixirc_paths(:test), do: ["lib", "test/support"]
   defp elixirc_paths(_), do: ["lib"]
 end
+
diff --git a/test/rock_test.exs b/test/rock_test.exs
index d379348..03ba383 100644
--- a/test/rock_test.exs
+++ b/test/rock_test.exs
@@ -1,8 +1,67 @@
 defmodule RockTest do
   use ExUnit.Case
-  doctest Rock
 
-  test "the truth" do
-    assert 1 + 1 == 2
+  alias Rock.Struct.Point
+
+  @points [
+    {"point1", ["1", "2", "3"]},
+    {"point2", ["1", "2", "4"]},
+    {"point3", ["1", "2", "5"]},
+    {"point4", ["1", "3", "4"]},
+    {"point5", ["1", "3", "5"]},
+    {"point6", ["1", "4", "5"]},
+    {"point7", ["2", "3", "4"]},
+    {"point8", ["2", "3", "5"]},
+    {"point9", ["2", "4", "5"]},
+    {"point10", ["3", "4", "5"]},
+    {"point11", ["1", "2", "6"]},
+    {"point12", ["1", "2", "7"]},
+    {"point13", ["1", "6", "7"]},
+    {"point14", ["2", "6", "7"]}
+  ]
+
+  test "clusterizes points" do
+    theta = 0.15
+    number_of_clusters = 2
+
+    result = @points |> Rock.clusterize(number_of_clusters, theta)
+
+    [
+      [
+        ["1", "2", "6"],
+        ["1", "2", "7"],
+        ["1", "3", "5"],
+        ["1", "4", "5"],
+        ["1", "2", "5"],
+        ["1", "3", "4"],
+        ["1", "2", "3"],
+        ["1", "2", "4"],
+        ["2", "3", "4"],
+        ["2", "3", "5"],
+        ["2", "4", "5"],
+        ["3", "4", "5"]
+      ],
+      [
+        ["1", "6", "7"],
+        ["2", "6", "7"]
+      ]
+    ] = result
+  end
+
+  test "clusterizes points with custom similarity function" do
+    theta = 0.5
+    number_of_clusters = 2
+    similarity_function = fn(
+      %Point{attributes: attributes1},
+      %Point{attributes: attributes2}) ->
+        count1 = Enum.count(attributes1)
+        count2 = Enum.count(attributes2)
+
+        if count1 >= count2, do: count2 / count1, else: count1 / count2
+    end
+
+    result = @points |> Rock.clusterize(number_of_clusters, theta, similarity_function)
+
+    ^number_of_clusters = result |> Enum.count
   end
 end