Skip to content

Commit

Permalink
refactor kmeans init methods
Browse files Browse the repository at this point in the history
  • Loading branch information
akondas committed May 2, 2016
1 parent 56114d9 commit 5c67cfa
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 34 deletions.
2 changes: 1 addition & 1 deletion src/Phpml/Clustering/KMeans/Point.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public function getDistanceWith(self $point, $precise = true)
$distance += $difference * $difference;
}

return $precise ? sqrt((float)$distance) : $distance;
return $precise ? sqrt((float) $distance) : $distance;
}

/**
Expand Down
82 changes: 54 additions & 28 deletions src/Phpml/Clustering/KMeans/Space.php
Original file line number Diff line number Diff line change
Expand Up @@ -150,37 +150,11 @@ protected function initializeClusters(int $clustersNumber, int $initMethod)
{
switch ($initMethod) {
case KMeans::INIT_RANDOM:
list($min, $max) = $this->getBoundaries();
for ($n = 0; $n < $clustersNumber; ++$n) {
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
}
$clusters = $this->initializeRandomClusters($clustersNumber);
break;

case KMeans::INIT_KMEANS_PLUS_PLUS:
$position = rand(1, count($this));
for ($i = 1, $this->rewind(); $i < $position && $this->valid(); $i++, $this->next());
$clusters[] = new Cluster($this, $this->current()->getCoordinates());

$distances = new SplObjectStorage();

for ($i = 1; $i < $clustersNumber; ++$i) {
$sum = 0;
foreach ($this as $point) {
$distance = $point->getDistanceWith($point->getClosest($clusters));
$sum += $distances[$point] = $distance;
}

$sum = rand(0, (int) $sum);
foreach ($this as $point) {
if (($sum -= $distances[$point]) > 0) {
continue;
}

$clusters[] = new Cluster($this, $point->getCoordinates());
break;
}
}

$clusters = $this->initializeKMPPClusters($clustersNumber);
break;
}
$clusters[0]->attachAll($this);
Expand Down Expand Up @@ -230,4 +204,56 @@ protected function iterate($clusters)

return $convergence;
}

/**
 * Builds the requested number of clusters, each centred on a random point
 * drawn between the boundaries reported by getBoundaries().
 *
 * @param int $clustersNumber how many clusters to create
 *
 * @return array list of Cluster instances (empty when $clustersNumber is not positive)
 */
private function initializeRandomClusters(int $clustersNumber)
{
    list($lowerBound, $upperBound) = $this->getBoundaries();

    $result = [];
    $remaining = $clustersNumber;

    while ($remaining > 0) {
        $centre = $this->getRandomPoint($lowerBound, $upperBound);
        $result[] = new Cluster($this, $centre->getCoordinates());
        --$remaining;
    }

    return $result;
}

/**
 * Picks the initial cluster centres with k-means++-style seeding.
 *
 * The first centre is the point found at a randomly chosen position in this
 * space. Every further centre is drawn with probability proportional to the
 * distance between a point and the cluster returned for it by getClosest().
 *
 * NOTE(review): canonical k-means++ weights candidates by the *squared*
 * distance; this code uses getDistanceWith() in its default (precise,
 * square-rooted) mode, as before - confirm whether that is intended.
 *
 * @param int $clustersNumber number of centres requested; the first centre is
 *                            created unconditionally, so values below 1 still
 *                            yield one cluster
 *
 * @return array list of Cluster instances
 */
protected function initializeKMPPClusters(int $clustersNumber)
{
    $clusters = [];

    // Move the internal iterator to a randomly chosen 1-based position.
    $this->rewind();
    for ($steps = rand(1, count($this)) - 1; $steps > 0 && $this->valid(); --$steps) {
        $this->next();
    }
    $clusters[] = new Cluster($this, $this->current()->getCoordinates());

    $distances = new SplObjectStorage();

    for ($i = 1; $i < $clustersNumber; ++$i) {
        // Cache each point's distance to its closest current centre and
        // accumulate the total weight.
        $sum = 0.0;
        foreach ($this as $point) {
            $distance = $point->getDistanceWith($point->getClosest($clusters));
            $distances[$point] = $distance;
            $sum += $distance;
        }

        // Draw a float threshold in [0, $sum]. Truncating $sum to an integer
        // (as rand(0, (int) $sum) did) collapses the threshold to 0 whenever
        // the total distance is below 1, which always selected the first
        // iterated point and ignored the fractional part of the weights.
        $threshold = $sum * (rand() / getrandmax());

        // Walk the cumulative weights until the threshold is used up. If
        // floating-point rounding leaves a tiny positive residue after the
        // last point, that last point is kept as the selection.
        $chosen = null;
        foreach ($this as $point) {
            $chosen = $point;
            $threshold -= $distances[$point];
            if ($threshold <= 0) {
                break;
            }
        }

        if ($chosen !== null) {
            $clusters[] = new Cluster($this, $chosen->getCoordinates());
        }
    }

    return $clusters;
}
}
2 changes: 1 addition & 1 deletion src/Phpml/Math/Distance/Euclidean.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ public function distance(array $a, array $b): float
$distance += pow($a[$i] - $b[$i], 2);
}

return sqrt((float)$distance);
return sqrt((float) $distance);
}
}
4 changes: 2 additions & 2 deletions src/Phpml/Math/Matrix.php
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ private function calculateDeterminant()
for ($j = 0; $j < $this->columns; ++$j) {
$subMatrix = $this->crossOut(0, $j);
$minor = $this->matrix[0][$j] * $subMatrix->getDeterminant();
$determinant += fmod((float)$j, 2.0) == 0 ? $minor : -$minor;
$determinant += fmod((float) $j, 2.0) == 0 ? $minor : -$minor;
}
}

Expand Down Expand Up @@ -236,7 +236,7 @@ public function inverse()
for ($i = 0; $i < $this->rows; ++$i) {
for ($j = 0; $j < $this->columns; ++$j) {
$minor = $this->crossOut($i, $j)->getDeterminant();
$newMatrix[$i][$j] = fmod((float)($i + $j), 2.0) == 0 ? $minor : -$minor;
$newMatrix[$i][$j] = fmod((float) ($i + $j), 2.0) == 0 ? $minor : -$minor;
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/Phpml/Math/Statistic/Correlation.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public static function pearson(array $x, array $y)
$b2 = $b2 + pow($b, 2);
}

$corr = $axb / sqrt((float)($a2 * $b2));
$corr = $axb / sqrt((float) ($a2 * $b2));

return $corr;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Phpml/Math/Statistic/StandardDeviation.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ public static function population(array $a, $sample = true)
--$n;
}

return sqrt((float)($carry / $n));
return sqrt((float) ($carry / $n));
}
}

0 comments on commit 5c67cfa

Please sign in to comment.