forked from jorgecasas/php-ml
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Linear Discriminant Analysis (LDA) (#82)
* Linear Discriminant Analysis (LDA) * LDA test file * Matrix inverse via LUDecomposition * LUDecomposition inverse() and det() applied * Readme update for LDA
- Loading branch information
1 parent
12b8b11
commit 5b373fa
Showing
9 changed files
with
735 additions
and
131 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
<?php declare(strict_types=1); | ||
|
||
namespace Phpml\DimensionReduction; | ||
|
||
use Phpml\Math\LinearAlgebra\EigenvalueDecomposition; | ||
use Phpml\Math\Matrix; | ||
|
||
/**
 * Base class for transformers that reduce data by projecting it onto the
 * leading eigenvectors of a matrix.
 *
 * The number of eigen pairs (values & vectors) that are kept is driven either
 * by $numFeatures (a fixed count) or, when that is null, by $totalVariance
 * (the share of the eigenvalue sum that must be covered).
 *
 * @author hp
 */
abstract class EigenTransformerBase
{
    /**
     * Total variance to be conserved after the reduction.
     * Only consulted while $numFeatures is null.
     *
     * @var float
     */
    public $totalVariance = 0.9;

    /**
     * Number of features to be preserved after the reduction,
     * or null to select by $totalVariance instead.
     *
     * @var int|null
     */
    public $numFeatures = null;

    /**
     * Top eigenvectors of the matrix, ordered by descending eigenvalue
     *
     * @var array
     */
    protected $eigVectors = [];

    /**
     * Top eigenvalues of the matrix, in descending order
     *
     * @var array
     */
    protected $eigValues = [];

    /**
     * Calculates eigenValues and eigenVectors of the given matrix and stores
     * the top ones in $eigValues / $eigVectors.
     *
     * When $numFeatures is set, exactly that many pairs are kept (or all of
     * them if fewer exist). Otherwise pairs are added, largest eigenvalue
     * first, until their explained variance is no less than $totalVariance.
     *
     * @param array $matrix Matrix handed to EigenvalueDecomposition
     */
    protected function eigenDecomposition(array $matrix)
    {
        $eig = new EigenvalueDecomposition($matrix);

        $this->selectEigenPairs($eig->getRealEigenvalues(), $eig->getEigenvectors());
    }

    /**
     * Picks the leading eigen pairs and stores them on the instance.
     *
     * @param array $eigVals  Eigenvalues; keys are used to look up the matching vector
     * @param array $eigVects Eigenvectors, indexed by the same keys as $eigVals
     */
    protected function selectEigenPairs(array $eigVals, array $eigVects)
    {
        $totalEigVal = array_sum($eigVals);
        // A zero sum makes the explained-variance ratio undefined; dividing by
        // it would raise DivisionByZeroError (PHP 8) or yield INF/NAN (PHP 7).
        $hasVariance = (float) $totalEigVal !== 0.0;

        // Sort eigenvalues in descending order, keeping keys so that each
        // value can still be matched with its eigenvector
        arsort($eigVals);

        $explainedVar = 0.0;
        $vectors = [];
        $values = [];
        foreach ($eigVals as $i => $eigVal) {
            if ($hasVariance) {
                $explainedVar += $eigVal / $totalEigVal;
            }
            $vectors[] = $eigVects[$i];
            $values[] = $eigVal;

            if ($this->numFeatures !== null) {
                if (count($vectors) >= $this->numFeatures) {
                    break;
                }
            } elseif ($explainedVar >= $this->totalVariance) {
                // Without any variance this is never reached for a positive
                // threshold, so every eigen pair is kept (no reduction).
                break;
            }
        }

        $this->eigValues = $values;
        $this->eigVectors = $vectors;
    }

    /**
     * Returns the reduced data, i.e. $data multiplied by the transpose of the
     * matrix built from the stored eigenvectors.
     *
     * @param array $data
     *
     * @return array
     */
    protected function reduce(array $data)
    {
        $m1 = new Matrix($data);
        $m2 = new Matrix($this->eigVectors);

        return $m1->multiply($m2->transpose())->toArray();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,247 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Phpml\DimensionReduction; | ||
|
||
use Phpml\Math\Statistic\Mean; | ||
use Phpml\Math\Matrix; | ||
|
||
class LDA extends EigenTransformerBase
{
    /**
     * Whether fit() has completed; transform() refuses to run until it has
     *
     * @var bool
     */
    public $fit = false;

    /**
     * Distinct class labels found in the training classes
     *
     * @var array
     */
    public $labels;

    /**
     * Per-class column means, keyed by the label's position in $labels
     *
     * @var array
     */
    public $means;

    /**
     * Number of training samples per class, keyed like $means
     *
     * @var array
     */
    public $counts;

    /**
     * Mean of every column over the whole training set
     *
     * @var array
     */
    public $overallMean;

    /**
     * Linear Discriminant Analysis (LDA) is used to reduce the dimensionality
     * of the data. Unlike Principal Component Analysis (PCA), it is a supervised
     * technique that requires the class labels in order to fit the data to a
     * lower dimensional space. <br><br>
     * The algorithm can be initialized by specifying
     * either the totalVariance (a value between 0.1 and 0.99)
     * or numFeatures (number of features in the dataset) to be preserved.
     * Passing both is rejected; passing neither keeps the inherited defaults.
     *
     * @param float|null $totalVariance Total explained variance to be preserved
     * @param int|null   $numFeatures   Number of features to be preserved
     *
     * @throws \Exception When an argument is out of range or both are given
     */
    public function __construct($totalVariance = null, $numFeatures = null)
    {
        if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new \Exception("Total variance can be a value between 0.1 and 0.99");
        }
        if ($numFeatures !== null && $numFeatures <= 0) {
            throw new \Exception("Number of features to be preserved should be greater than 0");
        }
        if ($totalVariance !== null && $numFeatures !== null) {
            throw new \Exception("Either totalVariance or numFeatures should be specified in order to run the algorithm");
        }

        if ($numFeatures !== null) {
            $this->numFeatures = $numFeatures;
        }
        if ($totalVariance !== null) {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Trains the algorithm to transform the given data to a lower dimensional space.
     *
     * Builds the within-class (sW) and between-class (sB) scatter matrices,
     * runs the eigen decomposition on inverse(sW) * sB and returns the
     * training data projected onto the selected eigenvectors.
     *
     * @param array $data    Samples, one row of column values per sample
     * @param array $classes Class label of each sample, keyed like $data
     *
     * @return array The reduced training data
     *
     * @throws \Exception When $data is empty or a sample has no usable label
     */
    public function fit(array $data, array $classes) : array
    {
        // Fail early with a clear message instead of an unrelated error from
        // the matrix code further down.
        if (empty($data)) {
            throw new \Exception("LDA cannot be fitted on an empty dataset");
        }

        $this->labels = $this->getLabels($classes);
        $this->means = $this->calculateMeans($data, $classes);

        $sW = $this->calculateClassVar($data, $classes);
        $sB = $this->calculateClassCov();

        // NOTE(review): inverse() presumably fails for a singular sW — confirm
        // how Matrix reports that.
        $S = $sW->inverse()->multiply($sB);
        $this->eigenDecomposition($S->toArray());

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Returns unique labels in the dataset
     *
     * @param array $classes
     *
     * @return array
     */
    protected function getLabels(array $classes): array
    {
        // array_count_values() only counts int and string values and applies
        // array-key normalisation to them (e.g. '1' becomes 1).
        return array_keys(array_count_values($classes));
    }

    /**
     * Calculates mean of each column for each class and returns
     * n by m matrix where n is number of labels and m is number of columns.
     *
     * As a side effect, stores the per-class sample counts in $counts and the
     * column means of the whole dataset in $overallMean.
     *
     * @param array $data
     * @param array $classes
     *
     * @return array
     *
     * @throws \Exception When a sample has no usable label
     */
    protected function calculateMeans($data, $classes) : array
    {
        $means = [];
        $counts = [];
        $overallMean = array_fill(0, count($data[0]), 0.0);
        $labelIndex = $this->labelIndexMap();

        foreach ($data as $index => $row) {
            $label = $this->resolveLabel($classes, $index, $labelIndex);

            foreach ($row as $col => $val) {
                if (! isset($means[$label][$col])) {
                    $means[$label][$col] = 0.0;
                }
                $means[$label][$col] += $val;
                $overallMean[$col] += $val;
            }

            if (! isset($counts[$label])) {
                $counts[$label] = 0;
            }
            $counts[$label]++;
        }

        // Turn the per-class column sums into means
        foreach ($means as $index => $row) {
            foreach ($row as $col => $sum) {
                $means[$index][$col] = $sum / $counts[$index];
            }
        }

        // Calculate overall mean of the dataset for each column
        $numElements = array_sum($counts);
        $map = function ($el) use ($numElements) {
            return $el / $numElements;
        };
        $this->overallMean = array_map($map, $overallMean);
        $this->counts = $counts;

        return $means;
    }

    /**
     * Returns the within-class scatter matrix: the sum, over all samples, of
     * (x - m)T.(x - m) where m is the mean of the sample's class. The result
     * is an m by m matrix where m is number of columns.
     *
     * @param array $data
     * @param array $classes
     *
     * @return Matrix
     *
     * @throws \Exception When a sample has no usable label
     */
    protected function calculateClassVar($data, $classes)
    {
        // s is an m by m (number of columns) zero matrix to accumulate into
        $s = array_fill(0, count($data[0]), array_fill(0, count($data[0]), 0));
        $sW = new Matrix($s, false);
        $labelIndex = $this->labelIndexMap();

        foreach ($data as $index => $row) {
            $label = $this->resolveLabel($classes, $index, $labelIndex);
            $means = $this->means[$label];

            $row = $this->calculateVar($row, $means);

            $sW = $sW->add($row);
        }

        return $sW;
    }

    /**
     * Returns the between-class scatter matrix: the sum, over all classes, of
     * (mc - mo)T.(mc - mo) scaled by the class sample count, where mc is the
     * class mean and mo the overall mean. The result is an m by m matrix
     * where m is number of columns.
     *
     * @return Matrix
     */
    protected function calculateClassCov()
    {
        // s is an m by m (number of columns) zero matrix to accumulate into
        $s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0));
        $sB = new Matrix($s, false);

        foreach ($this->means as $index => $classMeans) {
            $row = $this->calculateVar($classMeans, $this->overallMean);
            $N = $this->counts[$index];
            $sB = $sB->add($row->multiplyByScalar($N));
        }

        return $sB;
    }

    /**
     * Returns the result of the calculation (x - m)T.(x - m)
     *
     * @param array $row
     * @param array $means
     *
     * @return Matrix
     */
    protected function calculateVar(array $row, array $means)
    {
        $x = new Matrix($row, false);
        $m = new Matrix($means, false);
        $diff = $x->subtract($m);

        return $diff->transpose()->multiply($diff);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the eigenVectors obtained in the last run of <code>fit</code>.
     *
     * @param array $sample A single sample or a list of samples
     *
     * @return array
     *
     * @throws \Exception When fit() has not been run yet
     */
    public function transform(array $sample)
    {
        if (!$this->fit) {
            throw new \Exception("LDA has not been fitted with respect to original dataset, please run LDA::fit() first");
        }

        // A single sample is wrapped so that reduce() always gets a list of rows
        if (! is_array($sample[0])) {
            $sample = [$sample];
        }

        return $this->reduce($sample);
    }

    /**
     * Builds a label => position lookup from $labels.
     *
     * @return array
     */
    private function labelIndexMap() : array
    {
        return array_flip($this->labels ?? []);
    }

    /**
     * Returns the position in $labels of the class assigned to a sample.
     *
     * Uses an array-key lookup rather than a loose array_search(): keys are
     * normalised exactly like array_count_values() normalised the labels, so
     * '1' still matches 1, while unrelated values that merely compare equal
     * under == (e.g. 'abc' and 0 on PHP 7, or '01' and 1) no longer collide.
     *
     * @param array|\ArrayAccess $classes    Class label per sample
     * @param int|string         $index      Key of the sample in the dataset
     * @param array              $labelIndex Lookup built by labelIndexMap()
     *
     * @return int
     *
     * @throws \Exception When the sample has no label, or the label is not a
     *                    known int/string label
     */
    private function resolveLabel($classes, $index, array $labelIndex) : int
    {
        if (! isset($classes[$index])) {
            throw new \Exception(sprintf("No class label given for the sample at index %s", $index));
        }

        $class = $classes[$index];
        if ((! is_int($class) && ! is_string($class)) || ! isset($labelIndex[$class])) {
            throw new \Exception(sprintf("Unknown class label for the sample at index %s", $index));
        }

        return $labelIndex[$class];
    }
}
Oops, something went wrong.