diff --git a/src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php b/src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php new file mode 100644 index 00000000..9a819c52 --- /dev/null +++ b/src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php @@ -0,0 +1,81 @@ +center = $center; + } + + public function score(array $samples, array $targets): array + { + if ($this->center) { + $this->centerTargets($targets); + $this->centerSamples($samples); + } + + $correlations = []; + foreach ($samples[0] as $index => $feature) { + $featureColumn = array_column($samples, $index); + $correlations[$index] = + (Matrix::dot($targets, $featureColumn)[0] / (new Matrix($featureColumn, false))->transpose()->frobeniusNorm()) + / (new Matrix($targets, false))->frobeniusNorm(); + } + + $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1); + + return array_map(function (float $correlation) use ($degreesOfFreedom): float { + return $correlation ** 2 / (1 - $correlation ** 2) * $degreesOfFreedom; + }, $correlations); + } + + private function centerTargets(&$targets): void + { + $mean = Mean::arithmetic($targets); + foreach ($targets as &$target) { + $target -= $mean; + } + } + + private function centerSamples(&$samples): void + { + $means = []; + foreach ($samples[0] as $index => $feature) { + $means[$index] = Mean::arithmetic(array_column($samples, $index)); + } + + foreach ($samples as &$sample) { + foreach ($sample as $index => &$feature) { + $feature -= $means[$index]; + } + } + } +} diff --git a/src/Math/Matrix.php b/src/Math/Matrix.php index 7c1ff3e2..e7bc92e3 100644 --- a/src/Math/Matrix.php +++ b/src/Math/Matrix.php @@ -236,6 +236,29 @@ public function isSingular(): bool return $this->getDeterminant() == 0; } + /** + * Frobenius norm (Hilbert–Schmidt norm, Euclidean norm) (‖A‖F) + * Square root of the sum of the square of all elements. 
+ * + * https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm + * + * _____________ + * /ᵐ ⁿ + * ‖A‖F = √ Σ Σ |aᵢⱼ|² + * ᵢ₌₁ ⱼ₌₁ + */ + public function frobeniusNorm(): float + { + $squareSum = 0; + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $squareSum += ($this->matrix[$i][$j]) ** 2; + } + } + + return sqrt($squareSum); + } + /** * Returns the transpose of given array */ @@ -259,7 +282,7 @@ public static function dot(array $array1, array $array2): array /** * Element-wise addition or substraction depending on the given sign parameter */ - protected function _add(self $other, int $sign = 1): self + private function _add(self $other, int $sign = 1): self { $a1 = $this->toArray(); $a2 = $other->toArray(); @@ -277,7 +300,7 @@ protected function _add(self $other, int $sign = 1): self /** * Returns diagonal identity matrix of the same size of this matrix */ - protected function getIdentity(): self + private function getIdentity(): self { $array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0)); for ($i = 0; $i < $this->rows; ++$i) { diff --git a/tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php b/tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php new file mode 100644 index 00000000..0047e5fe --- /dev/null +++ b/tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php @@ -0,0 +1,29 @@ +score($samples, $targets), '', 0.0001); + } + + public function testRegressionScoreWithoutCenter(): void + { + $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $function = new UnivariateLinearRegression(false); + self::assertEquals([1.74450, 18.08347], $function->score($samples, $targets), '', 0.0001); + } +} diff --git 
a/tests/FeatureSelection/SelectKBestTest.php b/tests/FeatureSelection/SelectKBestTest.php index a0355608..df17c08b 100644 --- a/tests/FeatureSelection/SelectKBestTest.php +++ b/tests/FeatureSelection/SelectKBestTest.php @@ -8,6 +8,7 @@ use Phpml\Exception\InvalidArgumentException; use Phpml\Exception\InvalidOperationException; use Phpml\FeatureSelection\ScoringFunction\ANOVAFValue; +use Phpml\FeatureSelection\ScoringFunction\UnivariateLinearRegression; use Phpml\FeatureSelection\SelectKBest; use PHPUnit\Framework\TestCase; @@ -45,6 +46,21 @@ public function testSelectKBestWithIrisDataset(): void self::assertEquals(2, count($samples[0])); } + public function testSelectKBestWithRegressionScoring(): void + { + $samples = [[73676, 1996, 2], [77006, 1998, 5], [10565, 2000, 4], [146088, 1995, 2], [15000, 2001, 2], [65940, 2000, 2], [9300, 2000, 2], [93739, 1996, 2], [153260, 1994, 2], [17764, 2002, 2], [57000, 1998, 2], [15000, 2000, 2]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $selector = new SelectKBest(new UnivariateLinearRegression(), 2); + $selector->fit($samples, $targets); + $selector->transform($samples); + + self::assertEquals( + [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]], + $samples + ); + } + public function testThrowExceptionOnEmptyTargets(): void { $this->expectException(InvalidArgumentException::class); diff --git a/tests/Math/MatrixTest.php b/tests/Math/MatrixTest.php index da535bfe..50cabacb 100644 --- a/tests/Math/MatrixTest.php +++ b/tests/Math/MatrixTest.php @@ -251,4 +251,55 @@ public function testDot(): void $dot = [6, 12]; $this->assertEquals($dot, Matrix::dot($matrix2, $matrix1)); } + + /** + * @dataProvider dataProviderForFrobeniusNorm + */ + public function testFrobeniusNorm(array $matrix, float $norm): void + { + $matrix = new Matrix($matrix); + + 
$this->assertEquals($norm, $matrix->frobeniusNorm(), '', 0.0001); + } + + public function dataProviderForFrobeniusNorm() + { + return [ + [ + [ + [1, -7], + [2, 3], + ], 7.93725, + ], + [ + [ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + ], 9.643651, + ], + [ + [ + [1, 5, 3, 9], + [2, 3, 4, 12], + [4, 2, 5, 11], + ], 21.330729, + ], + [ + [ + [1, 5, 3], + [2, 3, 4], + [4, 2, 5], + [6, 6, 3], + ], 13.784049, + ], + [ + [ + [5, -4, 2], + [-1, 2, 3], + [-2, 1, 0], + ], 8, + ], + ]; + } }