Skip to content

Commit

Permalink
Implement first regression scoring function UnivariateLinearRegression
Browse files Browse the repository at this point in the history
  • Loading branch information
akondas committed Feb 14, 2018
1 parent fbf84ca commit 9e5b3a0
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?php

declare(strict_types=1);

namespace Phpml\FeatureSelection\ScoringFunction;

use Phpml\FeatureSelection\ScoringFunction;
use Phpml\Math\Matrix;
use Phpml\Math\Statistic\Mean;

/**
 * Quick linear model for testing the effect of a single regressor,
 * sequentially for many regressors.
 *
 * This is done in 2 steps:
 *
 * 1. The cross correlation between each regressor and the target is computed,
 *    that is, ((X[:, i] - mean(X[:, i])) * (y - mean(y))) / (std(X[:, i]) * std(y)).
 * 2. The correlation is converted to an F score: r² / (1 - r²) * degrees of freedom.
 *
 * Note: unlike the scikit-learn original, only the F scores are returned; no p-values are computed.
 *
 * Ported from scikit-learn f_regression function (http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html#sklearn.feature_selection.f_regression)
 */
final class UnivariateLinearRegression implements ScoringFunction
{
    /**
     * @var bool
     */
    private $center;

    /**
     * @param bool $center - if true samples and targets will be centered
     */
    public function __construct(bool $center = true)
    {
        $this->center = $center;
    }

    /**
     * Computes one F score per feature column of $samples against $targets.
     *
     * Degenerate inputs are mapped to finite-or-infinite scores instead of dividing by zero:
     * - a feature column (or the target vector) with zero norm gets a score of 0.0,
     * - a perfect (anti-)correlation, where 1 - r² is not positive, gets INF.
     *
     * @param array $samples rows of feature values; the keys of the first row define the features
     * @param array $targets one target value per sample
     *
     * @return array F scores keyed by feature index (empty when $samples is empty)
     */
    public function score(array $samples, array $targets): array
    {
        if ($this->center) {
            $this->centerTargets($targets);
            $this->centerSamples($samples);
        }

        if ($samples === []) {
            return [];
        }

        // One degree of freedom is lost for the slope, one more for the mean when centering.
        $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1);

        // The target norm does not depend on the feature; compute it once, on first use.
        $targetNorm = null;

        $scores = [];
        foreach ($samples[0] as $index => $feature) {
            $featureColumn = array_column($samples, $index);

            if ($targetNorm === null) {
                $targetNorm = (new Matrix($targets, false))->frobeniusNorm();
            }

            $featureNorm = (new Matrix($featureColumn, false))->transpose()->frobeniusNorm();

            if ($featureNorm === 0.0 || $targetNorm === 0.0) {
                // Correlation is undefined (0 / 0): the feature carries no information.
                $scores[$index] = 0.0;

                continue;
            }

            $correlation = (Matrix::dot($targets, $featureColumn)[0] / $featureNorm) / $targetNorm;
            $scores[$index] = $this->correlationToFScore($correlation, $degreesOfFreedom);
        }

        return $scores;
    }

    /**
     * Converts a correlation coefficient to an F score, returning INF when the
     * denominator 1 - r² is not positive (perfect correlation or rounding past 1).
     */
    private function correlationToFScore(float $correlation, int $degreesOfFreedom): float
    {
        $squared = $correlation ** 2;
        $unexplained = 1 - $squared;

        if ($unexplained <= 0) {
            return INF;
        }

        return $squared / $unexplained * $degreesOfFreedom;
    }

    /**
     * Subtracts the arithmetic mean from every target, in place.
     */
    private function centerTargets(array &$targets): void
    {
        $mean = Mean::arithmetic($targets);
        foreach ($targets as &$target) {
            $target -= $mean;
        }

        unset($target);
    }

    /**
     * Subtracts the per-column arithmetic mean from every sample value, in place.
     * Does nothing for an empty sample set.
     */
    private function centerSamples(array &$samples): void
    {
        if ($samples === []) {
            return;
        }

        $means = [];
        foreach ($samples[0] as $index => $feature) {
            $means[$index] = Mean::arithmetic(array_column($samples, $index));
        }

        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature -= $means[$index];
            }

            unset($feature);
        }

        unset($sample);
    }
}
27 changes: 25 additions & 2 deletions src/Math/Matrix.php
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,29 @@ public function isSingular(): bool
return $this->getDeterminant() == 0;
}

/**
 * Frobenius norm (Hilbert–Schmidt norm, Euclidean norm) (‖A‖F):
 * the square root of the sum of the squares of all elements.
 *
 * https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm
 *
 *          _____________
 *         / m   n
 * ‖A‖F = √  Σ   Σ  |aᵢⱼ|²
 *          ᵢ₌₁ ⱼ₌₁
 */
public function frobeniusNorm(): float
{
    $sumOfSquares = 0;

    // Walk the matrix in row-major order, bounded by the stored dimensions.
    for ($row = 0; $row < $this->rows; ++$row) {
        for ($column = 0; $column < $this->columns; ++$column) {
            $element = $this->matrix[$row][$column];
            $sumOfSquares += $element ** 2;
        }
    }

    return sqrt($sumOfSquares);
}

/**
* Returns the transpose of given array
*/
Expand All @@ -259,7 +282,7 @@ public static function dot(array $array1, array $array2): array
/**
* Element-wise addition or subtraction depending on the given sign parameter
*/
protected function _add(self $other, int $sign = 1): self
private function _add(self $other, int $sign = 1): self
{
$a1 = $this->toArray();
$a2 = $other->toArray();
Expand All @@ -277,7 +300,7 @@ protected function _add(self $other, int $sign = 1): self
/**
* Returns diagonal identity matrix of the same size of this matrix
*/
protected function getIdentity(): self
private function getIdentity(): self
{
$array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0));
for ($i = 0; $i < $this->rows; ++$i) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php

declare(strict_types=1);

namespace Phpml\Tests\FeatureSelection\ScoringFunction;

use Phpml\FeatureSelection\ScoringFunction\UnivariateLinearRegression;
use PHPUnit\Framework\TestCase;

/**
 * Checks the F scores produced by UnivariateLinearRegression on a fixed
 * two-feature data set, with and without centering.
 */
final class UnivariateLinearRegressionTest extends TestCase
{
    /**
     * Two-feature samples shared by both tests.
     */
    private const SAMPLES = [
        [73676, 1996],
        [77006, 1998],
        [10565, 2000],
        [146088, 1995],
        [15000, 2001],
        [65940, 2000],
        [9300, 2000],
        [93739, 1996],
        [153260, 1994],
        [17764, 2002],
        [57000, 1998],
        [15000, 2000],
    ];

    /**
     * One target per sample row.
     */
    private const TARGETS = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];

    public function testRegressionScore(): void
    {
        $scores = (new UnivariateLinearRegression())->score(self::SAMPLES, self::TARGETS);

        self::assertEquals([6.97286, 6.48558], $scores, '', 0.0001);
    }

    public function testRegressionScoreWithoutCenter(): void
    {
        $scores = (new UnivariateLinearRegression(false))->score(self::SAMPLES, self::TARGETS);

        self::assertEquals([1.74450, 18.08347], $scores, '', 0.0001);
    }
}
16 changes: 16 additions & 0 deletions tests/FeatureSelection/SelectKBestTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\FeatureSelection\ScoringFunction\ANOVAFValue;
use Phpml\FeatureSelection\ScoringFunction\UnivariateLinearRegression;
use Phpml\FeatureSelection\SelectKBest;
use PHPUnit\Framework\TestCase;

Expand Down Expand Up @@ -45,6 +46,21 @@ public function testSelectKBestWithIrisDataset(): void
self::assertEquals(2, count($samples[0]));
}

/**
 * Fits SelectKBest (k = 2) with the regression scoring function on a
 * three-feature data set and expects only the first two columns to remain.
 */
public function testSelectKBestWithRegressionScoring(): void
{
    $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];
    $samples = [
        [73676, 1996, 2],
        [77006, 1998, 5],
        [10565, 2000, 4],
        [146088, 1995, 2],
        [15000, 2001, 2],
        [65940, 2000, 2],
        [9300, 2000, 2],
        [93739, 1996, 2],
        [153260, 1994, 2],
        [17764, 2002, 2],
        [57000, 1998, 2],
        [15000, 2000, 2],
    ];
    $expected = [
        [73676, 1996],
        [77006, 1998],
        [10565, 2000],
        [146088, 1995],
        [15000, 2001],
        [65940, 2000],
        [9300, 2000],
        [93739, 1996],
        [153260, 1994],
        [17764, 2002],
        [57000, 1998],
        [15000, 2000],
    ];

    $kBest = new SelectKBest(new UnivariateLinearRegression(), 2);
    $kBest->fit($samples, $targets);
    // The assertion below expects transform() to have reduced $samples in place.
    $kBest->transform($samples);

    self::assertEquals($expected, $samples);
}

public function testThrowExceptionOnEmptyTargets(): void
{
$this->expectException(InvalidArgumentException::class);
Expand Down
51 changes: 51 additions & 0 deletions tests/Math/MatrixTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,55 @@ public function testDot(): void
$dot = [6, 12];
$this->assertEquals($dot, Matrix::dot($matrix2, $matrix1));
}

/**
 * Compares Matrix::frobeniusNorm() with a precomputed norm, to within 0.0001.
 *
 * @dataProvider dataProviderForFrobeniusNorm
 */
public function testFrobeniusNorm(array $matrix, float $norm): void
{
    $actualNorm = (new Matrix($matrix))->frobeniusNorm();

    $this->assertEquals($norm, $actualNorm, '', 0.0001);
}

/**
 * Data sets for testFrobeniusNorm: each entry is [matrix, expected norm].
 */
public function dataProviderForFrobeniusNorm()
{
    $square2x2 = [
        [1, -7],
        [2, 3],
    ];
    $square3x3 = [
        [1, 2, 3],
        [2, 3, 4],
        [3, 4, 5],
    ];
    $wide3x4 = [
        [1, 5, 3, 9],
        [2, 3, 4, 12],
        [4, 2, 5, 11],
    ];
    $tall4x3 = [
        [1, 5, 3],
        [2, 3, 4],
        [4, 2, 5],
        [6, 6, 3],
    ];
    $signed3x3 = [
        [5, -4, 2],
        [-1, 2, 3],
        [-2, 1, 0],
    ];

    return [
        [$square2x2, 7.93725],
        [$square3x3, 9.643651],
        [$wide3x4, 21.330729],
        [$tall4x3, 13.784049],
        [$signed3x3, 8],
    ];
}
}

0 comments on commit 9e5b3a0

Please sign in to comment.