forked from jorgecasas/php-ml
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement first regression scoring function UnivariateLinearRegression
- Loading branch information
Showing
5 changed files
with
202 additions
and
2 deletions.
There are no files selected for viewing
81 changes: 81 additions & 0 deletions
81
src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Phpml\FeatureSelection\ScoringFunction; | ||
|
||
use Phpml\FeatureSelection\ScoringFunction; | ||
use Phpml\Math\Matrix; | ||
use Phpml\Math\Statistic\Mean; | ||
|
||
/**
 * Quick linear model for testing the effect of a single regressor,
 * sequentially for many regressors.
 *
 * This is done in 2 steps:
 *
 * 1. The cross correlation between each regressor and the target is computed,
 *    that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * std(y)).
 * 2. It is converted to an F score. (Only the F score is returned; no p-value
 *    is computed by this class.)
 *
 * Degenerate inputs are mapped to finite/explicit values instead of dividing by zero:
 * a feature (or target vector) with zero norm scores 0.0, and a perfectly
 * correlated feature scores INF.
 *
 * Ported from scikit-learn f_regression function (http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html#sklearn.feature_selection.f_regression)
 */
final class UnivariateLinearRegression implements ScoringFunction
{
    /**
     * @var bool
     */
    private $center;

    /**
     * @param bool $center - if true samples and targets will be centered
     */
    public function __construct(bool $center = true)
    {
        $this->center = $center;
    }

    /**
     * Computes one F score per feature column of $samples.
     *
     * @param array $samples rows of feature values; every row is expected to use the same keys
     * @param array $targets one numeric target per sample row
     *
     * @return array F scores keyed by feature index; empty when $samples is empty
     */
    public function score(array $samples, array $targets): array
    {
        // Nothing to score; avoids reading a non-existent first row below.
        if ($samples === []) {
            return [];
        }

        // Both arguments are received by value, so centering never touches the caller's data.
        if ($this->center) {
            $this->centerTargets($targets);
            $this->centerSamples($samples);
        }

        // Centering consumes one additional degree of freedom.
        $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1);

        // Loop-invariant: the target norm is the same for every feature.
        $targetNorm = (float) (new Matrix($targets, false))->frobeniusNorm();

        $scores = [];
        foreach (array_keys(reset($samples)) as $index) {
            $featureColumn = array_column($samples, $index);
            $featureNorm = (float) (new Matrix($featureColumn, false))->transpose()->frobeniusNorm();

            // A zero norm means the correlation is undefined (0/0); such a feature
            // carries no information about the target, so it gets the lowest score.
            if ($featureNorm === 0.0 || $targetNorm === 0.0) {
                $scores[$index] = 0.0;

                continue;
            }

            $correlation = (Matrix::dot($targets, $featureColumn)[0] / $featureNorm) / $targetNorm;
            $scores[$index] = $this->fScore($correlation, $degreesOfFreedom);
        }

        return $scores;
    }

    /**
     * Converts a correlation coefficient into an F score: r^2 / (1 - r^2) * dof.
     *
     * Returns INF when r^2 reaches (or, through rounding, exceeds) 1, where the
     * denominator would be zero or negative.
     */
    private function fScore(float $correlation, int $degreesOfFreedom): float
    {
        $explained = $correlation ** 2;
        $unexplained = 1 - $explained;

        if ($unexplained <= 0.0) {
            return INF;
        }

        return $explained / $unexplained * $degreesOfFreedom;
    }

    /**
     * Subtracts the arithmetic mean of $targets from every element, in place.
     */
    private function centerTargets(array &$targets): void
    {
        $mean = Mean::arithmetic($targets);
        foreach ($targets as &$target) {
            $target -= $mean;
        }

        // Break the reference so the last element is not left aliased.
        unset($target);
    }

    /**
     * Subtracts each feature column's arithmetic mean from that column, in place.
     */
    private function centerSamples(array &$samples): void
    {
        if ($samples === []) {
            return;
        }

        $means = [];
        foreach (array_keys(reset($samples)) as $index) {
            $means[$index] = Mean::arithmetic(array_column($samples, $index));
        }

        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature -= $means[$index];
            }

            // Break the inner reference before the next row reuses the variable name.
            unset($feature);
        }

        unset($sample);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Phpml\Tests\FeatureSelection\ScoringFunction; | ||
|
||
use Phpml\FeatureSelection\ScoringFunction\UnivariateLinearRegression; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
/**
 * Checks the F scores produced by UnivariateLinearRegression against fixed
 * reference values, with and without centering of the input data.
 */
final class UnivariateLinearRegressionTest extends TestCase
{
    /**
     * Two-feature sample rows shared by both test cases.
     */
    private const SAMPLES = [
        [73676, 1996],
        [77006, 1998],
        [10565, 2000],
        [146088, 1995],
        [15000, 2001],
        [65940, 2000],
        [9300, 2000],
        [93739, 1996],
        [153260, 1994],
        [17764, 2002],
        [57000, 1998],
        [15000, 2000],
    ];

    /**
     * Target values, one per row of SAMPLES.
     */
    private const TARGETS = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];

    /**
     * Absolute tolerance used when comparing computed scores with the expected ones.
     */
    private const DELTA = 0.0001;

    public function testRegressionScore(): void
    {
        $scores = (new UnivariateLinearRegression())->score(self::SAMPLES, self::TARGETS);

        self::assertEquals([6.97286, 6.48558], $scores, '', self::DELTA);
    }

    public function testRegressionScoreWithoutCenter(): void
    {
        $scores = (new UnivariateLinearRegression(false))->score(self::SAMPLES, self::TARGETS);

        self::assertEquals([1.74450, 18.08347], $scores, '', self::DELTA);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters