Skip to content

Commit

Permalink
Implement NumberConverter (#377)
Browse files Browse the repository at this point in the history
  • Loading branch information
akondas authored May 12, 2019
1 parent 1e1d794 commit 717f236
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/FeatureExtraction/TfIdfTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public function fit(array $samples, ?array $targets = null): void
}
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
foreach ($samples as &$sample) {
foreach ($sample as $index => &$feature) {
Expand Down
2 changes: 1 addition & 1 deletion src/FeatureExtraction/TokenCountVectorizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public function fit(array $samples, ?array $targets = null): void
$this->buildVocabulary($samples);
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
array_walk($samples, function (string &$sample): void {
$this->transformSample($sample);
Expand Down
2 changes: 1 addition & 1 deletion src/FeatureSelection/SelectKBest.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public function fit(array $samples, ?array $targets = null): void
$this->keepColumns = array_slice($sorted, 0, $this->k, true);
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
if ($this->keepColumns === null) {
return;
Expand Down
2 changes: 1 addition & 1 deletion src/FeatureSelection/VarianceThreshold.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public function fit(array $samples, ?array $targets = null): void
}
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
foreach ($samples as &$sample) {
$sample = array_values(array_intersect_key($sample, $this->keepColumns));
Expand Down
2 changes: 1 addition & 1 deletion src/Preprocessing/Imputer.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public function fit(array $samples, ?array $targets = null): void
$this->samples = $samples;
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
if ($this->samples === []) {
throw new InvalidOperationException('Missing training samples for Imputer.');
Expand Down
2 changes: 1 addition & 1 deletion src/Preprocessing/LabelEncoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public function fit(array $samples, ?array $targets = null): void
}
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
foreach ($samples as &$sample) {
$sample = $this->classes[(string) $sample];
Expand Down
2 changes: 1 addition & 1 deletion src/Preprocessing/Normalizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public function fit(array $samples, ?array $targets = null): void
$this->fitted = true;
}

public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
$methods = [
self::NORM_L1 => 'normalizeL1',
Expand Down
47 changes: 47 additions & 0 deletions src/Preprocessing/NumberConverter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

declare(strict_types=1);

namespace Phpml\Preprocessing;

/**
 * Preprocessor that casts numeric values to float.
 *
 * Every feature (and, when enabled, every target) for which is_numeric()
 * returns true is cast to float; any other value is replaced with the
 * configured placeholder.
 */
final class NumberConverter implements Preprocessor
{
    /**
     * Whether transform() should also convert the targets array.
     *
     * @var bool
     */
    private $transformTargets;

    /**
     * Value substituted for every input that is not numeric.
     *
     * @var mixed
     */
    private $nonNumericPlaceholder;

    /**
     * @param bool  $transformTargets      when true, transform() converts $targets as well as $samples
     * @param mixed $nonNumericPlaceholder replacement written in place of non-numeric values
     */
    public function __construct(bool $transformTargets = false, $nonNumericPlaceholder = null)
    {
        $this->transformTargets = $transformTargets;
        $this->nonNumericPlaceholder = $nonNumericPlaceholder;
    }

    /**
     * No-op: the conversion does not depend on any fitted state.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        // nothing to do
    }

    /**
     * Converts every feature of every sample in place, and the targets too
     * when target conversion was enabled in the constructor and $targets is an array.
     *
     * @param array      $samples rows of features, modified in place
     * @param array|null $targets optional targets, modified in place only when enabled
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$sample) {
            foreach ($sample as &$feature) {
                $feature = $this->convert($feature);
            }
            // Break the reference so later writes to $feature cannot touch the last element.
            unset($feature);
        }
        unset($sample);

        if ($this->transformTargets && is_array($targets)) {
            foreach ($targets as &$target) {
                $target = $this->convert($target);
            }
            unset($target);
        }
    }

    /**
     * Returns $value cast to float when it is numeric, otherwise the placeholder.
     *
     * @param mixed $value
     *
     * @return mixed
     */
    private function convert($value)
    {
        return is_numeric($value) ? (float) $value : $this->nonNumericPlaceholder;
    }
}
2 changes: 1 addition & 1 deletion src/Transformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ interface Transformer
*/
public function fit(array $samples, ?array $targets = null): void;

public function transform(array &$samples): void;
public function transform(array &$samples, ?array &$targets = null): void;
}
47 changes: 47 additions & 0 deletions tests/Preprocessing/NumberConverterTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

declare(strict_types=1);

namespace Phpml\Tests\Preprocessing;

use Phpml\Preprocessing\NumberConverter;
use PHPUnit\Framework\TestCase;

/**
 * Tests for NumberConverter.
 *
 * Assertions use assertSame (strict, type-aware comparison) because
 * assertEquals treats a numeric string such as '1' and the float 1.0 as
 * equal, which would let these tests pass even if no conversion happened.
 */
final class NumberConverterTest extends TestCase
{
    /**
     * With default settings only the samples are converted; targets stay untouched.
     */
    public function testConvertSamples(): void
    {
        $samples = [['1', '-4'], ['2.0', 3.0], ['3', '112.5'], ['5', '0.0004']];
        $targets = ['1', '1', '2', '2'];

        $converter = new NumberConverter();
        $converter->transform($samples, $targets);

        self::assertSame([[1.0, -4.0], [2.0, 3.0], [3.0, 112.5], [5.0, 0.0004]], $samples);
        self::assertSame(['1', '1', '2', '2'], $targets);
    }

    /**
     * With target conversion enabled, numeric targets become floats and
     * non-numeric ones become the default placeholder (null).
     */
    public function testConvertTargets(): void
    {
        $samples = [['1', '-4'], ['2.0', 3.0], ['3', '112.5'], ['5', '0.0004']];
        $targets = ['1', '1', '2', 'not'];

        $converter = new NumberConverter(true);
        $converter->transform($samples, $targets);

        self::assertSame([[1.0, -4.0], [2.0, 3.0], [3.0, 112.5], [5.0, 0.0004]], $samples);
        self::assertSame([1.0, 1.0, 2.0, null], $targets);
    }

    /**
     * A custom placeholder replaces non-numeric values in both samples and targets.
     */
    public function testConvertWithPlaceholder(): void
    {
        $samples = [['invalid'], ['13.5']];
        $targets = ['invalid', '2'];

        $converter = new NumberConverter(true, 'missing');
        $converter->transform($samples, $targets);

        self::assertSame([['missing'], [13.5]], $samples);
        self::assertSame(['missing', 2.0], $targets);
    }
}

0 comments on commit 717f236

Please sign in to comment.