Skip to content

Commit

Permalink
Linear classifiers: Perceptron, Adaline, DecisionStump (#50)
Browse files Browse the repository at this point in the history
* Linear classifiers

* Code formatting to PSR-2

* Added basic test cases for linear classifiers
  • Loading branch information
MustafaKarabulut authored and akondas committed Feb 16, 2017
1 parent f0a7984 commit cf222bc
Show file tree
Hide file tree
Showing 9 changed files with 676 additions and 9 deletions.
55 changes: 49 additions & 6 deletions src/Phpml/Classification/DecisionTree.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ class DecisionTree implements Classifier
*/
private $numUsableFeatures = 0;

/**
* @var array
*/
private $selectedFeatures;

/**
* @var array
*/
Expand Down Expand Up @@ -126,33 +131,45 @@ protected function getSplitLeaf($records, $depth = 0)
if ($this->actualDepth < $depth) {
$this->actualDepth = $depth;
}

// Traverse all records to see if all records belong to the same class,
// otherwise group the records so that we can classify the leaf
// in case maximum depth is reached
$leftRecords = [];
$rightRecords= [];
$remainingTargets = [];
$prevRecord = null;
$allSame = true;

foreach ($records as $recordNo) {
// Check if the previous record is the same with the current one
$record = $this->samples[$recordNo];
if ($prevRecord && $prevRecord != $record) {
$allSame = false;
}
$prevRecord = $record;

// According to the split criterion, this record will
// belong to either left or the right side in the next split
if ($split->evaluate($record)) {
$leftRecords[] = $recordNo;
} else {
$rightRecords[]= $recordNo;
}

// Group remaining targets
$target = $this->targets[$recordNo];
if (! in_array($target, $remainingTargets)) {
$remainingTargets[] = $target;
if (! array_key_exists($target, $remainingTargets)) {
$remainingTargets[$target] = 1;
} else {
$remainingTargets[$target]++;
}
}

if (count($remainingTargets) == 1 || $allSame || $depth >= $this->maxDepth) {
$split->isTerminal = 1;
$classes = array_count_values($remainingTargets);
arsort($classes);
$split->classValue = key($classes);
arsort($remainingTargets);
$split->classValue = key($remainingTargets);
} else {
if ($leftRecords) {
$split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
Expand Down Expand Up @@ -200,15 +217,31 @@ protected function getBestSplit($records)
}

/**
* Returns available features/columns to the tree for the decision making
* process. <br>
*
* If a number is given with setNumFeatures() method, then a random selection
* of features up to this number is returned. <br>
*
* If some features are manually selected by use of setSelectedFeatures(),
* then only these features are returned <br>
*
* If any of above methods were not called beforehand, then all features
* are returned by default.
*
* @return array
*/
protected function getSelectedFeatures()
{
$allFeatures = range(0, $this->featureCount - 1);
if ($this->numUsableFeatures == 0) {
if ($this->numUsableFeatures == 0 && ! $this->selectedFeatures) {
return $allFeatures;
}

if ($this->selectedFeatures) {
return $this->selectedFeatures;
}

$numFeatures = $this->numUsableFeatures;
if ($numFeatures > $this->featureCount) {
$numFeatures = $this->featureCount;
Expand Down Expand Up @@ -323,6 +356,16 @@ public function setNumFeatures(int $numFeatures)
return $this;
}

/**
* Sets the predefined features to consider while deciding which column
* to use for a split.
*
* The given list is stored as-is; when it is non-empty, getSelectedFeatures()
* returns it instead of the full or randomly sampled feature set.
*
* @param array $selectedFeatures Features to restrict the split search to
*                                (presumably zero-based column indices; confirm against callers)
*/
protected function setSelectedFeatures(array $selectedFeatures)
{
$this->selectedFeatures = $selectedFeatures;
}

/**
* A string array to represent columns. Useful when HTML output or
* column importances are desired to be inspected.
Expand Down
148 changes: 148 additions & 0 deletions src/Phpml/Classification/Linear/Adaline.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
<?php

declare(strict_types=1);

namespace Phpml\Classification\Linear;

use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Classification\Classifier;
use Phpml\Classification\Linear\Perceptron;
use Phpml\Preprocessing\Normalizer;

class Adaline extends Perceptron
{
    /**
     * Batch training is the default Adaline training algorithm
     */
    const BATCH_TRAINING = 1;

    /**
     * Online training: Stochastic gradient descent learning
     */
    const ONLINE_TRAINING = 2;

    /**
     * The function whose result will be used to calculate the network error
     * for each instance
     *
     * @var string
     */
    protected static $errorFunction = 'output';

    /**
     * Training type: either self::BATCH_TRAINING or self::ONLINE_TRAINING
     *
     * @var int
     */
    protected $trainingType;

    /**
     * Standardizes inputs before training/prediction; left unset (null)
     * when the classifier is constructed with $normalizeInputs = false
     *
     * @var Normalizer|null
     */
    private $normalizer;

    /**
     * Initialize an Adaline (ADAptive LInear NEuron) classifier with given learning rate and maximum
     * number of iterations used while training the classifier <br>
     *
     * Learning rate should be a float value between 0.0(exclusive) and 1.0 (inclusive) <br>
     * Maximum number of iterations can be an integer value greater than 0 <br>
     * If normalizeInputs is set to true, then every input given to the algorithm will be standardized
     * by use of standard deviation and mean calculation
     *
     * @param float $learningRate
     * @param int   $maxIterations
     * @param bool  $normalizeInputs Whether samples are passed through a Normalizer::NORM_STD normalizer
     * @param int   $trainingType    self::BATCH_TRAINING or self::ONLINE_TRAINING
     *
     * @throws \InvalidArgumentException If $trainingType is not one of the two supported constants
     */
    public function __construct(
        float $learningRate = 0.001,
        int $maxIterations = 1000,
        bool $normalizeInputs = true,
        int $trainingType = self::BATCH_TRAINING
    ) {
        if ($normalizeInputs) {
            $this->normalizer = new Normalizer(Normalizer::NORM_STD);
        }

        // Strict membership test: only the two declared integer constants are accepted.
        // \InvalidArgumentException is a subclass of \Exception, so existing
        // `catch (\Exception $e)` callers keep working.
        if (! in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING], true)) {
            throw new \InvalidArgumentException("Adaline can only be trained with batch and online/stochastic gradient descent algorithm");
        }
        $this->trainingType = $trainingType;

        parent::__construct($learningRate, $maxIterations);
    }

    /**
     * Trains the classifier, normalizing the samples first when
     * normalization is enabled
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        if ($this->normalizer !== null) {
            // transform() works on its argument in place; $samples is a local copy here
            $this->normalizer->transform($samples);
        }

        parent::train($samples, $targets);
    }

    /**
     * Adapts the weights with respect to given samples and targets
     * by use of gradient descent learning rule
     */
    protected function runTraining()
    {
        // If online training is chosen, then the parent runTraining method
        // will be executed with the 'output' method as the error function
        if ($this->trainingType === self::ONLINE_TRAINING) {
            return parent::runTraining();
        }

        // Batch learning is executed:
        $currIter = 0;
        while ($this->maxIterations > $currIter++) {
            // Errors for the whole training set are computed with the current
            // weights before any weight is touched
            $outputs = array_map([$this, 'output'], $this->samples);
            $updates = array_map([$this, 'gradient'], $this->targets, $outputs);
            $sum = array_sum($updates);

            // Updates all weights at once: index 0 is the bias term,
            // index $i (1..featureCount) belongs to feature column $i - 1
            for ($i = 0; $i <= $this->featureCount; $i++) {
                if ($i === 0) {
                    $this->weights[0] += $this->learningRate * $sum;
                    continue;
                }

                $col = array_column($this->samples, $i - 1);
                $error = 0;
                foreach ($col as $index => $val) {
                    $error += $val * $updates[$index];
                }

                $this->weights[$i] += $this->learningRate * $error;
            }
        }
    }

    /**
     * Returns the direction of gradient given the desired and actual outputs
     *
     * @param int|float $desired
     * @param int|float $output
     * @return int|float Difference between the desired and the actual output
     */
    protected function gradient($desired, $output)
    {
        return $desired - $output;
    }

    /**
     * Predicts the class of a single sample, applying the same normalizer
     * used in training when normalization is enabled
     *
     * @param array $sample
     * @return mixed
     */
    public function predictSample(array $sample)
    {
        if ($this->normalizer !== null) {
            // The normalizer operates on a list of samples, so wrap and unwrap
            $samples = [$sample];
            $this->normalizer->transform($samples);
            $sample = $samples[0];
        }

        return parent::predictSample($sample);
    }
}
56 changes: 56 additions & 0 deletions src/Phpml/Classification/Linear/DecisionStump.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?php

declare(strict_types=1);

namespace Phpml\Classification\Linear;

use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree;

class DecisionStump extends DecisionTree
{
    // NOTE(review): train() below delegates to parent::train(); confirm whether
    // importing these traits again in the subclass is still required.
    use Trainable, Predictable;

    /**
     * Index of the column the stump should split on, or -1 to let the
     * tree choose the column itself
     *
     * @var int
     */
    protected $columnIndex;

    /**
     * A DecisionStump classifier is a one-level deep DecisionTree. It is generally
     * used with ensemble algorithms as in the weak classifier role. <br>
     *
     * If columnIndex is given, then the stump tries to produce a decision node
     * on this column, otherwise in cases given the value of -1, the stump itself
     * decides which column to take for the decision (Default DecisionTree behaviour)
     *
     * @param int $columnIndex
     */
    public function __construct(int $columnIndex = -1)
    {
        $this->columnIndex = $columnIndex;

        parent::__construct(1);
    }

    /**
     * Trains the stump. A requested column index that does not exist in the
     * given samples is discarded (reset to -1) so that the default column
     * selection is used instead.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        // Check if a column index was given and whether it is within range.
        // The first sample is fetched with reset() so that an empty sample set
        // or a sample array whose keys do not start at 0 cannot trigger an
        // undefined-offset notice or a count() on null.
        if ($this->columnIndex >= 0) {
            $firstSample = reset($samples);
            $featureCount = is_array($firstSample) ? count($firstSample) : 0;

            if ($this->columnIndex >= $featureCount) {
                $this->columnIndex = -1;
            }
        }

        if ($this->columnIndex >= 0) {
            $this->setSelectedFeatures([$this->columnIndex]);
        }

        parent::train($samples, $targets);
    }
}
Loading

0 comments on commit cf222bc

Please sign in to comment.