Skip to content

Commit

Permalink
Apply cs fixes for NaiveBayes
Browse files Browse the repository at this point in the history
  • Loading branch information
akondas committed Jan 17, 2017
1 parent e603d60 commit d19ddb8
Showing 1 changed file with 56 additions and 17 deletions.
73 changes: 56 additions & 17 deletions src/Phpml/Classification/NaiveBayes.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,62 @@
class NaiveBayes implements Classifier
{
use Trainable, Predictable;

const CONTINUOS = 1;
const NOMINAL = 2;
const EPSILON = 1e-10;
private $std = array();
private $mean= array();
private $discreteProb = array();
private $dataType = array();
private $p = array();

/**
* @var array
*/
private $std = [];

/**
* @var array
*/
private $mean= [];

/**
* @var array
*/
private $discreteProb = [];

/**
* @var array
*/
private $dataType = [];

/**
* @var array
*/
private $p = [];

/**
* @var int
*/
private $sampleCount = 0;

/**
* @var int
*/
private $featureCount = 0;
private $labels = array();

/**
* @var array
*/
private $labels = [];

/**
* @param array $samples
* @param array $targets
*/
public function train(array $samples, array $targets)
{
$this->samples = $samples;
$this->targets = $targets;
$this->sampleCount = count($samples);
$this->featureCount = count($samples[0]);
// Get distinct targets

$this->labels = $targets;
array_unique($this->labels);
foreach ($this->labels as $label) {
Expand Down Expand Up @@ -67,18 +105,19 @@ private function calculateStatistics($label, $samples)
}, $db);
} else {
$this->mean[$label][$i] = Mean::arithmetic($values);
// Add epsilon in order to avoid zero stdev
// Add epsilon in order to avoid zero stdev
$this->std[$label][$i] = 1e-10 + StandardDeviation::population($values, false);
}
}
}

/**
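* Calculates the class-conditional likelihood P(sample_n|label) of a single feature value
* (the Gaussian branch returns the logarithm of the density, not the density itself)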
*
*
* @param array $sample
* @param int $feature
* @param string $label
* @return float
*/
private function sampleProbability($sample, $feature, $label)
{
Expand All @@ -94,14 +133,14 @@ private function sampleProbability($sample, $feature, $label)
$mean= $this->mean[$label][$feature];
// Calculate the probability density by use of normal/Gaussian distribution
// Ref: https://en.wikipedia.org/wiki/Normal_distribution
//
// In order to avoid numerical errors because of small or zero values,
// some libraries adopt taking log of calculations such as
// scikit-learn did.
// (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py)
$pdf = -0.5 * log(2.0 * pi() * $std * $std);
$pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std);
return $pdf;
//
// In order to avoid numerical errors because of small or zero values,
// some libraries adopt taking log of calculations such as
// scikit-learn did.
// (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py)
$pdf = -0.5 * log(2.0 * pi() * $std * $std);
$pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std);
return $pdf;
}

/**
Expand Down

0 comments on commit d19ddb8

Please sign in to comment.