forked from jorgecasas/php-ml
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from php-ai/develop
New features: ClassificationReport and FileDataset
- Loading branch information
Showing
71 changed files
with
1,014 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
/vendor/ | ||
humbuglog.* | ||
/bin/phpunit | ||
.coverage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# FilesDataset | ||
|
||
Helper class that loads a dataset from files, using folder names as targets. It extends `ArrayDataset`. | ||
|
||
### Constructor Parameters | ||
|
||
* $rootPath - (string) path to the root folder that contains the dataset files | ||
|
||
``` | ||
use Phpml\Dataset\FilesDataset; | ||
$dataset = new FilesDataset('path/to/data'); | ||
``` | ||
|
||
See [ArrayDataset](machine-learning/datasets/array-dataset/) for more information. | ||
|
||
### Example | ||
|
||
Files structure: | ||
|
||
``` | ||
data | ||
business | ||
001.txt | ||
002.txt | ||
... | ||
entertainment | ||
001.txt | ||
002.txt | ||
... | ||
politics | ||
001.txt | ||
002.txt | ||
... | ||
sport | ||
001.txt | ||
002.txt | ||
... | ||
tech | ||
001.txt | ||
002.txt | ||
... | ||
``` | ||
|
||
Load files data with `FilesDataset`: | ||
|
||
``` | ||
use Phpml\Dataset\FilesDataset; | ||
$dataset = new FilesDataset('path/to/data'); | ||
$dataset->getSamples()[0][0] // content from file path/to/data/business/001.txt | ||
$dataset->getTargets()[0] // business | ||
$dataset->getSamples()[40][0] // content from file path/to/data/tech/001.txt | ||
$dataset->getTargets()[40] // tech | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Classification Report | ||
|
||
Class for calculating the main classifier metrics: precision, recall, F1 score and support. | ||
|
||
### Report | ||
|
||
To generate the report you must provide the following parameters: | ||
|
||
* $actualLabels - (array) true sample labels | ||
* $predictedLabels - (array) predicted labels (e.g. from test group) | ||
|
||
``` | ||
use Phpml\Metric\ClassificationReport; | ||
$actualLabels = ['cat', 'ant', 'bird', 'bird', 'bird']; | ||
$predictedLabels = ['cat', 'cat', 'bird', 'bird', 'ant']; | ||
$report = new ClassificationReport($actualLabels, $predictedLabels); | ||
``` | ||
|
||
### Metrics | ||
|
||
After creating the report you can retrieve its individual metrics: | ||
|
||
* precision (`getPrecision()`) - fraction of retrieved instances that are relevant | ||
* recall (`getRecall()`) - fraction of relevant instances that are retrieved | ||
* F1 score (`getF1score()`) - measure of a test's accuracy | ||
* support (`getSupport()`) - number of samples of each label in `$actualLabels` | ||
|
||
``` | ||
$precision = $report->getPrecision(); | ||
// $precision = ['cat' => 0.5, 'ant' => 0.0, 'bird' => 1.0]; | ||
``` | ||
|
||
### Example | ||
|
||
``` | ||
use Phpml\Metric\ClassificationReport; | ||
$actualLabels = ['cat', 'ant', 'bird', 'bird', 'bird']; | ||
$predictedLabels = ['cat', 'cat', 'bird', 'bird', 'ant']; | ||
$report = new ClassificationReport($actualLabels, $predictedLabels); | ||
$report->getPrecision(); | ||
// ['cat' => 0.5, 'ant' => 0.0, 'bird' => 1.0] | ||
$report->getRecall(); | ||
// ['cat' => 1.0, 'ant' => 0.0, 'bird' => 0.67] | ||
$report->getF1score(); | ||
// ['cat' => 0.67, 'ant' => 0.0, 'bird' => 0.80] | ||
$report->getSupport(); | ||
// ['cat' => 1, 'ant' => 1, 'bird' => 3] | ||
$report->getAverage(); | ||
// ['precision' => 0.75, 'recall' => 0.83, 'f1score' => 0.73] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
<?php | ||
|
||
declare (strict_types = 1); | ||
|
||
namespace Phpml\Dataset; | ||
|
||
use Phpml\Exception\DatasetException; | ||
|
||
/**
 * Dataset loaded from a directory tree.
 *
 * Every immediate sub-directory of the root path is treated as one target
 * (its base name is the label) and every regular file directly inside that
 * sub-directory becomes one sample holding the file's contents.
 */
class FilesDataset extends ArrayDataset
{
    /**
     * @param string $rootPath path to the folder whose sub-folders hold the sample files
     *
     * @throws DatasetException when $rootPath is not an existing directory
     */
    public function __construct(string $rootPath)
    {
        if (!is_dir($rootPath)) {
            throw DatasetException::missingFolder($rootPath);
        }

        $this->scanRootPath($rootPath);
    }

    /**
     * Scans every immediate sub-directory of the root path.
     *
     * @param string $rootPath
     */
    private function scanRootPath(string $rootPath)
    {
        // glob() returns false on error (and, on some systems, for an empty
        // match); normalise to an empty list so the loop is simply skipped.
        $dirs = glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR) ?: [];

        foreach ($dirs as $dir) {
            $this->scanDir($dir);
        }
    }

    /**
     * Appends one sample/target pair for each regular file found directly in $dir.
     *
     * @param string $dir
     */
    private function scanDir(string $dir)
    {
        // The folder name is the label shared by all files inside it.
        $target = basename($dir);

        // Same false-on-error guard as above; array_filter() requires an array.
        $entries = glob($dir.DIRECTORY_SEPARATOR.'*') ?: [];

        foreach (array_filter($entries, 'is_file') as $file) {
            // Each sample is a single-feature row: [file contents].
            // NOTE(review): file_get_contents() returns false for unreadable
            // files and that value is stored as-is — confirm whether such
            // files should be skipped or reported instead.
            // $samples / $targets are presumably declared by ArrayDataset — verify.
            $this->samples[] = [file_get_contents($file)];
            $this->targets[] = $target;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,8 @@ private function transformSample(string &$sample) | |
} | ||
} | ||
|
||
ksort($counts); | ||
|
||
$sample = $counts; | ||
} | ||
|
||
|
Oops, something went wrong.