Skip to content

Commit

Permalink
MDL-59265 analytics: Rename machine learning backend method
Browse files Browse the repository at this point in the history
- Method names renamed to avoid interface changes once
  we support regression and unsupervised learning
- Adding regressor interface even if not implemented
- predictor interface comments expanded
- Differentiate model's required accuracy from predictions quality
- Add missing get_callback_boundary call
- Updated datasets' metadata to allow 3rd parties to code
  regressors themselves
- Add missing option to exception message
- Include target data into the dataset regardless of being a prediction
  dataset or a training dataset
- Explicit in_array and array_search non-strict calls
- Overwrite discrete should_be_displayed implementation with the binary one
- Overwrite no_teacher get_display_value as it would otherwise look
  wrong
- Other minor fixes
  • Loading branch information
David Monllao committed Aug 25, 2017
1 parent b8fe16c commit 5c5cb3e
Show file tree
Hide file tree
Showing 15 changed files with 265 additions and 51 deletions.
27 changes: 27 additions & 0 deletions analytics/classes/local/analyser/base.php
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,9 @@ protected function process_time_splitting($timesplitting, $analysable, $includet
return $result;
}

// Add target metadata.
$this->add_target_metadata($data);

// Write all calculated data to a file.
$file = $dataset->store($data);

Expand Down Expand Up @@ -636,4 +639,28 @@ protected function save_prediction_samples($sampleids, $ranges, $timesplitting)
$DB->insert_record('analytics_predict_samples', $predictionrange);
}
}

/**
 * Adds target metadata to the dataset.
 *
 * Each metadata item is stored as a pair: its name is appended to $data[0] and
 * its value, at the same position, to $data[1]. The items added are:
 * - targetcolumn: the target id.
 * - targettype: 'linear' or 'discrete', depending on the target's is_linear().
 * - targetmin and targetmax: only for linear targets.
 * - targetclasses: only for discrete targets, JSON-encoded list of classes.
 *
 * All values are collected before $data is touched, so if any of the target
 * methods throws an exception $data is left unmodified instead of ending up
 * with more names than values.
 *
 * @param array $data The dataset, passed by reference and modified in place.
 * @return void
 */
protected function add_target_metadata(&$data) {
    $target = $this->analysabletarget;

    // Insertion order matters: it defines the order of the metadata columns.
    $metadata = array('targetcolumn' => $target->get_id());
    if ($target->is_linear()) {
        $metadata['targettype'] = 'linear';
        $metadata['targetmin'] = $target::get_min_value();
        $metadata['targetmax'] = $target::get_max_value();
    } else {
        $metadata['targettype'] = 'discrete';
        $metadata['targetclasses'] = json_encode($target::get_classes());
    }

    // Nothing above threw, it is now safe to append names and values in lockstep.
    foreach ($metadata as $name => $value) {
        $data[0][] = $name;
        $data[1][] = $value;
    }
}
}
17 changes: 17 additions & 0 deletions analytics/classes/local/indicator/binary.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,23 @@ public static final function get_classes() {
return array(0);
}

/**
 * Whether the provided value should be displayed.
 *
 * The value is displayed as long as no subtype is specified; the value
 * itself is not looked at. Binary values have no subtypes by default,
 * please overwrite this method if your indicator is adding extra features.
 *
 * @param float $value
 * @param string $subtype
 * @return bool True when $subtype is loosely equal to false, false otherwise.
 */
public function should_be_displayed($value, $subtype) {
    // Loose comparison on purpose: any "empty" subtype counts as no subtype.
    return $subtype == false;
}

/**
* get_display_value
*
Expand Down
2 changes: 1 addition & 1 deletion analytics/classes/local/indicator/discrete.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public function should_be_displayed($value, $subtype) {
*/
public function get_display_value($value, $subtype = false) {

$displayvalue = array_search($subtype, static::get_classes());
$displayvalue = array_search($subtype, static::get_classes(), false);

debugging('Please overwrite \core_analytics\local\indicator\discrete::get_display_value to show something ' .
'different than the default "' . $displayvalue . '"', DEBUG_DEVELOPER);
Expand Down
2 changes: 1 addition & 1 deletion analytics/classes/local/indicator/linear.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public static function get_feature_headers() {
}

/**
* should_be_displayed
* Show only the main feature.
*
* @param float $value
* @param string $subtype
Expand Down
2 changes: 1 addition & 1 deletion analytics/classes/local/target/base.php
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ public static function instance() {
*/
protected function min_prediction_score() {
// The default minimum discards predictions with a low score.
return \core_analytics\model::MIN_SCORE;
return \core_analytics\model::PREDICTION_MIN_SCORE;
}

/**
Expand Down
2 changes: 1 addition & 1 deletion analytics/classes/local/target/binary.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public function get_calculation_outcome($value, $ignoredsubtype = false) {
throw new \moodle_exception('errorpredictionformat', 'analytics');
}

if (in_array($value, $this->ignored_predicted_classes())) {
if (in_array($value, $this->ignored_predicted_classes(), false)) {
// Just in case, if it is ignored the prediction should not even be recorded but if it would, it is ignored now,
// which should mean that is it nothing serious.
return self::OUTCOME_VERY_POSITIVE;
Expand Down
24 changes: 12 additions & 12 deletions analytics/classes/local/target/discrete.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,18 @@ abstract class discrete extends base {
*/
public function is_linear() {
// Not supported yet.
throw new \coding_exception('Sorry, this version\'s prediction processors only support targets with binary values.');
throw new \coding_exception('Sorry, this version\'s prediction processors only support targets with binary values.' .
' You can write your own and overwrite this method though.');
}

/**
* Is the provided class one of this target valid classes?
*
* @param string $class
* @param mixed $class
* @return bool
*/
protected static function is_a_class($class) {
return (in_array($class, static::get_classes()));
return (in_array($class, static::get_classes(), false));
}

/**
Expand Down Expand Up @@ -99,7 +100,7 @@ public function get_calculation_outcome($value, $ignoredsubtype = false) {
throw new \moodle_exception('errorpredictionformat', 'analytics');
}

if (in_array($value, $this->ignored_predicted_classes())) {
if (in_array($value, $this->ignored_predicted_classes(), false)) {
// Just in case, if it is ignored the prediction should not even be recorded.
return self::OUTCOME_OK;
}
Expand Down Expand Up @@ -138,15 +139,16 @@ protected static function classes_description() {
* Returns the predicted classes that will be ignored.
*
* Better be keen to add more than less classes here, the callback is always able to discard some classes. As an example
* a target with classes 'grade 0-3', 'grade 3-6', 'grade 6-8' and 'grade 8-10' is interested in flagging both 'grade 0-3'
* and 'grade 3-6'. On the other hand, a target like dropout risk with classes 'yes', 'no' may just be interested in 'yes'.
* a target with classes 'grade 0-3', 'grade 3-6', 'grade 6-8' and 'grade 8-10' is interested in flagging both 'grade 6-8'
* and 'grade 8-10' as ignored. On the other hand, a target like dropout risk with classes 'yes', 'no' may just be
* interested in 'yes'.
*
* @return array List of values that will be ignored (array keys are ignored).
*/
protected function ignored_predicted_classes() {
// Coding exception as this will only be called if this target have non-linear values.
throw new \coding_exception('Overwrite ignored_predicted_classes() and return an array with the classes that triggers ' .
'the callback');
throw new \coding_exception('Overwrite ignored_predicted_classes() and return an array with the classes that should not ' .
'trigger the callback');
}

/**
Expand All @@ -162,10 +164,8 @@ public function triggers_callback($predictedvalue, $predictionscore) {
return false;
}

if (!$this->is_linear()) {
if (in_array($predictedvalue, $this->ignored_predicted_classes())) {
return false;
}
if (in_array($predictedvalue, $this->ignored_predicted_classes())) {
return false;
}

return true;
Expand Down
31 changes: 27 additions & 4 deletions analytics/classes/local/target/linear.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ abstract class linear extends base {
*/
public function is_linear() {
// Not supported yet.
throw new \coding_exception('Sorry, this version\'s prediction processors only support targets with binary values.');
throw new \coding_exception('Sorry, this version\'s prediction processors only support targets with binary values.' .
' You can write your own and overwrite this method though.');
}

/**
Expand All @@ -52,7 +53,7 @@ public function is_linear() {
* @param string $ignoredsubtype
* @return int
*/
public function get_calculated_outcome($value, $ignoredsubtype = false) {
public function get_calculation_outcome($value, $ignoredsubtype = false) {

// This is very generic, targets will probably be interested in overwriting this.
$diff = static::get_max_value() - static::get_min_value();
Expand All @@ -67,7 +68,7 @@ public function get_calculated_outcome($value, $ignoredsubtype = false) {
*
* @return float
*/
protected static function get_max_value() {
public static function get_max_value() {
// Coding exception as this will only be called if this target have linear values.
throw new \coding_exception('Overwrite get_max_value() and return the target max value');
}
Expand All @@ -77,11 +78,33 @@ protected static function get_max_value() {
*
* @return float
*/
protected static function get_min_value() {
public static function get_min_value() {
// Coding exception as this will only be called if this target have linear values.
throw new \coding_exception('Overwrite get_min_value() and return the target min value');
}

/**
 * Should the model callback be triggered?
 *
 * The parent implementation is checked first; if it rejects the prediction
 * this method returns false straight away. Otherwise the predicted value,
 * converted to float, is compared against get_callback_boundary(): the
 * callback is not triggered when a non-empty boundary is set and the
 * predicted value is lower than it.
 *
 * @param mixed $predictedvalue
 * @param float $predictionscore
 * @return bool
 */
public function triggers_callback($predictedvalue, $predictionscore) {

    if (parent::triggers_callback($predictedvalue, $predictionscore)) {
        // People may not want to set a boundary, an empty one never blocks the callback.
        $boundary = $this->get_callback_boundary();
        $belowboundary = !empty($boundary) && floatval($predictedvalue) < $boundary;

        return !$belowboundary;
    }

    return false;
}

/**
* Returns the minimum value that triggers the callback.
*
Expand Down
5 changes: 1 addition & 4 deletions analytics/classes/local/time_splitting/base.php
Original file line number Diff line number Diff line change
Expand Up @@ -371,12 +371,9 @@ protected function add_metadata(&$dataset, $indicators, $target = false) {
$metadata = array(
'timesplitting' => $this->get_id(),
// If no target the first column is the sampleid, if target the last column is the target.
// This will need to be updated when we support unsupervised learning models.
'nfeatures' => count(current($dataset)) - 1
);
if ($target) {
$metadata['targetclasses'] = json_encode($target::get_classes());
$metadata['targettype'] = ($target->is_linear()) ? 'linear' : 'discrete';
}

// The first 2 samples will be used to store metadata about the dataset.
$metadatacolumns = [];
Expand Down
26 changes: 22 additions & 4 deletions analytics/classes/model.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ class model {
*/
const MIN_SCORE = 0.7;

/**
* Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
*/
const PREDICTION_MIN_SCORE = 0.6;

/**
* Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
*/
Expand Down Expand Up @@ -524,8 +529,13 @@ public function evaluate($options = array()) {
$outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

// Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
$predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
if ($this->get_target()->is_linear()) {
$predictorresult = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
self::EVALUATION_ITERATIONS, $dataset, $outputdir);
} else {
$predictorresult = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
self::EVALUATION_ITERATIONS, $dataset, $outputdir);
}

$result->status = $predictorresult->status;
$result->info = $predictorresult->info;
Expand Down Expand Up @@ -599,7 +609,11 @@ public function train() {
$samplesfile = $datasets[$this->model->timesplitting];

// Train using the dataset.
$predictorresult = $predictor->train($this->get_unique_id(), $samplesfile, $outputdir);
if ($this->get_target()->is_linear()) {
$predictorresult = $predictor->train_regression($this->get_unique_id(), $samplesfile, $outputdir);
} else {
$predictorresult = $predictor->train_classification($this->get_unique_id(), $samplesfile, $outputdir);
}

$result = new \stdClass();
$result->status = $predictorresult->status;
Expand Down Expand Up @@ -678,8 +692,12 @@ public function predict() {
$result->predictions = $this->get_static_predictions($indicatorcalculations);

} else {
// Prediction process runs on the machine learning backend.
$predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
// Estimation and classification processes run on the machine learning backend side.
if ($this->get_target()->is_linear()) {
$predictorresult = $predictor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
} else {
$predictorresult = $predictor->classify($this->get_unique_id(), $samplesfile, $outputdir);
}
$result->status = $predictorresult->status;
$result->info = $predictorresult->info;
$result->predictions = $this->format_predictor_predictions($predictorresult);
Expand Down
51 changes: 42 additions & 9 deletions analytics/classes/predictor.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,34 +43,67 @@ interface predictor {
public function is_ready();

/**
* Train the provided dataset.
* Train this processor classification model using the provided supervised learning dataset.
*
* @param int $modelid
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function train($modelid, \stored_file $dataset, $outputdir);
public function train_classification($uniqueid, \stored_file $dataset, $outputdir);

/**
* Predict the provided dataset samples.
* Classifies the provided dataset samples.
*
* @param int $modelid
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function predict($modelid, \stored_file $dataset, $outputdir);
public function classify($uniqueid, \stored_file $dataset, $outputdir);

/**
* evaluate
* Evaluates this processor classification model using the provided supervised learning dataset.
*
* @param int $modelid
* @param string $uniqueid
* @param float $maxdeviation
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function evaluate($modelid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir);
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir);

/**
* Train this processor regression model using the provided supervised learning dataset.
*
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function train_regression($uniqueid, \stored_file $dataset, $outputdir);

/**
* Estimates linear values for the provided dataset samples.
*
* @param string $uniqueid
* @param \stored_file $dataset
* @param mixed $outputdir
* @return void
*/
public function estimate($uniqueid, \stored_file $dataset, $outputdir);

/**
* Evaluates this processor regression model using the provided supervised learning dataset.
*
* @param string $uniqueid
* @param float $maxdeviation
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir);

}
17 changes: 17 additions & 0 deletions course/classes/analytics/indicator/no_teacher.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@ public static function required_sample_data() {
return array('context', 'course');
}

/**
 * Returns the text to display for the provided value.
 *
 * Reversed because the indicator is in 'negative' and the max returned value
 * means teacher present: -1 is displayed as 'yes' and 1 as 'no'. Nothing
 * (null) is returned for any other value.
 *
 * @param float $value
 * @param string $subtype Not used, there are no subtypes for binary values by default.
 * @return string|null
 */
public function get_display_value($value, $subtype = false) {

    // Note that switch compares loosely, so numeric strings are matched as well.
    switch ($value) {
        case -1:
            return get_string('yes');
        case 1:
            return get_string('no');
        default:
            return null;
    }
}

/**
* calculate_sample
*
Expand Down
Loading

0 comments on commit 5c5cb3e

Please sign in to comment.