Skip to content

Commit

Permalink
MDL-64783 analytics: Performance improvements
Browse files Browse the repository at this point in the history
- Removed redundant query to analytics_predict_samples
- Analysers API now uses recordsets to iterate through the analysable
  elements. They take the last analysed time into account.
- New method for targets so there is no need to always update the last
  analysis time. Useful for lightweight targets.
  • Loading branch information
David Monllaó authored and stronk7 committed Apr 8, 2019
1 parent 02dfbf4 commit fccc728
Show file tree
Hide file tree
Showing 18 changed files with 552 additions and 157 deletions.
10 changes: 6 additions & 4 deletions admin/tool/analytics/classes/output/invalid_analysables.php
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,17 @@ public function export_for_template(\renderer_base $output) {

$offset = $this->page * $this->perpage;

$analysables = $this->model->get_analyser(['notimesplitting' => true])->get_analysables();
$analysables = $this->model->get_analyser(['notimesplitting' => true])->get_analysables_iterator();

$skipped = 0;
$enoughresults = false;
$morepages = false;
$results = array();
foreach ($analysables as $key => $analysable) {
foreach ($analysables as $analysable) {

if (!$analysable) {
continue;
}

$validtraining = $this->model->get_target()->is_valid_analysable($analysable, true);
if ($validtraining === true) {
Expand Down Expand Up @@ -117,8 +121,6 @@ public function export_for_template(\renderer_base $output) {
$morepages = true;
break;
}

unset($analysables[$key]);
}

// Prepare the context object.
Expand Down
4 changes: 0 additions & 4 deletions analytics/classes/analysable.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@
/**
* Any element analysers can analyse.
*
* Analysers get_analysers method return all analysable elements in the site;
* it is important that analysable elements implement lazy loading to avoid
* big memory footprints. See \core_analytics\course example.
*
* @package core_analytics
* @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
Expand Down
116 changes: 52 additions & 64 deletions analytics/classes/analysis.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,27 @@ public function run() {

$filesbytimesplitting = array();

list($analysables, $processedanalysables) = $this->get_sorted_analysables();
$alreadyprocessedanalysables = $this->get_processed_analysables();

$inittime = time();
foreach ($analysables as $key => $analysable) {
if ($this->includetarget) {
$action = 'training';
} else {
$action = 'prediction';
}
$analysables = $this->analyser->get_analysables_iterator($action);

$inittime = microtime(true);
foreach ($analysables as $analysable) {
$processed = false;

if (!$analysable) {
continue;
}

$analysableresults = $this->process_analysable($analysable);
if ($analysableresults) {
$success = $this->result->add_analysable_results($analysableresults);
if (!$success) {
$processed = $this->result->add_analysable_results($analysableresults);
if (!$processed) {
$errors = array();
foreach ($analysableresults as $timesplittingid => $result) {
$str = '';
Expand All @@ -110,51 +122,18 @@ public function run() {
}

// Updated when the analysable results were added successfully, or always if the target asks for it.
$this->update_analysable_analysed_time($processedanalysables, $analysable->get_id());
if ($this->analyser->get_target()->always_update_analysis_time() || $processed) {
$this->update_analysable_analysed_time($alreadyprocessedanalysables, $analysable->get_id());
}

// Apply time limit.
if (!$options['evaluation']) {
$timespent = time() - $inittime;
$timespent = microtime(true) - $inittime;
if ($modeltimelimit <= $timespent) {
break;
}
}

unset($analysables[$key]);
}

return true;
}

/**
 * Builds the processing queue of analysables for this analysis.
 *
 * Analysables that have no entry among the already processed ones come first;
 * they are followed by the already processed ones, keeping the order in which
 * get_processed_analysables() returned them.
 *
 * @return array(0 => \core_analytics\analysable[], 1 => \stdClass[])
 */
protected function get_sorted_analysables(): array {

    $analysables = $this->analyser->get_analysables();

    // Records of the analysables that have been already processed, indexed by analysable id.
    $processedanalysables = $this->get_processed_analysables();

    // Start the queue with the analysables we have never processed.
    $queue = array_diff_key($analysables, $processedanalysables);

    // Then append the ones we already processed. We iterate over $processedanalysables (not over
    // $analysables) so that its ordering is the one respected, and we store the analysable object
    // instead of the processed record.
    foreach ($processedanalysables as $analysableid => $unusedrecord) {
        if (array_key_exists($analysableid, $analysables)) {
            $queue[$analysableid] = $analysables[$analysableid];
        }
    }

    return array($queue, $processedanalysables);
}

/**
Expand Down Expand Up @@ -294,7 +273,7 @@ protected function process_time_splitting(\core_analytics\local\time_splitting\b
// Only when processing data for predictions.
if (!$this->includetarget) {
// We also filter out samples and ranges that have already been used for predictions.
$this->filter_out_prediction_samples_and_ranges($sampleids, $ranges, $timesplitting);
$predictsamplesrecord = $this->filter_out_prediction_samples_and_ranges($sampleids, $ranges, $timesplitting);
}

if (count($sampleids) === 0) {
Expand Down Expand Up @@ -365,7 +344,9 @@ protected function process_time_splitting(\core_analytics\local\time_splitting\b
if ($this->includetarget) {
$this->save_train_samples($sampleids, $timesplitting);
} else {
$this->save_prediction_samples($sampleids, $ranges, $timesplitting);
// The variable $predictsamplesrecord will always be set as filter_out_prediction_samples_and_ranges
// will always be called before it (no evaluation mode and no includetarget).
$this->save_prediction_samples($sampleids, $ranges, $timesplitting, $predictsamplesrecord);
}
}

Expand Down Expand Up @@ -736,21 +717,17 @@ protected function filter_out_train_samples(array &$sampleids, \core_analytics\l
* @param int[] $sampleids
* @param array $ranges
* @param \core_analytics\local\time_splitting\base $timesplitting
* @return null
* @return \stdClass|null The analytics_predict_samples record or null
*/
protected function filter_out_prediction_samples_and_ranges(array &$sampleids, array &$ranges,
\core_analytics\local\time_splitting\base $timesplitting) {
global $DB;

if (count($ranges) > 1) {
throw new \coding_exception('$ranges argument should only contain one range');
}

$rangeindex = key($ranges);

$params = array('modelid' => $this->analyser->get_modelid(), 'analysableid' => $timesplitting->get_analysable()->get_id(),
'timesplitting' => $timesplitting->get_id(), 'rangeindex' => $rangeindex);
$predictedrange = $DB->get_record('analytics_predict_samples', $params);
$predictedrange = $this->get_predict_samples_record($timesplitting, $rangeindex);

if (!$predictedrange) {
// Nothing to filter out.
Expand All @@ -767,6 +744,18 @@ protected function filter_out_prediction_samples_and_ranges(array &$sampleids, a

// Replace the list of samples by the one excluding samples that already got predictions at this range.
$sampleids = $missingsamples;

return $predictedrange;
}

/**
 * Looks up the analytics_predict_samples record of a range.
 *
 * The record is identified by this analyser's model id, the analysable the time-splitting
 * method is attached to, the time-splitting method id and the range index.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting Provides the analysable and the time-splitting id.
 * @param int $rangeindex The index of the range.
 * @return \stdClass|false The value returned by $DB->get_record(), untouched; callers in this class
 *                         treat a falsy value as "no record yet".
 */
private function get_predict_samples_record(\core_analytics\local\time_splitting\base $timesplitting, int $rangeindex) {
    global $DB;

    $conditions = array(
        'modelid' => $this->analyser->get_modelid(),
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
        'rangeindex' => $rangeindex,
    );

    return $DB->get_record('analytics_predict_samples', $conditions);
}

/**
Expand Down Expand Up @@ -796,10 +785,11 @@ protected function save_train_samples(array $sampleids, \core_analytics\local\ti
* @param int[] $sampleids
* @param array $ranges
* @param \core_analytics\local\time_splitting\base $timesplitting
* @param ?\stdClass $predictsamplesrecord The existing record or null if there is no record yet.
* @return null
*/
protected function save_prediction_samples(array $sampleids, array $ranges,
\core_analytics\local\time_splitting\base $timesplitting) {
\core_analytics\local\time_splitting\base $timesplitting, ?\stdClass $predictsamplesrecord) {
global $DB;

if (count($ranges) > 1) {
Expand All @@ -808,20 +798,18 @@ protected function save_prediction_samples(array $sampleids, array $ranges,

$rangeindex = key($ranges);

$params = array('modelid' => $this->analyser->get_modelid(), 'analysableid' => $timesplitting->get_analysable()->get_id(),
'timesplitting' => $timesplitting->get_id(), 'rangeindex' => $rangeindex);
if ($predictionrange = $DB->get_record('analytics_predict_samples', $params)) {
if ($predictsamplesrecord) {
// Append the new samples used for prediction.
$prevsamples = json_decode($predictionrange->sampleids, true);
$predictionrange->sampleids = json_encode($prevsamples + $sampleids);
$predictionrange->timemodified = time();
$DB->update_record('analytics_predict_samples', $predictionrange);
$predictsamplesrecord->sampleids = json_encode($predictsamplesrecord->sampleids + $sampleids);
$predictsamplesrecord->timemodified = time();
$DB->update_record('analytics_predict_samples', $predictsamplesrecord);
} else {
$predictionrange = (object)$params;
$predictionrange->sampleids = json_encode($sampleids);
$predictionrange->timecreated = time();
$predictionrange->timemodified = $predictionrange->timecreated;
$DB->insert_record('analytics_predict_samples', $predictionrange);
$predictsamplesrecord = (object)['modelid' => $this->analyser->get_modelid(), 'analysableid' => $timesplitting->get_analysable()->get_id(),
'timesplitting' => $timesplitting->get_id(), 'rangeindex' => $rangeindex];
$predictsamplesrecord->sampleids = json_encode($sampleids);
$predictsamplesrecord->timecreated = time();
$predictsamplesrecord->timemodified = $predictsamplesrecord->timecreated;
$DB->insert_record('analytics_predict_samples', $predictsamplesrecord);
}
}

Expand Down Expand Up @@ -898,4 +886,4 @@ private static function get_insert_batch_size(): int {

return (int)$bulkinsert;
}
}
}
16 changes: 10 additions & 6 deletions analytics/classes/course.php
Original file line number Diff line number Diff line change
Expand Up @@ -132,19 +132,22 @@ class course implements \core_analytics\analysable {
* Use self::instance() instead to get cached copies of the course. Instances obtained
* through this constructor will not be cached.
*
* Lazy load of course data, students and teachers.
*
* @param int|\stdClass $course Course id
* @param int|\stdClass $course Course id or mdl_course record
* @param ?\context $context
* @return void
*/
public function __construct($course) {
public function __construct($course, ?\context $context = null) {

    // A scalar is taken as the course id and wrapped into a record that only holds
    // that id; anything else is stored as received.
    if (!is_scalar($course)) {
        $this->course = $course;
    } else {
        $this->course = (object)['id' => $course];
    }

    // Only keep the context when the caller provided one.
    if ($context !== null) {
        $this->coursecontext = $context;
    }
}

/**
Expand All @@ -153,9 +156,10 @@ public function __construct($course) {
* Lazy load of course data, students and teachers.
*
* @param int|\stdClass $course Course object or course id
* @param ?\context $context
* @return \core_analytics\course
*/
public static function instance($course) {
public static function instance($course, ?\context $context = null) {

$courseid = $course;
if (!is_scalar($courseid)) {
Expand All @@ -166,7 +170,7 @@ public static function instance($course) {
return self::$cachedinstance;
}

$cachedinstance = new \core_analytics\course($course);
$cachedinstance = new \core_analytics\course($course, $context);
self::$cachedinstance = $cachedinstance;
self::$cachedid = (int)$courseid;
return self::$cachedinstance;
Expand Down
95 changes: 93 additions & 2 deletions analytics/classes/local/analyser/base.php
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,34 @@ public function __construct($modelid, \core_analytics\local\target\base $target,
*
* \core_analytics\local\analyser\by_course and \core_analytics\local\analyser\sitewide are implementing
* this method returning site courses (by_course) and the whole system (sitewide) as analysables.
*
* @throws \coding_exception
* @return \core_analytics\analysable[] Array of analysable elements using the analysable id as array key.
*/
abstract public function get_analysables();
public function get_analysables() {
// Legacy entry point kept for backwards compatibility. Within this class it is only called by the
// default get_analysables_iterator() implementation, so analysers that do not provide their own
// get_analysables() end up with the exception below.
// NOTE(review): the original comment said this is kept "until php 4.1"; that presumably refers to
// a Moodle release rather than to a PHP version - confirm the intended removal target.
throw new \coding_exception('This method is deprecated in favour of get_analysables_iterator.');
}

/**
* Returns the list of analysable elements available on the site.
*
* A relatively complex SQL query should be set so that we take into account which analysable elements
* have already been processed and the order in which they have been processed. Helper methods are available
* to ease the implementation of get_analysables_iterator: get_iterator_sql and order_sql.
*
* This default implementation does none of that: it emits a debugging message and wraps whatever
* get_analysables() returns into an iterator, ignoring $action.
*
* @param ?string $action 'prediction', 'training' or null if no specific action needed.
* @return \Iterator
*/
public function get_analysables_iterator(?string $action = null) {

// Tell developers that they are relying on the backwards compatibility fallback.
debugging('Please overwrite get_analysables_iterator with your own implementation, we only keep this default
implementation for backwards compatibility purposes with get_analysables(). note that $action param will
be ignored so the analysable elements will be processed using get_analysables order, regardless of the
last time they were processed.');

// The legacy array is wrapped so that callers can iterate over it as they would over any other iterator.
return new \ArrayIterator($this->get_analysables());
}

/**
* This function returns this analysable list of samples.
Expand Down Expand Up @@ -376,4 +400,71 @@ public function join_sample_user($sampletablealias) {
public static function one_sample_per_analysable() {
return false;
}

/**
* Get the sql of a default implementation of the iterator.
*
* This method only works for analysers that return analysable elements whose ids map to context instance ids.
*
* The returned SQL ends with 'WHERE 1 = 1' and has no ORDER BY clause, so callers can append their own
* ' AND ...' conditions and an ordering.
*
* @param string $tablename The name of the table
* @param int $contextlevel The context level of the analysable
* @param string|null $action When set, only analytics_used_analysables records of this action are joined
* @param string|null $tablealias The table alias, 'analysable' if none is provided
* @return array [0] => sql and [1] => params array
*/
protected function get_iterator_sql(string $tablename, int $contextlevel, ?string $action = null, ?string $tablealias = null) {

if (!$tablealias) {
$tablealias = 'analysable';
}

$params = ['contextlevel' => $contextlevel, 'modelid' => $this->get_modelid()];
// All the columns of the analysable table plus the columns provided by the context helper for the 'ctx' alias.
$select = $tablealias . '.*, ' . \context_helper::get_preload_record_columns_sql('ctx');

// We add the model (and action) filter on ON instead of on WHERE because otherwise records are not returned if
// there are existing records for another action or model. It is a LEFT JOIN so that analysable elements
// without any analytics_used_analysables record are returned as well.
$usedanalysablesjoin = ' LEFT JOIN {analytics_used_analysables} aua ON ' . $tablealias . '.id = aua.analysableid AND ' .
'(aua.modelid = :modelid OR aua.modelid IS NULL)';

if ($action) {
$usedanalysablesjoin .= " AND aua.action = :action";
$params = $params + ['action' => $action];
}

// Adding the 1 = 1 just to have the WHERE part so that all further conditions added by callers can be
// appended to $sql with an ' AND'.
$sql = 'SELECT ' . $select . '
FROM {' . $tablename . '} ' . $tablealias . '
' . $usedanalysablesjoin . '
JOIN {context} ctx ON (ctx.contextlevel = :contextlevel AND ctx.instanceid = ' . $tablealias . '.id)
WHERE 1 = 1';

return [$sql, $params];
}

/**
 * Builds the ORDER BY clause to use along with the iterator SQL.
 *
 * Results are always sorted by aua.timeanalysed ASC first, with missing (NULL) values counted
 * as 0 so that they come first. The optional field is only used as a secondary sorting criterion.
 *
 * @throws \coding_exception If $order is neither 'ASC' nor 'DESC'
 * @param string|null $fieldname The secondary sort field, none if null or empty
 * @param string $order 'ASC' or 'DESC', only applied to $fieldname
 * @param string|null $tablealias The table alias of the field, 'analysable' if none is provided
 * @return string
 */
protected function order_sql(?string $fieldname = null, string $order = 'ASC', ?string $tablealias = null) {

    if (!in_array($order, ['ASC', 'DESC'], true)) {
        throw new \coding_exception('The order can only be ASC or DESC');
    }

    $alias = $tablealias ?: 'analysable';

    $ordersql = ' ORDER BY (CASE WHEN aua.timeanalysed IS NULL THEN 0 ELSE aua.timeanalysed END) ASC';
    if (!$fieldname) {
        // No secondary criterion requested.
        return $ordersql;
    }

    return $ordersql . ', ' . $alias . '.' . $fieldname .' ' . $order;
}
}
Loading

0 comments on commit fccc728

Please sign in to comment.