Skip to content

Commit

Permalink
MDL-59039 Global search: Allow partial indexing (in scheduled task)
Browse files Browse the repository at this point in the history
  • Loading branch information
sammarshallou committed Jul 11, 2017
1 parent 350700b commit 67d6479
Show file tree
Hide file tree
Showing 13 changed files with 303 additions and 37 deletions.
3 changes: 3 additions & 0 deletions admin/searchareas.php
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@
$areasconfig[$areaid]->docsprocessed . ' , ' .
$areasconfig[$areaid]->recordsprocessed . ' , ' .
$areasconfig[$areaid]->docsignored;
if ($areasconfig[$areaid]->partial) {
$laststatus .= ' ' . get_string('searchpartial', 'admin');
}
} else {
$laststatus = '';
}
Expand Down
7 changes: 7 additions & 0 deletions admin/settings/plugins.php
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,13 @@
$temp->add(new admin_setting_heading('searchengineheading', new lang_string('searchengine', 'admin'), ''));
$temp->add(new admin_setting_configselect('searchengine',
new lang_string('selectsearchengine', 'admin'), '', 'solr', $engines));
$temp->add(new admin_setting_heading('searchindexingheading', new lang_string('searchoptions', 'admin'), ''));
$temp->add(new admin_setting_configcheckbox('searchindexwhendisabled',
new lang_string('searchindexwhendisabled', 'admin'), new lang_string('searchindexwhendisabled_desc', 'admin'),
0));
$temp->add(new admin_setting_configduration('searchindextime',
new lang_string('searchindextime', 'admin'), new lang_string('searchindextime_desc', 'admin'),
600));

$ADMIN->add('searchplugins', $temp);
$ADMIN->add('searchplugins', new admin_externalpage('searchareas', new lang_string('searchareas', 'admin'),
Expand Down
6 changes: 6 additions & 0 deletions lang/en/admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -985,10 +985,16 @@
$string['searchengine'] = 'Search engine';
$string['searchindexactions'] = 'Index actions';
$string['searchindexdeleted'] = 'Index deleted';
$string['searchindextime'] = 'Indexing time limit';
$string['searchindextime_desc'] = 'When indexing large amounts of new content, the scheduled task will stop after this time limit is reached. It will continue the next time the task runs.';
$string['searchindexupdated'] = 'Search engine contents have been updated';
$string['searchindexwhendisabled'] = 'Index when disabled';
$string['searchindexwhendisabled_desc'] = 'Allows the scheduled task to build the search index even when search is disabled. This is useful if you want to build the index before the search facility appears to students.';
$string['searchinsettings'] = 'Search in settings';
$string['searchlastrun'] = 'Last run (time, # docs, # records, # ignores)';
$string['searchnotavailable'] = 'Search is not available';
$string['searchpartial'] = '(not yet fully indexed)';
$string['searchoptions'] = 'Search options';
$string['searchreindexed'] = 'All site contents have been reindexed.';
$string['searchreindexindex'] = 'Reindex all site contents';
$string['searchresults'] = 'Search results';
Expand Down
5 changes: 3 additions & 2 deletions lib/classes/task/search_index_task.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,13 @@ public function get_name() {
* Throw exceptions on errors (the job will be retried).
*/
public function execute() {
if (!\core_search\manager::is_global_search_enabled()) {
if (!\core_search\manager::is_global_search_enabled() &&
!get_config('core', 'searchindexwhendisabled')) {
return;
}
$globalsearch = \core_search\manager::instance();

// Indexing database records for modules + rich documents of forum.
$globalsearch->index();
$globalsearch->index(false, get_config('core', 'searchindextime'), new \text_progress_trace());
}
}
3 changes: 2 additions & 1 deletion lib/classes/task/search_optimize_task.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ public function get_name() {
* Throw exceptions on errors (the job will be retried).
*/
public function execute() {
if (!\core_search\manager::is_global_search_enabled()) {
if (!\core_search\manager::is_global_search_enabled() &&
!get_config('core', 'searchindexwhendisabled')) {
return;
}

Expand Down
19 changes: 18 additions & 1 deletion search/classes/base.php
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ public function get_config() {
list($componentname, $varname) = $this->get_config_var_name();

$config = [];
$settingnames = array('_enabled', '_indexingstart', '_indexingend', '_lastindexrun', '_docsignored', '_docsprocessed', '_recordsprocessed');
$settingnames = array('_enabled', '_indexingstart', '_indexingend', '_lastindexrun',
'_docsignored', '_docsprocessed', '_recordsprocessed', '_partial');
foreach ($settingnames as $name) {
$config[$varname . $name] = get_config($componentname, $varname . $name);
}
Expand Down Expand Up @@ -209,6 +210,22 @@ public function set_enabled($isenabled) {
return set_config($varname . '_enabled', $isenabled, $componentname);
}

/**
 * Reports how long the most recent indexing run of this area took.
 *
 * The duration is derived from the stored '_indexingstart' and '_indexingend'
 * config values for this area.
 *
 * @return int|bool Seconds between the stored start and end values, or false if either value is missing/empty
 */
public function get_last_indexing_duration() {
    list($componentname, $varname) = $this->get_config_var_name();

    $indexingstart = get_config($componentname, $varname . '_indexingstart');
    $indexingend = get_config($componentname, $varname . '_indexingend');

    // Without both values there is no completed run to measure.
    if (!$indexingstart || !$indexingend) {
        return false;
    }

    return $indexingend - $indexingstart;
}

/**
* Returns true if this area uses file indexing.
*
Expand Down
15 changes: 14 additions & 1 deletion search/classes/engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,18 @@ public function add_documents($iterator, $searcharea, $options) {
$numdocs = 0;
$numdocsignored = 0;
$lastindexeddoc = 0;
$firstindexeddoc = 0;
$partial = false;

foreach ($iterator as $document) {
// Stop if we have exceeded the time limit (and there are still more items). Always
// do at least one second's worth of documents otherwise it will never make progress.
if ($lastindexeddoc !== $firstindexeddoc &&
!empty($options['stopat']) && microtime(true) >= $options['stopat']) {
$partial = true;
break;
}

if (!$document instanceof \core_search\document) {
continue;
}
Expand All @@ -236,10 +246,13 @@ public function add_documents($iterator, $searcharea, $options) {
}

$lastindexeddoc = $document->get('modified');
if (!$firstindexeddoc) {
$firstindexeddoc = $lastindexeddoc;
}
$numrecords++;
}

return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc);
return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial);
}

/**
Expand Down
94 changes: 73 additions & 21 deletions search/classes/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -521,11 +521,19 @@ public function optimize_index() {
* Index all documents.
*
* @param bool $fullindex Whether we should reindex everything or not.
* @param float $timelimit Time limit in seconds (0 = no time limit)
* @param \progress_trace $progress Optional class for tracking progress
* @throws \moodle_exception
* @return bool Whether there was any updated document or not.
*/
public function index($fullindex = false) {
global $CFG;
public function index($fullindex = false, $timelimit = 0, \progress_trace $progress = null) {
// Cannot combine time limit with reindex.
if ($timelimit && $fullindex) {
throw new \coding_exception('Cannot apply time limit when reindexing');
}
if (!$progress) {
$progress = new \null_progress_trace();
}

// Unlimited time.
\core_php_time_limit::raise();
Expand All @@ -536,11 +544,25 @@ public function index($fullindex = false) {
$sumdocs = 0;

$searchareas = $this->get_search_areas_list(true);

if ($timelimit) {
// If time is limited (and therefore we're not just indexing everything anyway), select
// an order for search areas. The intention here is to avoid a situation where a new
// large search area is enabled, and this means all our other search areas go out of
// date while that one is being indexed. To do this, we order by the time we spent
// indexing them last time we ran, meaning anything that took a very long time will be
// done last.
uasort($searchareas, function(\core_search\base $area1, \core_search\base $area2) {
return (int)$area1->get_last_indexing_duration() - (int)$area2->get_last_indexing_duration();
});

// Decide time to stop.
$stopat = microtime(true) + $timelimit;
}

foreach ($searchareas as $areaid => $searcharea) {

if (CLI_SCRIPT && !PHPUNIT_TEST) {
mtrace('Processing ' . $searcharea->get_visible_name() . ' area');
}
$progress->output('Processing area: ' . $searcharea->get_visible_name());

// Notify the engine that an area is starting.
$this->engine->area_index_starting($searcharea, $fullindex);
Expand All @@ -556,7 +578,16 @@ public function index($fullindex = false) {
if ($fullindex === true) {
$referencestarttime = 0;
} else {
$referencestarttime = $prevtimestart;
$partial = get_config($componentconfigname, $varname . '_partial');
if ($partial) {
// When the previous index did not complete all data, we start from the time of the
// last document that was successfully indexed. (Note this will result in
// re-indexing that one document, but we can't avoid that because there may be
// other documents in the same second.)
$referencestarttime = intval(get_config($componentconfigname, $varname . '_lastindexrun'));
} else {
$referencestarttime = $prevtimestart;
}
}

// Getting the recordset from the area.
Expand All @@ -565,27 +596,35 @@ public function index($fullindex = false) {
// Pass get_document as callback.
$fileindexing = $this->engine->file_indexing_enabled() && $searcharea->uses_file_indexing();
$options = array('indexfiles' => $fileindexing, 'lastindexedtime' => $prevtimestart);
if ($timelimit) {
$options['stopat'] = $stopat;
}
$iterator = new \core\dml\recordset_walk($recordset, array($searcharea, 'get_document'), $options);
list($numrecords,
$numdocs,
$numdocsignored,
$lastindexeddoc) = $this->engine->add_documents($iterator, $searcharea, $options);

if (CLI_SCRIPT && !PHPUNIT_TEST) {
if ($numdocs > 0) {
$elapsed = round((microtime(true) - $elapsed), 3);
mtrace('Processed ' . $numrecords . ' records containing ' . $numdocs . ' documents for ' .
$searcharea->get_visible_name() . ' area, in ' . $elapsed . ' seconds.');
} else {
mtrace('No new documents to index for ' . $searcharea->get_visible_name() . ' area.');
}
$result = $this->engine->add_documents($iterator, $searcharea, $options);
if (count($result) === 5) {
list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial) = $result;
} else {
// Backward compatibility for engines that don't support partial adding.
list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc) = $result;
debugging('engine::add_documents() should return $partial (4-value return is deprecated)',
DEBUG_DEVELOPER);
$partial = false;
}

if ($numdocs > 0) {
$elapsed = round((microtime(true) - $elapsed), 3);
$progress->output('Processed ' . $numrecords . ' records containing ' . $numdocs .
' documents, in ' . $elapsed . ' seconds' .
($partial ? ' (not complete)' : '') . '.', 1);
} else {
$progress->output('No new documents to index.', 1);
}

// Notify the engine this area is complete, and only mark times if true.
if ($this->engine->area_index_complete($searcharea, $numdocs, $fullindex)) {
$sumdocs += $numdocs;

// Store last index run once documents have been commited to the search engine.
// Store last index run once documents have been committed to the search engine.
set_config($varname . '_indexingstart', $indexingstart, $componentconfigname);
set_config($varname . '_indexingend', time(), $componentconfigname);
set_config($varname . '_docsignored', $numdocsignored, $componentconfigname);
Expand All @@ -594,6 +633,18 @@ public function index($fullindex = false) {
if ($lastindexeddoc > 0) {
set_config($varname . '_lastindexrun', $lastindexeddoc, $componentconfigname);
}
if ($partial) {
set_config($varname . '_partial', 1, $componentconfigname);
} else {
unset_config($varname . '_partial', $componentconfigname);
}
} else {
$progress->output('Engine reported error.');
}

if ($timelimit && (microtime(true) >= $stopat)) {
$progress->output('Stopping indexing due to time limit.');
break;
}
}

Expand Down Expand Up @@ -673,7 +724,8 @@ public function delete_index_by_id($id) {
*/
public function get_areas_config($searchareas) {

$vars = array('indexingstart', 'indexingend', 'lastindexrun', 'docsignored', 'docsprocessed', 'recordsprocessed');
$vars = array('indexingstart', 'indexingend', 'lastindexrun', 'docsignored',
'docsprocessed', 'recordsprocessed', 'partial');

$configsettings = [];
foreach ($searchareas as $searcharea) {
Expand Down
34 changes: 24 additions & 10 deletions search/cli/indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
require(__DIR__.'/../../config.php');
require_once($CFG->libdir.'/clilib.php'); // cli only functions

list($options, $unrecognized) = cli_get_params(array('help' => false, 'force' => false, 'reindex' => false),
array('h' => 'help', 'f' => 'force', 'r' => 'reindex'));
list($options, $unrecognized) = cli_get_params(array('help' => false, 'force' => false,
'reindex' => false, 'timelimit' => 0),
array('h' => 'help', 'f' => 'force', 'r' => 'reindex', 't' => 'timelimit'));

if ($unrecognized) {
$unrecognized = implode("\n ", $unrecognized);
Expand All @@ -40,18 +41,24 @@
"Index search data
Options:
-h, --help Print out this help
-r, --reindex Reindex data
-f, --force Allow indexer to run, even if global search is disabled.
-h, --help Print out this help
-r, --reindex Reindex data
-f, --force Allow indexer to run, even if global search is disabled.
-t=<n>, --timelimit=<n> Stop after indexing for specified time (in seconds)
Example:
Examples:
\$ sudo -u www-data /usr/bin/php search/cli/indexer.php --reindex
\$ sudo -u www-data /usr/bin/php search/cli/indexer.php --timelimit=300
";

echo $help;
die;
}

if ($options['timelimit'] && $options['reindex']) {
cli_error('Cannot apply time limit when reindexing');
}

if (!\core_search\manager::is_global_search_enabled() && empty($options['force'])) {
cli_error('Global search is disabled. Use --force if you want to force an index while disabled');
}
Expand All @@ -70,13 +77,20 @@
$globalsearch = \core_search\manager::instance();

if (empty($options['reindex'])) {
echo "Running full index of site\n";
echo "==========================\n";
$globalsearch->index();
if ($options['timelimit']) {
$limitinfo = ' (max ' . $options['timelimit'] . ' seconds)';
$limitunderline = preg_replace('~.~', '=', $limitinfo);
echo "Running index of site$limitinfo\n";
echo "=====================$limitunderline\n";
} else {
echo "Running full index of site\n";
echo "==========================\n";
}
$globalsearch->index(false, $options['timelimit'], new text_progress_trace());
} else {
echo "Running full reindex of site\n";
echo "============================\n";
$globalsearch->index(true);
$globalsearch->index(true, 0, new text_progress_trace());
}

// Optimize index at last.
Expand Down
4 changes: 4 additions & 0 deletions search/tests/fixtures/mock_search_area.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,8 @@ public function get_doc_url(\core_search\document $doc) {
public function get_context_url(\core_search\document $doc) {
return new \moodle_url('/index.php');
}

/**
 * Returns a fixed display name for the mock search area.
 *
 * @param bool $lazyload Accepted but not used by this implementation.
 * @return string Always the literal 'Mock search area'.
 */
public function get_visible_name($lazyload = false) {
return 'Mock search area';
}
}
Loading

0 comments on commit 67d6479

Please sign in to comment.