diff --git a/search/LISEZMOI.txt b/search/LISEZMOI.txt new file mode 100644 index 0000000000000..8780ee3f74102 --- /dev/null +++ b/search/LISEZMOI.txt @@ -0,0 +1,89 @@ +Cette distribution partielle contient une refonte du moteur de +recherche globale de Moodle. + +Le moteur de recherche est capable d'indexer et de rechercher +des informations dans un grand nombre de contenus stockés +dans la plate-forme à travers la manipulation des activités et +des blocs. + +Le moteur de recherche procède à une première indexation des +ressources disponibles par action de l'administrateur. Une fois +cette indexation effectuée, le moteur maintient régulièrement les +indexes, en ajoutant les nouvelles entrées et en nettoyant les +entrées obsolètes. + +La recherche permet d'obtenir des références d'accès au contexte +qui diffuse cette information, au nom de l'utilisateur courant. +Le filtrage des résultats enlève de la liste des réponses toute +ressource que la situation de l'utilisateur empêcherait de voir +s'il y accédait dans son contexte habituel. + +Mise en oeuvre +############## + +Pour déployer le moteur : + + +* Copie de fichiers + +1. Ajouter les deux librairies fournies aux librairies de Moodle +2. Ecraser le répertoire "search" par le répertoire fourni +3. Ecraser le bloc "blocks/search" par le bloc fourni. + +* Installation logique + +4. Aller dans les notifications administratives et dérouler la procédure d'installation/mise à jour du bloc. L'installation crée la table image +des documents indexés et utilisés dans le module search. + +5. Insérer un nouveau bloc de recherche globale dans la plate-forme + +6. Effectuer une recherche vide (en administrateur) + +7. Aller sur la page des statistiques + +8. Activer l'indexation (indexersplash.php). Attention, si la plate-forme contient beaucoup de contenus, cette indexation peut être TRES LONGUE. + +Pour effectuer des recherches, une fois la première indexation terminée, retourner au bloc de recherche et tenter une recherche. 
+ +Eléments pris en charge +####################### + +Dans l'état actuel, les éléments indexés par le moteur sont : + +- les entrées de forum +- les fiches de base de données +- les commentaires sur fiches de données +- les entrées de glossaire +- les commentaires sur entrées de glossaire +- les ressources natives Moodle +- les ressources physiques de type MSWord +- les ressources physiques de type PDF +- les ressources physiques de type fichier texte (.txt) +- les ressources physiques de type HTML (.htm et .html) +- les ressources physiques de type XML (.xml) +- les ressources physiques de type (Microsoft) Powerpoint (.ppt) +- les pages de wiki +- les entités de projet technique +- les sessions de chat + +Extensions +########## + +L'API du moteur de recherche permet désormais : + +- l'indexation de contenus de blocs. +- l'indexation de modules contenant une information complexe ou de plusieurs types distincts +- la sécurisation des informations indexées lors des extractions de résultats +- l'indexation de tout module tiers par ajout d'un fichier php calibré +- l'indexation de toute nouvelle ressource physique par ajout d'un fichier php calibré + +Extensions futures +################## + +- De nouvelles prises en charge de contenus tels que les attachements des forums, les attachements des glossaires, ainsi que d'autres modules non encore +implémentés. + +- l'extension mnet de la recherche dans un réseau de Moodle interconnectés. + + + diff --git a/search/READMETOO.txt b/search/READMETOO.txt new file mode 100644 index 0000000000000..e6c0983ed786d --- /dev/null +++ b/search/READMETOO.txt @@ -0,0 +1,90 @@ +This partial distribution contains a complete review of the +Global Search Engine of Moodle. + +The Global Search Engine stores indexes about a huge quantity +of information from within modules, blocks or resources stored +by Moodle either in the database or the file system. + +The administrator initially indexes the existing content. 
Once this +first initialization is performed, the search engine maintains indexes +regularly, adding new entries, deleting obsolete ones or updating +some that have changed. + +Search will produce links for accessing the information in a similar +context as usually accessed, from the current user's point of view. +Results filtering removes from results any link to information the +current user would not be allowed to access in their usual context. + +Deployment +########## + +To set up the engine: + + +* File copy + +1. Add to Moodle's library both additional libraries provided in the distribution +2. Replace the "search" directory with the new one +3. Replace the "blocks/search" directory with the new one. + +* Logical install + +4. Browse to the administrative notification screen and let the +install/update process run. The install process creates the Moodle +table needed for backing the indexed documents identities. + +5. Go to the block administration panel and set up the Global Search +block once. This will initialize useful parameters for the global search engine. + +6. Insert a new Global Search block somewhere in a course or top-level screen. + +7. Launch an empty search (you must be administrator). + +8. Go to the statistics screen. + +9. Activate indexation (indexersplash.php). Beware, if your Moodle has +a large amount of content, the indexing process may take a VERY LONG time. + +To search, go back to the search block and try a query. 
+ +Handled information for indexing +################################ + +In its current state, the engine indexes the following information: + +- forum posts +- database records (using textual fields only) +- database comments +- glossary entries +- glossary comments on entries +- Moodle native resources +- physical MSWord files as resources (.doc) +- physical Powerpoint files as resources (.ppt) +- physical PDF files as resources +- physical text files as resources (.txt) +- physical html files as resources (.htm and .html) +- physical xml files as resources (.xml) +- wiki pages +- techproject descriptions +- chat sessions + +Extensions +########## + +The reviewed search engine API allows: + +- indexing of blocks contents +- indexing of modules or blocks containing a complex information model +- securing the access to the results +- adding indexing of any third-party module by adding a calibrated php script +- adding handling of any new physical file type by adding a calibrated php script + +Future extensions +################# + +- More content should be indexed, such as forum and glossary attachments, as well as the contents of other standard modules not handled yet. + +- extending the search capability to an mnet network information space. 
+ + + diff --git a/search/add.php b/search/add.php index 285c9d948c8a7..6d45795507be4 100644 --- a/search/add.php +++ b/search/add.php @@ -1,103 +1,144 @@ dirroot/search/lib.php"); - - require_login(); - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } - - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if - - //check for php5 (lib.php) - if (!search_check_php5()) { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* Asynchronous adder for new indexable contents +* +* Major chages in this review is passing the xxxx_db_names return to +* multiple arity to handle multiple document types modules +*/ + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +require_login(); + +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} + +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} + +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if - - require_once("$CFG->dirroot/search/indexlib.php"); - - $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); - $dbcontrol = new IndexDBControl(); - $addition_count = 0; - - $indexdate = $CFG->search_indexer_run_date; - - mtrace('
Starting index update (additions)...'); - mtrace('Index size before: '.$CFG->search_index_size."\n"); - - //get all modules - if ($mods = get_records_select('modules')) { - //append virtual modules onto array - $mods = array_merge($mods, search_get_additional_modules()); - - foreach ($mods as $mod) { - //build include file and function names - $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; - $db_names_function = $mod->name.'_db_names'; - $get_document_function = $mod->name.'_single_document'; - $additions = array(); - - if (file_exists($class_file)) { - require_once($class_file); - - //if both required functions exist - if (function_exists($db_names_function) and function_exists($get_document_function)) { - mtrace("Checking $mod->name module for additions."); - $values = $db_names_function(); - $where = (isset($values[4])) ? $values[4] : ''; - - //select records in MODULE table, but not in SEARCH_DATABASE_TABLE - $sql = "select id, ".$values[0]." as docid from ".$values[1]. - " where id not in". - " (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')". - " and ".$values[2]." > $indexdate". 
- " $where"; - - $records = get_records_sql($sql); - - //foreach record, build a module specific search document using the get_document function - if (is_array($records)) { - foreach($records as $record) { - $additions[] = $get_document_function($record->id); - } //foreach - } //if - - //foreach document, add it to the index and database table - foreach ($additions as $add) { - ++$addition_count; - - //object to insert into db - $dbid = $dbcontrol->addDocument($add); - - //synchronise db with index - $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); - - mtrace(" Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)"); - - $index->addDocument($add); - } //foreach - - mtrace("Finished $mod->name.\n"); - } //if - } //if - } //foreach - } //if - - //commit changes - $index->commit(); - - //update index date and size - set_config("search_indexer_run_date", time()); - set_config("search_index_size", (int)$CFG->search_index_size + (int)$addition_count); - - //print some additional info - mtrace("Added $addition_count documents."); - mtrace('Index size after: '.$index->count().''); +} + +require_once("$CFG->dirroot/search/indexlib.php"); + +$index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); +$dbcontrol = new IndexDBControl(); +$addition_count = 0; +$startindextime = time(); + +$indexdate = $CFG->search_indexer_run_date; + +mtrace('
Starting index update (additions)...'); +mtrace('Index size before: '.$CFG->search_index_size."\n"); + +//get all modules +if ($mods = get_records_select('modules')) { + +//append virtual modules onto array +$mods = array_merge($mods, search_get_additional_modules()); + foreach ($mods as $mod) { + //build include file and function names + $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + $db_names_function = $mod->name.'_db_names'; + $get_document_function = $mod->name.'_single_document'; + $get_newrecords_function = $mod->name.'_new_records'; + $additions = array(); + + if (file_exists($class_file)) { + require_once($class_file); + + //if both required functions exist + if (function_exists($db_names_function) and function_exists($get_document_function)) { + mtrace("Checking $mod->name module for additions."); + $valuesArray = $db_names_function(); + if ($valuesArray){ + foreach($valuesArray as $values){ + $where = (isset($values[5])) ? 'AND ('.$values[5].')' : ''; + $itemtypes = ($values[4] != '*') ? " AND itemtype = '{$values[4]}' " : '' ; + + //select records in MODULE table, but not in SEARCH_DATABASE_TABLE + $table = SEARCH_DATABASE_TABLE; + $query = " + SELECT + docid, + itemtype + FROM + {$CFG->prefix}{$table} + WHERE + doctype = '{$mod->name}' + $itemtypes + "; + $docIds = get_records_sql_menu($query); + $docIdList = ($docIds) ? 
implode("','", array_keys($docIds)) : '' ; + + $query = " + SELECT id, + {$values[0]} as docid + FROM + {$CFG->prefix}{$values[1]} + WHERE + id NOT IN ('{$docIdList}') and + {$values[2]} > {$indexdate} + $where + "; + $records = get_records_sql($query); + + // foreach record, build a module specific search document using the get_document function + if (is_array($records)) { + foreach($records as $record) { + $add = $get_document_function($record->docid, $values[4]); + // some documents may not be indexable + if ($add) + $additions[] = $add; + } + } + } + + // foreach document, add it to the index and database table + foreach ($additions as $add) { + ++$addition_count; + + // object to insert into db + $dbid = $dbcontrol->addDocument($add); + + // synchronise db with index + $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); + + mtrace(" Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)"); + + $index->addDocument($add); + } + } + else{ + mtrace("No types to add.\n"); + } + mtrace("Finished $mod->name.\n"); + } + } + } +} + +// commit changes +$index->commit(); + +// update index date and size +set_config("search_indexer_run_date", $startindextime); +set_config("search_index_size", (int)$CFG->search_index_size + (int)$addition_count); + +// print some additional info +mtrace("Added $addition_count documents."); +mtrace('Index size after: '.$index->count().''); ?> \ No newline at end of file diff --git a/search/delete.php b/search/delete.php index 8b86305e12901..3183e7ddb2f62 100644 --- a/search/delete.php +++ b/search/delete.php @@ -1,94 +1,132 @@ dirroot/search/lib.php"); - - require_login(); - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } - - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if - - //check for php5 (lib.php) - if (!search_check_php5()) { +/** +* Global Search Engine for Moodle +* Michael Champanis 
(mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* Asynchronous index cleaner +* +* Major chages in this review is passing the xxxx_db_names return to +* multiple arity to handle multiple document types modules +*/ + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +require_login(); + +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} + +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} //if + +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if - - require_once("$CFG->dirroot/search/indexlib.php"); - - $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); - $dbcontrol = new IndexDBControl(); - $deletion_count = 0; - - mtrace('
Starting clean-up of removed records...'); - mtrace('Index size before: '.$CFG->search_index_size."\n"); - - if ($mods = get_records_select('modules')) { - $mods = array_merge($mods, search_get_additional_modules()); - - foreach ($mods as $mod) { - //build function names - $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; - $delete_function = $mod->name.'_delete'; - $db_names_function = $mod->name.'_db_names'; - $deletions = array(); - - if (file_exists($class_file)) { - require_once($class_file); - - if (function_exists($delete_function) and function_exists($db_names_function)) { - mtrace("Checking $mod->name module for deletions."); - $values = $db_names_function(); - - $sql = "select id, docid from ".SEARCH_DATABASE_TABLE. - " where doctype like '$mod->name'". - " and docid not in". - " (select ".$values[0]." from ".$values[1].")"; - - $records = get_records_sql($sql); - - //build an array of all the deleted records - if (is_array($records)) { - foreach($records as $record) { - $deletions[] = $delete_function($record->docid); - } //foreach - } //if - - foreach ($deletions as $delete) { - //find the specific document in the index, using it's docid and doctype as keys - $doc = $index->find("+docid:$delete +doctype:$mod->name"); - - //get the record, should only be one - foreach ($doc as $thisdoc) { - ++$deletion_count; - mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)"); - - //remove it from index and database table - $dbcontrol->delDocument($thisdoc); - $index->delete($thisdoc->id); - } //foreach - } //foreach - - mtrace("Finished $mod->name.\n"); - } //if - } //if - } //foreach - } //if - - //commit changes - $index->commit(); - - //update index date and index size - set_config("search_indexer_run_date", time()); - set_config("search_index_size", (int)$CFG->search_index_size - (int)$deletion_count); - - mtrace("Finished $deletion_count removals."); - 
mtrace('Index size after: '.$index->count().''); +} + +require_once("$CFG->dirroot/search/indexlib.php"); + +$index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); +$dbcontrol = new IndexDBControl(); +$deletion_count = 0; +$startcleantime = time(); + +mtrace('
Starting clean-up of removed records...'); +mtrace('Index size before: '.$CFG->search_index_size."\n"); + +if ($mods = get_records_select('modules')) { + $mods = array_merge($mods, search_get_additional_modules()); + + foreach ($mods as $mod) { + //build function names + $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + $delete_function = $mod->name.'_delete'; + $db_names_function = $mod->name.'_db_names'; + $deletions = array(); + + if (file_exists($class_file)) { + require_once($class_file); + + //if both required functions exist + if (function_exists($delete_function) and function_exists($db_names_function)) { + mtrace("Checking $mod->name module for deletions."); + $valuesArray = $db_names_function(); + if ($valuesArray){ + foreach($valuesArray as $values){ + $where = (isset($values[5])) ? 'WHERE '.$values[5] : ''; + $itemtypes = ($values[4] != '*') ? " itemtype = '{$values[4]}' AND " : '' ; + $query = " + SELECT + id, + {$values[0]} + FROM + {$CFG->prefix}{$values[1]} + $where + "; + $docIds = get_records_sql($query); + $docIdList = ($docIds) ? 
implode("','", array_keys($docIds)) : '' ; + + $table = SEARCH_DATABASE_TABLE; + $query = " + SELECT + id, + docid + FROM + {$CFG->prefix}{$table} + WHERE + doctype = '{$mod->name}' AND + $itemtypes + docid not in ('{$docIdList}') + "; + $records = get_records_sql($query); + + // build an array of all the deleted records + if (is_array($records)) { + foreach($records as $record) { + $deletions[] = $delete_function($record->docid, $values[4]); + } + } + } + + foreach ($deletions as $delete) { + // find the specific document in the index, using it's docid and doctype as keys + $doc = $index->find("+docid:{$delete->id} +doctype:$mod->name +itemtype:{$delete->itemtype}"); + + // get the record, should only be one + foreach ($doc as $thisdoc) { + ++$deletion_count; + mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)"); + + //remove it from index and database table + $dbcontrol->delDocument($thisdoc); + $index->delete($thisdoc->id); + } + } + } + else{ + mtrace("No types to delete.\n"); + } + mtrace("Finished $mod->name.\n"); + } + } + } +} + +//commit changes +$index->commit(); + +//update index date and index size +set_config("search_indexer_cleanup_date", $startcleantime); +set_config("search_index_size", (int)$CFG->search_index_size - (int)$deletion_count); + +mtrace("Finished $deletion_count removals."); +mtrace('Index size after: '.$index->count().''); ?> \ No newline at end of file diff --git a/search/documents/chat_document.php b/search/documents/chat_document.php new file mode 100644 index 0000000000000..f3196d7da02a1 --- /dev/null +++ b/search/documents/chat_document.php @@ -0,0 +1,271 @@ +dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/chat/lib.php"); + +/* +* a class for representing searchable information +* +**/ +class ChatTrackSearchDocument extends SearchDocument { + + /** + * constructor + * + */ + public function __construct(&$chatsession, 
$chat_module_id, $course_id, $group_id, $context_id) { + // generic information; required + $doc->docid = $chat_module_id.'-'.$chatsession['sessionstart'].'-'.$chatsession['sessionend']; + $doc->documenttype = SEARCH_TYPE_CHAT; + $doc->itemtype = 'session'; + $doc->contextid = $context_id; + + $duration = $chatsession['sessionend'] - $chatsession['sessionstart']; + // we cannot call userdate with relevant locale at indexing time. + $doc->title = get_string('chatreport', 'chat').' '.get_string('openedon', 'search').' TT_'.$chatsession['sessionstart'].'_TT ('.get_string('duration', 'search').' : '.get_string('numseconds', '', $duration).')'; + $doc->date = $chatsession['sessionend']; + + //remove '(ip.ip.ip.ip)' from chat author list + $doc->author = preg_replace('/\(.*?\)/', '', $chatsession['authors']); + $doc->contents = $chatsession['content']; + $doc->url = chat_make_link($chat_module_id, $chatsession['sessionstart'], $chatsession['sessionend']); + + // module specific information; optional + $data->chat = $chat_module_id; + + // construct the parent class + parent::__construct($doc, $data, $course_id, $group_id, 0, PATH_FOR_SEARCH_TYPE_CHAT); + } //constructor +} //ChatTrackSearchDocument + + +/** +* constructs a valid link to a chat content +* @param cm_id the chat course module +* @param start the start time of the session +* @param end th end time of the session +* @return a well formed link to session display +*/ +function chat_make_link($cm_id, $start, $end) { + global $CFG; + + return $CFG->wwwroot.'/mod/chat/report.php?id='.$cm_id.'&start='.$start.'&end='.$end; +} //chat_make_link + +/** +* fetches all the records for a given session and assemble them as a unique track +* we revamped here the code of report.php for making sessions, but without any output. +* note that we should collect sessions "by groups" if groupmode() is SEPARATEGROUPS. +* @param chat_id the database +* @return an array of objects representing the chat sessions. 
+*/ +function chat_get_session_tracks($chat_id, $fromtime = 0, $totime = 0) { + global $CFG; + + $chat = get_record('chat', 'id', $chat_id); + $course = get_record('course', 'id', $chat->course); + $coursemodule = get_field('modules', 'id', 'name', 'data'); + $cm = get_record('course_modules', 'course', $course->id, 'module', $coursemodule, 'instance', $chat->id); + $groupmode = groupmode($course, $cm); + + $fromtimeclause = ($fromtime) ? "AND timestamp >= {$fromtime}" : ''; + $totimeclause = ($totime) ? "AND timestamp <= {$totime}" : ''; + $tracks = array(); + $messages = get_records_select('chat_messages', "chatid = '{$chat_id}' $fromtimeclause $totimeclause", "timestamp DESC"); + if ($messages){ + // splits discussions against groups + $groupedMessages = array(); + if ($groupmode != SEPARATEGROUPS){ + foreach($messages as $aMessage){ + $groupedMessages[$aMessage->groupid][] = $aMessage; + } + } + else{ + $groupedMessages[-1] = &$messages; + } + $sessiongap = 5 * 60; // 5 minutes silence means a new session + $sessionend = 0; + $sessionstart = 0; + $sessionusers = array(); + $lasttime = time(); + + foreach ($groupedMessages as $groupId => $messages) { // We are walking BACKWARDS through the messages + $messagesleft = count($messages); + foreach ($messages as $message) { // We are walking BACKWARDS through the messages + $messagesleft --; // Countdown + + if ($message->system) { + continue; + } + // we are within a session track + if ((($lasttime - $message->timestamp) < $sessiongap) and $messagesleft) { // Same session + if (count($tracks) > 0){ + if ($message->userid) { // Remember user and count messages + $tracks[count($tracks) - 1]->sessionusers[$message->userid] = $message->userid; + // update last track (if exists) record appending content (remember : we go backwards) + } + $tracks[count($tracks) - 1]->content .= ' '.$message->message; + $tracks[count($tracks) - 1]->sessionstart = $message->timestamp; + } + } + // we initiate a new session track (backwards) 
+ else { + $track = new Object(); + $track->sessionend = $message->timestamp; + $track->sessionstart = $message->timestamp; + $track->content = $message->message; + // reset the accumulator of users + $track->sessionusers = array(); + $track->sessionusers[$message->userid] = $message->userid; + $track->groupid = $groupId; + $tracks[] = $track; + } + $lasttime = $message->timestamp; + } + } + } + return $tracks; +} //chat_get_session_tracks + +/** +* part of search engine API +* +*/ +function chat_iterator() { + $chatrooms = get_records('chat'); + return $chatrooms; +} //chat_iterator + +/** +* part of search engine API +* +*/ +function chat_get_content_for_index(&$chat) { + $documents = array(); + $course = get_record('course', 'id', $chat->course); + $coursemodule = get_field('modules', 'id', 'name', 'chat'); + $cm = get_record('course_modules', 'course', $course->id, 'module', $coursemodule, 'instance', $chat->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + + // getting records for indexing + $sessionTracks = chat_get_session_tracks($chat->id); + if ($sessionTracks){ + foreach($sessionTracks as $aTrackId => $aTrack) { + foreach($aTrack->sessionusers as $aUserId){ + $user = get_record('user', 'id', $aUserId); + $aTrack->authors = ($user) ? $user->firstname.' 
'.$user->lastname : '' ; + $documents[] = new ChatTrackSearchDocument(get_object_vars($aTrack), $cm->id, $chat->course, $aTrack->groupid, $context->id); + } + } + } + return $documents; +} //chat_get_content_for_index + +/** +* returns a single data search document based on a chat_session id +* chat session id is a text composite identifier made of : +* - the chat id +* - the timestamp when the session starts +* - the timestamp when the session ends +* @param id the multipart chat session id +* @param itemtype the type of information (session is the only type) +*/ +function chat_single_document($id, $itemtype) { + list($chat_id, $sessionstart, $sessionend) = split('-', $id); + $chat = get_record('chat', 'id', $chat_id); + $course = get_record('course', 'id', $chat->course); + $coursemodule = get_field('modules', 'id', 'name', 'chat'); + $cm = get_record('course_modules', 'course', $course->id, 'module', $coursemodule, 'instance', $chat->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + + // should be only one + $tracks = chat_get_session_tracks($chat->id, $sessionstart, $sessionstart); + if ($tracks){ + $aTrack = $tracks[0]; + $documents[] = new ChatTrackSearchDocument(get_object_vars($aTrack), $cm->id, $chat->course, $aTrack->groupid, $context->id); + } +} //chat_single_document + +/** +* dummy delete function that packs id with itemtype. +* this was here for a reason, but I can't remember it at the moment. +* +*/ +function chat_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //chat_delete + +/** +* returns the var names needed to build a sql query for addition/deletions +* // TODO chat indexable records are virtual. Should proceed in a special way +*/ +function chat_db_names() { + //[primary id], [table name], [time created field name], [time modified field name] + return null; +} //chat_db_names + +/** +* this function handles the access policy to contents indexed as searchable documents. 
If this +* function does not exist, the search engine assumes access is allowed. +* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param itemtype the information subclassing (usefull for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by entry_type. In chats, this id +* points out a session history which is a close sequence of messages. +* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @return true if access is allowed, false elsewhere +*/ +function chat_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){ + global $CFG; + + include_once("{$CFG->dirroot}/{$path}/lib.php"); + + list($chat_id, $sessionstart, $sessionend) = split('-', $id); + + // get the chat session and all related stuff + $chat = get_record('chat', 'id', $chat_id); + $course = get_record('course', 'id', $chat->course); + $module_context = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $module_context->instanceid); + if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $module_context)) return false; + + //group consistency check : checks the following situations about groups + // trap if user is not same group and groups are separated + $current_group = get_current_group($course->id); + if ((groupmode($course) == SEPARATEGROUPS) && !ismember($group_id) && !has_capability('moodle/site:accessallgroups', $module_context)) return false; + + //ownership check : checks the following situations about user + // trap if user is not owner and has cannot see other's entries + // TODO : typically may be stored into indexing cache 
+ if (!has_capability('mod/chat:readlog', $module_context)) return false; + + return true; +} //chat_check_text_access + +/** +* this call back is called when displaying the link for some last post processing +* +*/ +function chat_link_post_processing($title){ + setLocale(LC_TIME, substr(current_language(), 0, 2)); + $title = preg_replace('/TT_(.*)_TT/e', "userdate(\\1)", $title); + return $title; +} //chat_link_post_processing +?> \ No newline at end of file diff --git a/search/documents/data_document.php b/search/documents/data_document.php new file mode 100644 index 0000000000000..afb52901fef94 --- /dev/null +++ b/search/documents/data_document.php @@ -0,0 +1,370 @@ +dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/data/lib.php"); + +/* +* a class for representing searchable information (data records) +* +**/ +class DataSearchDocument extends SearchDocument { + + /** + * constructor + * + */ + public function __construct(&$record, $course_id, $context_id) { + // generic information; required + $doc->docid = $record['id']; + $doc->documenttype = SEARCH_TYPE_DATA; + $doc->itemtype = 'record'; + $doc->contextid = $context_id; + + $doc->title = $record['title']; + $doc->date = $record['timemodified']; + //remove '(ip.ip.ip.ip)' from data record author field + if ($record['userid']){ + $user = get_record('user', 'id', $record['userid']); + } + $doc->author = (isset($user)) ? $user->firstname.' 
'.$user->lastname : '' ; + $doc->contents = $record['content']; + $doc->url = data_make_link($record['dataid'], $record['id']); + + // module specific information; optional + // $data->params = serialize(@$record['params']); may be useful + $data->database = $record['dataid']; + + // construct the parent class + parent::__construct($doc, $data, $course_id, $record['groupid'], $record['userid'], PATH_FOR_SEARCH_TYPE_DATA); + } //constructor +} //ChatSearchDocument + +/* +* a class for representing searchable information (comments on data records) +* +**/ +class DataCommentSearchDocument extends SearchDocument { + + /** + * constructor + * + */ + public function __construct(&$comment, $course_id, $context_id) { + // generic information; required + $doc->docid = $comment['id']; + $doc->documenttype = SEARCH_TYPE_DATA; + $doc->itemtype = 'comment'; + $doc->contextid = $context_id; + + $doc->title = get_string('commenton', 'search').' '.$comment['title']; + $doc->date = $comment['modified']; + //remove '(ip.ip.ip.ip)' from data record author field + $doc->author = preg_replace('/\(.*?\)/', '', $comment['author']); + $doc->contents = $comment['content']; + $doc->url = data_make_link($data_id, $comment['recordid']); + + // module specific information; optional + $data->database = $comment['dataid']; + + // construct the parent class + parent::__construct($doc, $data, $course_id, $comment['groupid'], $comment['userid'], PATH_FOR_SEARCH_TYPE_DATA); + } //constructor +} //ChatCommentSearchDocument + +/** +* constructs a valid link to a data record content +* @param database_id the database reference +* @param record_id the record reference +* @return a valid url top access the information as a string +*/ +function data_make_link($database_id, $record_id) { + global $CFG; + + return $CFG->wwwroot.'/mod/data/view.php?d='.$database_id.'&rid='.$record_id; +} //data_make_link + +/** +* fetches all the records for a given database +* @param database_id the database +* @param 
typematch a comma separated list of field types that should be considered for searching, or * for all types
+* @return an array indexed by record id ; each entry is an array of texts where '_first'
+*         holds the first matching field (in field id order) and every other matching
+*         field is stored under its field name
+*/
+function data_get_records($database_id, $typematch = '*') {
+    global $CFG;
+
+    $records = array();
+    // the id is written into raw SQL below : force it to an integer
+    $database_id = (int)$database_id;
+
+    // field definitions indexed by field id, used to resolve type and name of each content row
+    $fieldset = get_records('data_fields', 'dataid', $database_id);
+    if (!$fieldset) return $records;
+
+    $query = "
+        SELECT
+           c.*
+        FROM 
+            {$CFG->prefix}data_content as c,
+            {$CFG->prefix}data_records as r
+        WHERE
+            c.recordid = r.id AND
+            r.dataid = {$database_id}
+        ORDER BY 
+            c.fieldid
+    ";
+    $data = get_records_sql($query);
+    if (!$data) return $records;
+
+    foreach($data as $aDatum){
+        // ignore content rows whose field definition is missing
+        if (!isset($fieldset[$aDatum->fieldid])) continue;
+        $aField = $fieldset[$aDatum->fieldid];
+        // the type is matched as a whole word of the list ; it is quoted so that
+        // it can never be read as regex syntax
+        if ($typematch != '*' && !preg_match('/\b'.preg_quote($aField->type, '/').'\b/', $typematch)) continue;
+
+        $text = $aDatum->content.' '.$aDatum->content1.' '.$aDatum->content2.' '.$aDatum->content3.' '.$aDatum->content4.' ';
+        if (!isset($records[$aDatum->recordid])){
+            $records[$aDatum->recordid]['_first'] = $text;
+        }
+        else{
+            $records[$aDatum->recordid][$aField->name] = $text;
+        }
+    }
+    return $records;
+} //data_get_records
+
+/**
+* fetches all the comments for a given database
+* @param database_id the database
+* @return an array of objects representing the data record comments. 
+*/
+function data_get_comments($database_id) {
+    global $CFG;
+
+    // the id is written into raw SQL below : force it to an integer
+    $database_id = (int)$database_id;
+
+    // restricted to the requested database : without the dataid clause every
+    // database would index the comments of all the others
+    $query = "
+       SELECT
+          c.id,
+          r.groupid,
+          c.userid,
+          c.recordid,
+          c.content,
+          c.created,
+          c.modified,
+          r.dataid
+       FROM
+          {$CFG->prefix}data_comments as c,
+          {$CFG->prefix}data_records as r
+       WHERE
+          c.recordid = r.id AND
+          r.dataid = {$database_id}
+    ";
+    $comments = get_records_sql($query);
+    return $comments;
+} //data_get_comments
+
+
+/**
+* part of search engine API
+* @return all the rows of the 'data' table, as returned by get_records()
+*/
+function data_iterator() {
+    $databases = get_records('data');
+    return $databases;
+} //data_iterator
+
+/**
+* part of search engine API
+* @param database the database instance
+* @return an array of searchable documents : one per record holding text, then one per comment
+*/
+function data_get_content_for_index(&$database) {
+
+    $documents = array();
+    $recordTitles = array();
+    $coursemodule = get_field('modules', 'id', 'name', 'data');
+    $cm = get_record('course_modules', 'course', $database->course, 'module', $coursemodule, 'instance', $database->id);
+    // without a course module there is no context to attach documents to
+    if (!$cm) return $documents;
+    $context = get_context_instance(CONTEXT_MODULE, $cm->id);
+
+    // getting records for indexing
+    $records_content = data_get_records($database->id, 'text');
+    if ($records_content){
+        foreach(array_keys($records_content) as $aRecordId) {
+
+            // extract title as first record in order
+            $first = $records_content[$aRecordId]['_first'];
+            unset($records_content[$aRecordId]['_first']);
+            $recordTitles[$aRecordId] = $first;
+
+            // concatenates all other texts ; restarted for each record so that
+            // the text of one record never leaks into the next one
+            $content = '';
+            foreach($records_content[$aRecordId] as $aField){
+                $content .= ' '.$aField;
+            }
+            // a record is worth indexing as soon as its title or its body holds text
+            if (strlen(trim($first.$content)) > 0) {
+                $recordMetaData = get_record('data_records', 'id', $aRecordId);
+                if (!$recordMetaData) continue;
+                $recordMetaData->title = $first;
+                $recordMetaData->content = $content;
+                // the document constructor takes its first argument by reference
+                $recordVars = get_object_vars($recordMetaData);
+                $documents[] = new DataSearchDocument($recordVars, $database->course, $context->id);
+            }
+        }
+    }
+
+    // getting comments for indexing
+    $records_comments = data_get_comments($database->id);
+    if ($records_comments){
+        foreach($records_comments as $aComment){
+            // a comment borrows the title of the record it is attached to
+            $aComment->title = (isset($recordTitles[$aComment->recordid])) ? $recordTitles[$aComment->recordid] : '' ;
+            $commentVars = get_object_vars($aComment);
+            $documents[] = new DataCommentSearchDocument($commentVars, $database->course, $context->id);
+        }
+    }
+    return $documents;
+} //data_get_content_for_index
+
+/**
+* returns a single data search document based on a data entry id
+* @param id the id of the record or of the comment
+* @param itemtype the type of the information : 'record' or 'comment'
+* @return a single searchable document, or null when the item, its course module
+*         or its type cannot be resolved
+*/
+function data_single_document($id, $itemtype) {
+
+    if ($itemtype == 'record'){
+        // get main record
+        $recordMetaData = get_record('data_records', 'id', $id);
+        if (!$recordMetaData) return null;
+        // get context
+        $record_course = get_field('data', 'course', 'id', $recordMetaData->dataid);
+        $coursemodule = get_field('modules', 'id', 'name', 'data');
+        $cm = get_record('course_modules', 'course', $record_course, 'module', $coursemodule, 'instance', $recordMetaData->dataid);
+        if (!$cm) return null;
+        $context = get_context_instance(CONTEXT_MODULE, $cm->id);
+        // compute text the same way data_get_records() does : the field type is read
+        // from data_fields, the first text field is the title, the others the body
+        $fieldset = get_records('data_fields', 'dataid', $recordMetaData->dataid);
+        $recordData = get_records('data_content', 'recordid', $id, 'fieldid');
+        $first = '';
+        $accumulator = '';
+        $titleFound = false;
+        if ($fieldset && $recordData){
+            foreach($recordData as $aDatum){
+                if (!isset($fieldset[$aDatum->fieldid]) || $fieldset[$aDatum->fieldid]->type != 'text') continue;
+                $text = $aDatum->content.' '.$aDatum->content1.' '.$aDatum->content2.' '.$aDatum->content3.' '.$aDatum->content4.' ';
+                if (!$titleFound){
+                    $first = $text;
+                    $titleFound = true;
+                }
+                else{
+                    $accumulator .= $text;
+                }
+            }
+        }
+        // add extra fields
+        $recordMetaData->title = $first;
+        $recordMetaData->content = $accumulator;
+        // make document
+        $recordVars = get_object_vars($recordMetaData);
+        return new DataSearchDocument($recordVars, $record_course, $context->id);
+    }
+    elseif($itemtype == 'comment'){
+        // get main records
+        $comment = get_record('data_comments', 'id', $id);
+        if (!$comment) return null;
+        $record = get_record('data_records', 'id', $comment->recordid);
+        if (!$record) return null;
+        // get context
+        $record_course = get_field('data', 'course', 'id', $record->dataid);
+        $coursemodule = get_field('modules', 'id', 'name', 'data');
+        $cm = get_record('course_modules', 'course', $record_course, 'module', $coursemodule, 'instance', $record->dataid);
+        if (!$cm) return null;
+        $context = get_context_instance(CONTEXT_MODULE, $cm->id);
+        // add extra fields
+        $comment->title = get_field('search_document', 'title', 'docid', $record->id, 'itemtype', 'record');
+        $comment->dataid = $record->dataid;
+        $comment->groupid = $record->groupid;
+        // make document
+        $commentVars = get_object_vars($comment);
+        return new DataCommentSearchDocument($commentVars, $record_course, $context->id);
+    }
+
+    mtrace('Error : bad or missing item type');
+    return null;
+} //data_single_document
+
+/**
+* dummy delete function that packs id with itemtype.
+* this was here for a reason, but I can't remember it at the moment.
+* @param info the id of the item
+* @param itemtype the type of the item
+* @return an object with the two fields id and itemtype
+*/
+function data_delete($info, $itemtype) {
+    $object = new stdClass();
+    $object->id = $info;
+    $object->itemtype = $itemtype;
+    return $object;
+} //data_delete
+
+/**
+* returns the var names needed to build a sql query for addition/deletions
+* @return one description array per item type
+*/
+function data_db_names() {
+    //[primary id], [table name], [time created field name], [time modified field name], [item type]
+    return array(
+        array('id', 'data_records', 'timecreated', 'timemodified', 'record'),
+        array('id', 'data_comments', 'created', 'modified', 'comment')
+    );
+} //data_db_names
+
+/**
+* this function handles the access policy to contents indexed as searchable documents. If this 
+* function does not exist, the search engine assumes access is allowed.
+* When this point is reached, we already know that : 
+* - user is legitimate in the surrounding context
+* - user may be guest and guest access is allowed to the module
+* - the function may perform local checks within the module information logic
+* @param path the access path to the module script code
+* @param itemtype the information subclassing (useful for complex modules, defaults to 'standard')
+* @param this_id the item id within the information class denoted by itemtype. In databases, this id 
+* points out an indexed data record page.
+* @param user the user record denoting the user who searches
+* @param group_id the current group used by the user when searching
+* @param context_id the id of the context record the indexed item is attached to
+* @return true if access is allowed, false otherwise (also false when the item,
+*         its database, its context or its course module cannot be found any more)
+*/
+function data_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){
+    global $CFG;
+    
+    // get the database object and all related stuff
+    if ($itemtype == 'record'){
+        $record = get_record('data_records', 'id', $this_id);
+    }
+    elseif($itemtype == 'comment'){
+        $comment = get_record('data_comments', 'id', $this_id);
+        $record = ($comment) ? get_record('data_records', 'id', $comment->recordid) : false ;
+    }
+    else{
+        // we do not know what type of information is required
+        return false;
+    }
+    // the item may have been deleted since it was indexed
+    if (!$record) return false;
+    $data = get_record('data', 'id', $record->dataid);
+    if (!$data) return false;
+    $course = get_record('course', 'id', $data->course);
+    $module_context = get_record('context', 'id', $context_id);
+    if (!$module_context) return false;
+    // same field as forum_check_text_access reads from the context row (was ->instance)
+    $cm = get_record('course_modules', 'id', $module_context->instanceid);
+    if (!$cm) return false;
+    if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $module_context)) return false;
+
+    //group consistency check : checks the following situations about groups
+    // trap if user is not same group and groups are separated
+    // NOTE(review): $current_group is not used below ; the call is kept in case
+    // get_current_group() initialises session state - confirm before removing
+    $current_group = get_current_group($course->id);
+    if ((groupmode($course) == SEPARATEGROUPS) && !ismember($group_id) && !has_capability('moodle/site:accessallgroups', $module_context)) return false;
+
+    //ownership check : checks the following situations about user
+    // trap if user is not owner and cannot see other's entries
+    if ($itemtype == 'record'){
+        if ($user->id != $record->userid && !has_capability('mod/data:viewentry', $module_context) && !has_capability('mod/data:manageentries', $module_context)) return false;
+    }
+
+    // the three remaining checks are all waived for the same people
+    $canbypass = (isteacher($data->course) || has_capability('mod/data:manageentries', $module_context));
+
+    //approval check
+    // trap if unapproved and has not approval capabilities
+    // TODO : report a potential capability lack of : mod/data:approve
+    $approval = get_field('data_records', 'approved', 'id', $record->id);
+    if (!$approval && !$canbypass) return false;
+
+    //minimum records to view check
+    // trap if too few records
+    // TODO : report a potential capability lack of : mod/data:viewhiddenentries
+    $recordsAmount = count_records('data_records', 'dataid', $data->id);
+    if ($data->requiredentriestoview > $recordsAmount && !$canbypass) return false;
+
+    //opening periods check
+    // trap if user has not capability to see hidden records and date is out of opening range
+    // TODO : report a potential capability lack of : mod/data:viewhiddenentries
+    $now = usertime(time());
+    if ($data->timeviewfrom > 0){
+        if ($now < $data->timeviewfrom && !$canbypass) return false;
+    }
+    if ($data->timeviewto > 0){
+        if ($now > $data->timeviewto && !$canbypass) return false;
+    }
+
+    return true;
+} // data_check_text_access
+?>
\ No newline at end of file
diff --git a/search/documents/document.php b/search/documents/document.php
index 466302fdc70b4..205a2ac18a2f9 100644
--- a/search/documents/document.php
+++ b/search/documents/document.php
@@ -1,24 +1,58 @@ addField(Zend_Search_Lucene_Field::Keyword('docid', $doc->docid));
-            $this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title));
-
$this->addField(Zend_Search_Lucene_Field::Text('author', $doc->author));
-            $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents));
-            $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url));
-            $this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date));
-
-            //additional data added on a per-module basis
-            $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));
-
-            $this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $document_type));
-            $this->addField(Zend_Search_Lucene_Field::Keyword('course_id', $course_id));
-            $this->addField(Zend_Search_Lucene_Field::Keyword('group_id', $group_id));
+/*
+* base class of every searchable document : turns the descriptors built by a module
+* into the set of Lucene fields stored in the index
+*
+**/
+abstract class SearchDocument extends Zend_Search_Lucene_Document {
+    /**
+    * constructor
+    * @param doc generic descriptor ; the fields read here are docid, documenttype,
+    *        itemtype, contextid, title, author, contents, url and date
+    * @param data module specific information, stored serialized
+    * @param course_id the course the information belongs to
+    * @param group_id the group of the originator
+    * @param user_id the originator, if any
+    * @param path where the library holding the access check of the module can be found
+    */
+    public function __construct(&$doc, &$data, $course_id, $group_id, $user_id, $path) {
+        //document identification and indexing
+        $this->addField(Zend_Search_Lucene_Field::Keyword('docid', $doc->docid));
+        //document type : the name of the Moodle element that manages it
+        $this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $doc->documenttype));
+        //allows subclassing information from complex modules.
+        $this->addField(Zend_Search_Lucene_Field::Keyword('itemtype', $doc->itemtype));
+        //caches the course context.
+        $this->addField(Zend_Search_Lucene_Field::Keyword('course_id', $course_id));
+        //caches the originator's group.
+        $this->addField(Zend_Search_Lucene_Field::Keyword('group_id', $group_id));
+        //caches the originator if any
+        $this->addField(Zend_Search_Lucene_Field::Keyword('user_id', $user_id));
+        // caches the context of this information. i-e, the context in which this information
+        // is being produced/attached. Speeds up the "check for access" process as context in
+        // which the information resides (a course, a module, a block, the site) is stable.
+        $this->addField(Zend_Search_Lucene_Field::UnIndexed('context_id', $doc->contextid));
+
+        //data for document
+        $this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title));
+        $this->addField(Zend_Search_Lucene_Field::Text('author', $doc->author));
+        $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents));
+        $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url));
+        $this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date));
+
+        //additional data added on a per-module basis
+        $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));
+
+        // adding a path allows the document to know where to find specific library calls
+        // for checking access to a module or block content. The Lucene records should only
+        // be responsible to bring back to that call sufficient and consistent information
+        // in order to perform the check.
+        $this->addField(Zend_Search_Lucene_Field::UnIndexed('path', $path));
+        /*
+        // adding a capability set required for viewing. -1 if no capability required.
+        // the capability required for viewing is depending on the local situation
+        // of the document. each module should provide this information when pushing
+        // out search document structure. Although capability model should be kept flat
+        // there is no exclusion some module or block developpers use logical combinations
+        // of multiple capabilities in their code. This possibility should be left open here.
+        $this->addField(Zend_Search_Lucene_Field::UnIndexed('capabilities', $caps));
+        */
     } //constructor
-    } //SearchDocument
+} //SearchDocument
 ?>
\ No newline at end of file
diff --git a/search/documents/forum_document.php b/search/documents/forum_document.php
index e447a53f44c21..a3b13d40a9277 100644
--- a/search/documents/forum_document.php
+++ b/search/documents/forum_document.php
@@ -1,135 +1,269 @@ dirroot/search/documents/document.php");
+require_once("$CFG->dirroot/mod/forum/lib.php");
-    require_once("$CFG->dirroot/search/documents/document.php");
-    require_once("$CFG->dirroot/mod/forum/lib.php");
+/*
+* a class for representing searchable information
+*
+**/
+class ForumSearchDocument extends SearchDocument {
-    class ForumSearchDocument extends SearchDocument {
-        public function __construct(&$post, $forum_id, $course_id, $group_id) {
-            // generic information
-            $doc->docid     = $post['id'];
-            $doc->title     = $post['subject'];
-            $doc->author    = $post['firstname']." ".$post['lastname'];
-            $doc->contents  = $post['message'];
-            $doc->date      = $post['created'];
+    /**
+    * constructor
+    * @param post the post as an associative array ; the keys read here are id, subject,
+    *        firstname, lastname, message, created, discussion, userid and, when known, groupid
+    * @param forum_id the forum holding the post
+    * @param course_id the course holding the forum
+    * @param itemtype the kind of post being indexed
+    * @param context_id the id of the context the post is attached to
+    */
+    public function __construct(&$post, $forum_id, $course_id, $itemtype, $context_id) {
+        // both descriptors are handed to the parent by reference : create them explicitly
+        // instead of relying on implicit object creation (a fatal error in PHP 8)
+        $doc = new stdClass();
+        $data = new stdClass();
+
+        // generic information
+        $doc->docid         = $post['id'];
+        $doc->documenttype  = SEARCH_TYPE_FORUM;
+        $doc->itemtype      = $itemtype;
+        $doc->contextid     = $context_id;
-            $doc->url       = forum_make_link($post['discussion'], $post['id']);
-
-            // module specific information
-            $data->forum      = $forum_id;
-            $data->discussion = $post['discussion'];
-
-            parent::__construct($doc, $data, SEARCH_TYPE_FORUM, $course_id, $group_id);
+        $doc->title     = $post['subject'];
+        $doc->author    = $post['firstname']." ".$post['lastname'];
+        $doc->contents  = $post['message'];
+        $doc->date      = $post['created'];
+        $doc->url       = forum_make_link($post['discussion'], $post['id']);
+
+        // module specific information
+        $data->forum      = $forum_id;
+        $data->discussion = $post['discussion'];
+
+        // not every caller provides the group of the discussion
+        // NOTE(review): -1 is what the glossary documents pass for "no group" - confirm it suits forums
+        $group_id = (isset($post['groupid'])) ? $post['groupid'] : -1 ;
+
+        parent::__construct($doc, $data, $course_id, $group_id, $post['userid'], PATH_FOR_SEARCH_TYPE_FORUM);
     } //constructor
-    } //ForumSearchDocument
+} //ForumSearchDocument
-    function forum_make_link($discussion_id, $post_id) {
+/**
+* constructs a valid link to a forum post
+* @param discussion_id the discussion
+* @param post_id the id of a single post
+* @return a well formed link to forum message display
+*/
+function forum_make_link($discussion_id, $post_id) {
     global $CFG;
+    
     return $CFG->wwwroot.'/mod/forum/discuss.php?d='.$discussion_id.'#'.$post_id;
-    } //forum_make_link
-
-    function forum_iterator() {
-        //no @ = Undefined index: 82 in moodle/lib/datalib.php on line 2671
-        return @get_all_instances_in_courses("forum", get_courses());
-    } //forum_iterator
-
-    function forum_get_content_for_index(&$forum) {
-        $documents = array();
-        if (!$forum) return $documents;
-
-        $posts = forum_get_discussions_fast($forum->id);
-        if (!$posts) return $documents;
-
-        while (!$posts->EOF) {
-            $post = $posts->fields;
-
-            if (is_array($post)) {
-                if (strlen($post['message']) > 0 && ($post['deleted'] != 1)) {
-                    $documents[] = new ForumSearchDocument($post, $forum->id, $forum->course, $post['groupid']);
-                } //if
+} //forum_make_link
-                if ($children = forum_get_child_posts_fast($post['id'], $forum->id)) {
-                    while (!$children->EOF) {
-                        $child = $children->fields;
+/**
+* search standard API
+* @return all the rows of the 'forum' table, as returned by get_records()
+*/
+function forum_iterator() {
+    $forums = get_records('forum');
+    return $forums;
+} //forum_iterator
-                        if (strlen($child['message']) > 0 && ($child['deleted'] != 1)) {
-                            $documents[] = new ForumSearchDocument($child, $forum->id, $forum->course, $post['groupid']);
-                        } //if
+/**
+* search standard API
+* @param forum a 
forum instance
+* @return an array of searchable documents
+*/
+function forum_get_content_for_index(&$forum) {
-                        $children->MoveNext();
-                    } //foreach
-                } //if
-            } //if
+    $documents = array();
+    if (!$forum) return $documents;
-            $posts->MoveNext();
-        } //foreach
+    $posts = forum_get_discussions_fast($forum->id);
+    if (!$posts) return $documents;
-        return $documents;
-    } //forum_get_content_for_index
+    $coursemodule = get_field('modules', 'id', 'name', 'forum');
+    $cm = get_record('course_modules', 'course', $forum->course, 'module', $coursemodule, 'instance', $forum->id);
+    // without a course module there is no context to attach documents to
+    if (!$cm) return $documents;
+    $context = get_context_instance(CONTEXT_MODULE, $cm->id);
-    //returns a single forum search document based on a forum_entry id
-    function forum_single_document($id) {
-        $posts = get_recordset('forum_posts', 'id', $id);
-        $post = $posts->fields;
+    foreach($posts as $aPost) {
+        if (!$aPost) continue;
+        $aPost->itemtype = 'head';
+        if (strlen($aPost->message) > 0) {
+            // the document constructor takes its first argument by reference
+            $headVars = get_object_vars($aPost);
+            $documents[] = new ForumSearchDocument($headVars, $forum->id, $forum->course, 'head', $context->id);
+        }
+        if ($children = forum_get_child_posts_fast($aPost->id, $forum->id)) {
+            foreach($children as $aChild) {
+                $aChild->itemtype = 'post';
+                // child rows carry no group : they inherit the group of the discussion head
+                $aChild->groupid = $aPost->groupid;
+                if (strlen($aChild->message) > 0) {
+                    $childVars = get_object_vars($aChild);
+                    $documents[] = new ForumSearchDocument($childVars, $forum->id, $forum->course, 'post', $context->id);
+                }
+            }
+        }
+    }
+    return $documents;
+} //forum_get_content_for_index
-        $discussions = get_recordset('forum_discussions', 'id', $post['discussion']);
-        $discussion = $discussions->fields;
+/**
+* returns a single forum search document based on a forum entry id
+* @param id an id for a single information stub
+* @param itemtype the type of information
+* @return a single searchable document, or null when the post, its discussion
+*         or its course module cannot be found
+*/
+function forum_single_document($id, $itemtype) {
-        $forums = get_recordset('forum', 'id', $discussion['forum']);
-        $forum = $forums->fields;
+    // both known item types are posts so get them the same way
+    $post = get_record('forum_posts', 'id', $id);
+    if (!$post) return null;
+    $discussion = get_record('forum_discussions', 'id', $post->discussion);
+    if (!$discussion) return null;
+    $coursemodule = get_field('modules', 'id', 'name', 'forum');
+    $cm = get_record('course_modules', 'course', $discussion->course, 'module', $coursemodule, 'instance', $discussion->forum);
+    if (!$cm) return null;
+    $context = get_context_instance(CONTEXT_MODULE, $cm->id);
+    // a bare forum_posts row lacks what the indexing queries add : the group of the
+    // discussion and the name of the author
+    $post->groupid = $discussion->groupid;
+    $user = get_record('user', 'id', $post->userid);
+    $post->firstname = ($user) ? $user->firstname : '' ;
+    $post->lastname = ($user) ? $user->lastname : '' ;
+    $postVars = get_object_vars($post);
+    return new ForumSearchDocument($postVars, $discussion->forum, $discussion->course, $itemtype, $context->id);
+} //forum_single_document
-        return new ForumSearchDocument($post, $forum['id'], $forum['course'], $post['groupid']);
-    } //forum_single_document
+/**
+* dummy delete function that aggregates id with itemtype.
+* this was here for a reason, but I can't remember it at the moment.
+* @return an object with the two fields id and itemtype
+*/
+function forum_delete($info, $itemtype) {
+    $object = new stdClass();
+    $object->id = $info;
+    $object->itemtype = $itemtype;
+    return $object;
+} //forum_delete
-    function forum_delete($info) {
-        return $info;
-    } //forum_delete
-
-    //returns the var names needed to build a sql query for addition/deletions
-    function forum_db_names() {
+/**
+* returns the var names needed to build a sql query for addition/deletions
+* (each entry also carries its item type and an extra where clause)
+*/
+function forum_db_names() {
     //[primary id], [table name], [time created field name], [time modified field name]
-        return array('id', 'forum_posts', 'created', 'modified');
-    } //forum_db_names
+    return array(
+        array('id', 'forum_posts', 'created', 'modified', 'head', 'parent = 0'),
+        array('id', 'forum_posts', 'created', 'modified', 'post', 'parent != 0')
+    );
+} //forum_db_names
-    //reworked faster version from /mod/forum/lib.php
-    function forum_get_discussions_fast($forum) {
+/**
+* reworked faster version from /mod/forum/lib.php
+* @param forum_id a forum identifier
+* @return an array of posts
+*/
+function forum_get_discussions_fast($forum_id) {
     global $CFG, $USER;
-
+    
     $timelimit='';
-
     if (!empty($CFG->forum_enabletimedposts)) {
-            if (!((isadmin() and !empty($CFG->admineditalways)) || isteacher(get_field('forum', 'course', 'id', $forum)))) {
-                $now = time();
-                $timelimit = " AND ((d.timestart = 0 OR d.timestart <= '$now') AND (d.timeend = 0 OR d.timeend > '$now')";
-                if (!empty($USER->id)) {
-                    $timelimit .= " OR d.userid = '$USER->id'";
+        if (!((isadmin() and !empty($CFG->admineditalways)) || isteacher(get_field('forum', 'course', 'id', $forum_id)))) {
+            $now = time();
+            $timelimit = " AND ((d.timestart = 0 OR d.timestart <= '$now') AND (d.timeend = 0 OR d.timeend > '$now')";
+            if (!empty($USER->id)) {
+                $timelimit .= " OR d.userid = '$USER->id'";
+            }
+            $timelimit .= ')';
         }
-                $timelimit .= ')';
-            }
     }
+    
+    $query = "
+        SELECT
+            p.id,
+            p.subject,
+            p.discussion,
+            p.message,
+            p.created,
+            d.groupid,
+            p.userid,
+            u.firstname,
+            u.lastname
+        FROM 
+            {$CFG->prefix}forum_discussions d
+        JOIN 
+            {$CFG->prefix}forum_posts p 
+        ON 
+            p.discussion = d.id
+        JOIN 
+            {$CFG->prefix}user u 
+        ON 
+            p.userid = u.id
+        WHERE 
+            d.forum = '{$forum_id}' AND
+            p.parent = 0
+            $timelimit
+        ORDER BY 
+            d.timemodified DESC
+    ";
+    return get_records_sql($query);
+} //forum_get_discussions_fast
-        return get_recordset_sql("SELECT p.id, p.subject, p.discussion, p.message,
-                                  p.deleted, d.groupid, u.firstname, u.lastname
-                              FROM {$CFG->prefix}forum_discussions d
-                              JOIN {$CFG->prefix}forum_posts p ON p.discussion = d.id
-                              JOIN {$CFG->prefix}user u ON p.userid = u.id
-                             WHERE d.forum = '$forum'
-                               AND p.parent = 0
-                                   $timelimit
-                          ORDER BY d.timemodified DESC");
-    } //forum_get_discussions_fast
-
-    //reworked faster version from /mod/forum/lib.php
-    function forum_get_child_posts_fast($parent, $forumid) {
+/**
+* reworked faster version from /mod/forum/lib.php
+* @param parent the id of the first post within the discussion
+* @param forum_id the forum identifier
+* @return an array of posts
+*/
+function forum_get_child_posts_fast($parent, $forum_id) {
     global $CFG;
+    
+    $query = "
+        SELECT 
+            p.id, 
+            p.subject, 
+            p.discussion, 
+            p.message, 
+            p.created, 
+            {$forum_id} AS forum,
+            p.userid,
+            u.firstname, 
+            u.lastname
+        FROM 
+            {$CFG->prefix}forum_posts p
+        LEFT JOIN 
+            {$CFG->prefix}user u 
+        ON 
+            p.userid = u.id
+        WHERE 
+            p.parent = '{$parent}'
+        ORDER BY 
+            p.created ASC
+    ";
+    return get_records_sql($query);
+} //forum_get_child_posts_fast
+
+/**
+* this function handles the access policy to contents indexed as searchable documents. If this 
+* function does not exist, the search engine assumes access is allowed.
+* When this point is reached, we already know that : 
+* - user is legitimate in the surrounding context
+* - user may be guest and guest access is allowed to the module
+* - the function may perform local checks within the module information logic
+* @param path the access path to the module script code
+* @param itemtype the information subclassing (useful for complex modules, defaults to 'standard')
+* @param this_id the item id within the information class denoted by itemtype. In forums, this id 
+* points out the individual post.
+* @param user the user record denoting the user who searches
+* @param group_id the current group used by the user when searching
+* @param context_id the id of the context record of the forum ; optional, when
+*        omitted the context is resolved from the course module of the forum
+* @return true if access is allowed, false otherwise (also false when the post,
+*         its discussion, its context or its course module cannot be found any more)
+*/
+function forum_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id = null){
+    global $CFG;
+    
+    include_once("{$CFG->dirroot}/{$path}/lib.php");
+
+    // get the forum post and all related stuff
+    $post = get_record('forum_posts', 'id', $this_id);
+    if (!$post) return false;
+    $discussion = get_record('forum_discussions', 'id', $post->discussion);
+    if (!$discussion) return false;
+    $course = get_record('course', 'id', $discussion->course);
+    if ($context_id){
+        $context_module = get_record('context', 'id', $context_id);
+        if (!$context_module) return false;
+        $cm = get_record('course_modules', 'id', $context_module->instanceid);
+    }
+    else{
+        // same resolution as used when the documents are built
+        $coursemodule = get_field('modules', 'id', 'name', 'forum');
+        $cm = get_record('course_modules', 'course', $discussion->course, 'module', $coursemodule, 'instance', $discussion->forum);
+        $context_module = ($cm) ? get_context_instance(CONTEXT_MODULE, $cm->id) : false ;
+    }
+    if (!$cm || !$context_module) return false;
+    if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $context_module)) return false;
+    
+    // approval check : entries should be approved for being viewed, or belongs to the user 
+    if (!$post->mailed && !has_capability('mod/forum:viewhiddentimeposts', $context_module)) return false;
-        return get_recordset_sql("SELECT p.id, p.subject, p.discussion, p.message, p.deleted,
-                                  $forumid AS forum, u.firstname, u.lastname
-                              FROM {$CFG->prefix}forum_posts p
-                         LEFT JOIN {$CFG->prefix}user u ON p.userid = u.id
-                             WHERE p.parent = '$parent'
-                          ORDER BY p.created ASC");
-    } //forum_get_child_posts_fast
+    // group check : entries should be in accessible groups
+    $current_group = get_current_group($course->id);
+    if ((groupmode($course, $cm) == SEPARATEGROUPS) && ($group_id != $current_group) && !has_capability('mod/forum:viewdiscussionsfromallgroups', $context_module)) return false;
+    
+    return true;
+} //forum_check_text_access
 ?>
\ No newline at end of file
diff --git a/search/documents/glossary_document.php b/search/documents/glossary_document.php
index bfa692544be5a..da9e5e78fad2c 100644
--- a/search/documents/glossary_document.php
+++ b/search/documents/glossary_document.php
@@ -1,88 +1,235 @@ dirroot/search/documents/document.php");
-
-    class GlossarySearchDocument extends SearchDocument {
-        public function __construct(&$entry, $glossary_id, $course_id, $group_id) {
-            // generic information; required
-            $doc->docid     = $entry['id'];
-            $doc->title     = $entry['concept'];
-            $doc->date      = $entry['timecreated'];
-
-            $user = get_recordset('user', 'id', $entry['userid'])->fields;
-
-            $doc->author    = $user['firstname'].' '.$user['lastname'];
-            $doc->contents  = $entry['definition'];
-            $doc->url       = glossary_make_link($entry['id']);
-
-            // module specific information; optional
-            $data->glossary = $glossary_id;
-
-            // construct the parent class
-            parent::__construct($doc, $data, SEARCH_TYPE_GLOSSARY, $course_id, $group_id);
+/**
+* Global Search Engine for Moodle
+* Michael Champanis (mchampan) [cynnical@gmail.com]
+* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] 
+* 2007/08/02
+*
+* document handling for glossary activity module
+* This file contains a mapping between a glossary entry and it's indexable counterpart,
+*
+* Functions for iterating and retrieving the necessary records are now also included
+* in this file, rather than mod/glossary/lib.php
+**/
+
+require_once("$CFG->dirroot/search/documents/document.php");
+
+/* 
+* a class for representing searchable information
+* 
+**/
+class GlossarySearchDocument extends SearchDocument {
+    
+    /**
+    * document constructor
+    * @param entry the glossary entry as an associative array ; the keys read here are
+    *        id, concept, timecreated, userid, definition and glossaryid
+    * @param course_id the course holding the glossary
+    * @param context_id the id of the context the entry is attached to
+    */
+    public function __construct(&$entry, $course_id, $context_id) {
+        // both descriptors are handed to the parent by reference : create them explicitly
+        // instead of relying on implicit object creation (a fatal error in PHP 8)
+        $doc = new stdClass();
+        $data = new stdClass();
+
+        // generic information; required
+        $doc->docid     = $entry['id'];
+        $doc->documenttype  = SEARCH_TYPE_GLOSSARY;
+        $doc->itemtype      = 'standard';
+        $doc->contextid     = $context_id;
+
+        $doc->title     = $entry['concept'];
+        $doc->date      = $entry['timecreated'];
+
+        // $user must exist even when the entry has no author
+        $user = false;
+        if ($entry['userid'])
+            $user = get_record('user', 'id', $entry['userid']);
+        $doc->author    = ($user ) ? $user->firstname.' '.$user->lastname : '' ;
+        $doc->contents  = strip_tags($entry['definition']);
+        $doc->url       = glossary_make_link($entry['id']);
+        
+        // module specific information; optional
+        $data->glossary = $entry['glossaryid'];
+        
+        // construct the parent class
+        parent::__construct($doc, $data, $course_id, -1, $entry['userid'], PATH_FOR_SEARCH_TYPE_GLOSSARY);
     } //constructor
-    } //GlossarySearchDocument
-
-    function glossary_make_link($entry_id) {
+} //GlossarySearchDocument
+
+/* 
+* a class for representing searchable information
+* 
+**/
+class GlossaryCommentSearchDocument extends SearchDocument {
+    
+    /**
+    * document constructor
+    * @param entry the comment as an associative array ; the keys read here are
+    *        id, concept, timemodified, userid, entrycomment and entryid
+    * @param glossary_id the glossary holding the commented entry
+    * @param course_id the course holding the glossary
+    * @param context_id the id of the context the comment is attached to
+    */
+    public function __construct(&$entry, $glossary_id, $course_id, $context_id) {
+        $doc = new stdClass();
+        $data = new stdClass();
+
+        // generic information; required
+        $doc->docid     = $entry['id'];
+        $doc->documenttype  = SEARCH_TYPE_GLOSSARY;
+        $doc->itemtype      = 'comment';
+        $doc->contextid     = $context_id;
+
+        $doc->title     = get_string('commenton', 'search') . ' ' . $entry['concept'];
+        $doc->date      = $entry['timemodified'];
+
+        // $user must exist even when the comment has no author
+        $user = false;
+        if ($entry['userid'])
+            $user = get_record('user', 'id', $entry['userid']);
+        $doc->author    = ($user ) ? $user->firstname.' '.$user->lastname : '' ;
+        $doc->contents  = strip_tags($entry['entrycomment']);
+        $doc->url       = glossary_make_link($entry['entryid']);
+        
+        // module specific information; optional
+        $data->glossary = $glossary_id;
+        
+        // construct the parent class
+        parent::__construct($doc, $data, $course_id, -1, $entry['userid'], PATH_FOR_SEARCH_TYPE_GLOSSARY);
+    } //constructor
+} //GlossaryCommentSearchDocument
+  
+/**
+* constructs valid access links to information
+* @param entry_id the id of the glossary entry
+* @return a full featured link element as a string
+*/
+function glossary_make_link($entry_id) {
     global $CFG;
     
     //links directly to entry
-        //return $CFG->wwwroot.'/mod/glossary/showentry.php?eid='.$entry_id;
+    // return $CFG->wwwroot.'/mod/glossary/showentry.php?eid='.$entry_id;
     
-        //preserve glossary pop-up, be careful where you place your ' and "s
+    // TOO LONG URL
+    // Suggestion : bounce on popup within the glossarie's showentry page
+    // preserve glossary pop-up, be careful where you place your ' and "s
     //this function is meant to return a url that is placed between href='[url here]'
-        return "$CFG->wwwroot/mod/glossary/showentry.php?eid=$entry_id' onclick='return openpopup(\"/mod/glossary/showentry.php?eid=$entry_id\", \"entry\", \"menubar=0,location=0,scrollbars,resizable,width=600,height=450\", 0);";
-    } //glossary_make_link
+    // a constant is not expanded inside a string : it has to be concatenated, and quoted
+    // for javascript. The fallback is the literal the former version of this link used.
+    $popupSettings = (defined('DEFAULT_POPUP_SETTINGS')) ? DEFAULT_POPUP_SETTINGS : 'menubar=0,location=0,scrollbars,resizable,width=600,height=450' ;
+    return "$CFG->wwwroot/mod/glossary/showentry.php?eid=$entry_id' onclick='return openpopup(\"/mod/glossary/showentry.php?eid=$entry_id\", \"entry\", \"".$popupSettings."\", 0);";
+} //glossary_make_link
+
+/**
+* part of search engine API
+* @return all the rows of the 'glossary' table, as returned by get_records()
+*/
+function glossary_iterator() {
+     $glossaries = get_records('glossary');
+     return $glossaries;
+} //glossary_iterator
+
+/**
+* part of search engine API
+* @param glossary a glossary instance
+* @return an array of searchable documents
+*/
+function glossary_get_content_for_index(&$glossary) {
+
+    // get context
+    $coursemodule = get_field('modules', 'id', 'name', 'glossary');
+    $cm = get_record('course_modules', 'course', $glossary->course, 'module', $coursemodule, 'instance', $glossary->id);
+    // without a course module there is no context to attach documents to
+    if (!$cm) return array();
+    $context = get_context_instance(CONTEXT_MODULE, $cm->id);
-    function glossary_iterator() {
-        return get_all_instances_in_courses("glossary", get_courses());
-    } //glossary_iterator
-
-    function glossary_get_content_for_index(&$glossary) {
     $documents = array();
-
-        $entries = get_recordset('glossary_entries', 'glossaryid', $glossary->id);
-
-        while (!$entries->EOF) {
-            $entry = $entries->fields;
-
-            if ($entry and strlen($entry['definition']) > 0) {
-                $documents[] = new GlossarySearchDocument($entry, $glossary->id, $glossary->course, -1);
-            } //if
-
-            $entries->MoveNext();
-        } //foreach
-
+    $entryIds = array();
+    // concept of each entry indexed by entry id, used to title the comments
+    $concepts = array();
+    // index entries
+    $entries = get_records('glossary_entries', 'glossaryid', $glossary->id);
+    if ($entries){
+        foreach($entries as $entry) {
+            $concepts[$entry->id] = $entry->concept;
+            if (strlen($entry->definition) > 0) {
+                $entryIds[] = $entry->id;
+                // the document constructor takes its first argument by reference
+                $entryVars = get_object_vars($entry);
+                $documents[] = new GlossarySearchDocument($entryVars, $glossary->course, $context->id);
+            } 
+        } 
+    }
+    
+    // index comments
+    if (count($entryIds)){
+        $entryIdList = implode(',', $entryIds);
+        $comments = get_records_list('glossary_comments', 'entryid', $entryIdList);
+        if ($comments){
+            foreach($comments as $comment) {
+                if (strlen($comment->entrycomment) > 0) {
+                    $comment->concept = $concepts[$comment->entryid];
+                    $commentVars = get_object_vars($comment);
+                    $documents[] = new GlossaryCommentSearchDocument($commentVars, $glossary->id, $glossary->course, $context->id);
+                } 
+            }
+        }
+    }
     return $documents;
-    } //glossary_get_content_for_index
-
-    //returns a single glossary search document based on a glossary_entry id
-    function glossary_single_document($id) {
-        $entries = get_recordset('glossary_entries', 'id', $id);
-        $entry = $entries->fields;
-
-        $glossaries = get_recordset('glossary', 'id', $entry['glossaryid']);
-        $glossary = $glossaries->fields;
-
-        return new GlossarySearchDocument($entry, $entry['glossaryid'], $glossary['course'], -1);
-    } //glossary_single_document
-
-    //dummy delete function that converts docid from the search table to itself..
-    //this was here for a reason, but I can't remember it at the moment.
-    function glossary_delete($info) {
-        return $info;
-    } //glossary_delete
-
-    //returns the var names needed to build a sql query for addition/deletions
-    function glossary_db_names() {
+} //glossary_get_content_for_index
+
+/**
+* part of search engine API
+* @param id the glossary entry identifier, or the comment identifier for comments
+* @param itemtype the type of information : 'standard' or 'comment'
+* @return a single search document based on a glossary entry or comment, or null when
+*         the item, its course module or its type cannot be resolved
+*/
+function glossary_single_document($id, $itemtype) {
+    if ($itemtype == 'standard'){
+        $entry = get_record('glossary_entries', 'id', $id);
+    }
+    elseif ($itemtype == 'comment'){
+        $comment = get_record('glossary_comments', 'id', $id);
+        $entry = ($comment) ? get_record('glossary_entries', 'id', $comment->entryid) : false ;
+    }
+    else{
+        return null;
+    }
+    if (!$entry) return null;
+    $glossary_course = get_field('glossary', 'course', 'id', $entry->glossaryid);
+    $coursemodule = get_field('modules', 'id', 'name', 'glossary');
+    $cm = get_record('course_modules', 'course', $glossary_course, 'module', $coursemodule, 'instance', $entry->glossaryid);
+    if (!$cm) return null;
+    $context = get_context_instance(CONTEXT_MODULE, $cm->id);
+    if ($itemtype == 'standard'){
+        $entryVars = get_object_vars($entry);
+        return new GlossarySearchDocument($entryVars, $glossary_course, $context->id);
+    }
+    // the comment document is titled after the concept of the commented entry
+    $comment->concept = $entry->concept;
+    $commentVars = get_object_vars($comment);
+    return new GlossaryCommentSearchDocument($commentVars, $entry->glossaryid, $glossary_course, $context->id);
+} //glossary_single_document
+
+/**
+* dummy delete function that packs id with itemtype.
+* this was here for a reason, but I can't remember it at the moment.
+* +*/ +function glossary_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //glossary_delete + +/** +* returns the var names needed to build a sql query for addition/deletions +* +*/ +function glossary_db_names() { //[primary id], [table name], [time created field name], [time modified field name] - return array('id', 'glossary_entries', 'timecreated', 'timemodified'); - } //glossary_db_names + return array( + array('id', 'glossary_entries', 'timecreated', 'timemodified', 'standard'), + array('id', 'glossary_comments', 'timemodified', 'timemodified', 'comment') + ); +} //glossary_db_names + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. +* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param itemtype the information subclassing (usefull for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by itemtype. In glossaries, this id +* points out the indexed glossary item. 
+* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @return true if access is allowed, false elsewhere +*/ +function glossary_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){ + global $CFG; + + // get the glossary object and all related stuff + $entry = get_record('glossary_entries', 'id', $id); + $glossary = get_record('glossary', 'id', $entry->glossaryid); + $course = get_record('course', 'id', $glossary->course); + $module_context = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $module_context->instance); + if (!$cm->visible && !has_capability('moodle/course:viewhiddenactivities', $module_context)) return false; + + //approval check : entries should be approved for being viewed, or belongs to the user unless the viewer can approve them or manage them + if (!$entry->approved && $user != $entry->userid && !has_capability('mod/glossary:approve', $module_context) && !has_capability('mod/glossary:manageentries', $module_context)) return false; + + return true; +} //glossary_check_text_access ?> \ No newline at end of file diff --git a/search/documents/physical_doc.php b/search/documents/physical_doc.php new file mode 100644 index 0000000000000..3260451f5a7dd --- /dev/null +++ b/search/documents/physical_doc.php @@ -0,0 +1,47 @@ +id)) return; + + // just call pdftotext over stdout and capture the output + if (!empty($CFG->block_search_word_to_text_cmd)){ + if (!file_exists("{$CFG->dirroot}/{$CFG->block_search_word_to_text_cmd}")){ + mtrace('Error with MSWord to text converter command : exectuable not found.'); + } + else{ + $file = $CFG->dataroot.'/'.$resource->course.'/'.$resource->reference; + $text_converter_cmd = "{$CFG->dirroot}/{$CFG->block_search_word_to_text_cmd} $file"; + if ($CFG->block_search_word_to_text_env){ + putenv($CFG->block_search_word_to_text_env); + } + $result = shell_exec($text_converter_cmd); + if 
($result){ + return mb_convert_encoding($result, 'UTF8', 'auto'); + } + else{ + mtrace('Error with MSWord to text converter command : execution failed.'); + return ''; + } + } + } + else { + mtrace('Error with MSWord to text converter command : command not set up. Execute once search block configuration.'); + return ''; + } +} +?> \ No newline at end of file diff --git a/search/documents/physical_htm.php b/search/documents/physical_htm.php new file mode 100644 index 0000000000000..256dd365fc361 --- /dev/null +++ b/search/documents/physical_htm.php @@ -0,0 +1,39 @@ +id)) return; + + // just get text + $text = implode('', file("{$CFG->dataroot}/{$resource->course}/($resource->reference)")); + + // extract keywords and other interesting meta information and put it back as real content for indexing + if (preg_match('/(.*)]*)>(.*)/is',$text, $matches)){ + $prefix = $matches[1]; + $meta_attributes = $matches[2]; + $suffix = $matches{3]; + if (preg_match('/name="(keywords|description)"/i', $attributes)){ + preg_match('/content="[^"]+"/i', $attributes, $matches); + $text = $prefix.' '.$matches[1].' 
'.$suffix; + } + } + // filter all html tags + // $text = clean_text($text, FORMAT_PLAIN); + // NOTE : this is done in ResourceSearchDocument __constructor + + if (!empty($CFG->block_search_limit_index_body)){ + $text = shorten($text, $CFG->block_search_limit_index_body); + } + return $text; +} +?> \ No newline at end of file diff --git a/search/documents/physical_html.php b/search/documents/physical_html.php new file mode 100644 index 0000000000000..8455e709b40f3 --- /dev/null +++ b/search/documents/physical_html.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search/documents/physical_pdf.php b/search/documents/physical_pdf.php new file mode 100644 index 0000000000000..12765b06863e3 --- /dev/null +++ b/search/documents/physical_pdf.php @@ -0,0 +1,41 @@ +id)) return; + + // just call pdftotext over stdout and capture the output + if (!empty($CFG->block_search_pdf_to_text_cmd)){ + preg_match("/^\S+/", $CFG->block_search_pdf_to_text_cmd, $matches); + if (!file_exists("{$CFG->dirroot}/{$matches[0]}")){ + mtrace('Error with pdf to text converter command : exectuable not found.'); + } + else{ + $file = $CFG->dataroot.'/'.$resource->course.'/'.$resource->reference; + $text_converter_cmd = "{$CFG->dirroot}/{$CFG->block_search_pdf_to_text_cmd} $file -"; + $result = shell_exec($text_converter_cmd); + if ($result){ + return $result; + } + else{ + mtrace('Error with pdf to text converter command : execution failed.'); + return ''; + } + } + } + else { + mtrace('Error with pdf to text converter command : command not set up. 
Execute once search block configuration.'); + return ''; + } +} +?> \ No newline at end of file diff --git a/search/documents/physical_ppt.php b/search/documents/physical_ppt.php new file mode 100644 index 0000000000000..c9ceb55e08253 --- /dev/null +++ b/search/documents/physical_ppt.php @@ -0,0 +1,80 @@ +id)) return; + + $text = implode('', file("{$CFG->dataroot}/{$resource->course}/{$resource->reference}")); + + $remains = $text; + $fragments = array(); + while (preg_match('/\x00\x9F\x0F\x04.{9}(......)(.*)/s', $remains, $matches)){ + $unpacked = unpack("ncode/Llength", $matches[1]); + $sequencecode = $unpacked['code']; + $length = $unpacked['length']; + // print "length : ".$length." ; segment type : ".sprintf("%x", $sequencecode)."
Sorry, you need to confirm indexing via indexersplash.php" .". (Back to query page)."); exit(0); - } //if +} //if - //check for php5 (lib.php) - if (!search_check_php5()) { +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if +} - //php5 found, continue including php5-only files - //require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); - require_once("$CFG->dirroot/search/indexlib.php"); +//php5 found, continue including php5-only files +//require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); +require_once("$CFG->dirroot/search/indexlib.php"); - mtrace('
Server Time: '.date('r',time())."\n"); +mtrace(''); +mtrace(''); + +//finished, turn busy flag off +set_config("search_indexer_busy", "0"); + +//mark the time we last updated +set_config("search_indexer_run_date", time()); + +//and the index size +set_config("search_index_size", (int)$index->count()); ?> \ No newline at end of file diff --git a/search/indexersplash.php b/search/indexersplash.php index 913e2ae669b33..058118677df4e 100644 --- a/search/indexersplash.php +++ b/search/indexersplash.php @@ -1,33 +1,39 @@ dirroot/search/lib.php"); +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); - require_login(); +require_login(); - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} - //check for php5 (lib.php) - if (!search_check_php5()) { +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if +} - require_once("$CFG->dirroot/search/indexlib.php"); - $indexinfo = new IndexInfo(); +require_once("$CFG->dirroot/search/indexlib.php"); +$indexinfo = new IndexInfo(); - if ($indexinfo->valid()) { +if ($indexinfo->valid()) { mtrace("Server Time: '.date('r',time())."\n"); - if ($CFG->search_indexer_busy == '1') { +if ($CFG->search_indexer_busy == '1') { //means indexing was not finished previously mtrace("Warning: Indexing was not successfully completed last time, restarting.\n"); - } //if +} - //turn on busy flag - set_config('search_indexer_busy', '1'); +//turn on busy flag +set_config('search_indexer_busy', '1'); - //paths - $index_path = SEARCH_INDEX_PATH; 
- $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql"; - $dbcontrol = new IndexDBControl(); +//paths +$index_path = SEARCH_INDEX_PATH; +$index_db_file = "{$CFG->dirroot}/search/db/$CFG->dbtype.sql"; +$dbcontrol = new IndexDBControl(); - //setup directory in data root - if (!file_exists($index_path)) { +//setup directory in data root +if (!file_exists($index_path)) { mtrace("Data directory ($index_path) does not exist, attempting to create."); if (!mkdir($index_path)) { - search_pexit("Error creating data directory at: $index_path. Please correct."); - } else { - mtrace("Directory successfully created."); - } //else - } else { + search_pexit("Error creating data directory at: $index_path. Please correct."); + } + else { + mtrace("Directory successfully created."); + } +} +else { mtrace("Using $index_path as data directory."); - } //else +} - $index = new Zend_Search_Lucene($index_path, true); +$index = new Zend_Search_Lucene($index_path, true); - if (!$dbcontrol->checkDB()) { +if (!$dbcontrol->checkDB()) { search_pexit("Database error. Please check settings/files."); - } //if - - //begin timer - search_stopwatch(); - mtrace("Starting activity modules\n"); - - //the presence of the required search functions - - // * mod_iterator - // * mod_get_content_for_index - //are the sole basis for including a module in the index at the moment. - - if ($mods = get_records_select('modules' /*'index this module?' 
where statement*/)) { - //add virtual modules onto the back of the array - $mods = array_merge($mods, search_get_additional_modules()); - - foreach ($mods as $mod) { - $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; - - if (file_exists($class_file)) { - include_once($class_file); - - //build function names - $iter_function = $mod->name.'_iterator'; - $index_function = $mod->name.'_get_content_for_index'; - - $counter = 0; - $doc = new stdClass; - - if (function_exists($index_function) && function_exists($iter_function)) { - mtrace("Processing module function $index_function ..."); - - foreach ($iter_function() as $i) { - $documents = $index_function($i); - - //begin transaction - - foreach($documents as $document) { - $counter++; - - //object to insert into db - $dbid = $dbcontrol->addDocument($document); - - //synchronise db with index - $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); - - //add document to index - $index->addDocument($document); - - //commit every x new documents, and print a status message - if (($counter%2000) == 0) { +} + +//begin timer +search_stopwatch(); +mtrace("Starting activity modules\n"); + +//the presence of the required search functions - +// * mod_iterator +// * mod_get_content_for_index +//are the sole basis for including a module in the index at the moment. +$searchables = array(); + +// collects modules +if ($mods = get_records('modules', '', '', '', 'id,name')) { + $searchables = array_merge($searchables, $mods); +} +mtrace(count($searchables).' modules found.'); + +// collects blocks as indexable information may be found in blocks either +if ($blocks = get_records('block', '', '', '', 'id,name')) { + // prepend the "block_" prefix to discriminate document type plugins + foreach(array_keys($blocks) as $aBlockId){ + $blocks[$aBlockId]->name = 'block_'.$blocks[$aBlockId]->name; + } + $searchables = array_merge($searchables, $blocks); + mtrace(count($blocks).' 
blocks found.'); +} + +//add virtual modules onto the back of the array +$searchables = array_merge($searchables, search_get_additional_modules()); +if ($searchables){ + foreach ($searchables as $mod) { + $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + + if (file_exists($class_file)) { + include_once($class_file); + + //build function names + $iter_function = $mod->name.'_iterator'; + $index_function = $mod->name.'_get_content_for_index'; + $counter = 0; + if (function_exists($index_function) && function_exists($iter_function)) { + mtrace("Processing module function $index_function ..."); + $sources = $iter_function(); + if ($sources){ + foreach ($sources as $i) { + $documents = $index_function($i); + + //begin transaction + if ($documents){ + foreach($documents as $document) { + $counter++; + + //object to insert into db + $dbid = $dbcontrol->addDocument($document); + + //synchronise db with index + $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); + + //add document to index + $index->addDocument($document); + + //commit every x new documents, and print a status message + if (($counter % 2000) == 0) { + $index->commit(); + mtrace(".. $counter"); + } + } + } + //end transaction + } + } + + //commit left over documents, and finish up $index->commit(); - mtrace(".. $counter"); - } //if - } //foreach - - //end transaction - - } //foreach - - //commit left over documents, and finish up - $index->commit(); - - mtrace("-- $counter documents indexed"); - mtrace("done.\n"); - } //if - } //if - } //foreach - } //if - - //finished modules - mtrace('Finished activity modules'); - search_stopwatch(); - - //now blocks... 
- // - - mtrace(".'); - - //finished, turn busy flag off - set_config("search_indexer_busy", "0"); - - //mark the time we last updated - set_config("search_indexer_run_date", time()); - - //and the index size - set_config("search_index_size", (int)$index->count()); + + mtrace("-- $counter documents indexed"); + mtrace("done.\n"); + } + } + } +} + +//finished modules +mtrace('Finished activity modules'); +search_stopwatch(); + +mtrace(".
Back to query page."); - mtrace('
Back to query page."); +mtrace('
The data directory ($indexinfo->path) contains $indexinfo->filecount files, and\n" ."there are ".$indexinfo->dbcount." records in the search_documents table.\n" ."\n" @@ -42,7 +48,8 @@ ."Test indexing or " ."Continue indexing or Back to query page." .""); - } else { +} +else { header('Location: indexer.php?areyousure=yes'); - } //else +} ?> \ No newline at end of file diff --git a/search/indexlib.php b/search/indexlib.php index cc479e866270d..32df62f386783 100644 --- a/search/indexlib.php +++ b/search/indexlib.php @@ -1,15 +1,20 @@ dirroot/search/lib.php"); - require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); - - class IndexInfo { +/* +* Author: Michael Champanis +* +* Reviewed by: Valery Fremaux (2007) +* +* Index info class +* +* Used to retrieve information about an index. +* Has methods to check for valid database and data directory, +* and the index itself. +**/ + +require_once("$CFG->dirroot/search/lib.php"); +require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + +class IndexInfo { private $path, //index data directory $size, //size of directory (i.e. the whole index) $filecount, //number of files @@ -18,193 +23,226 @@ class IndexInfo { $types, //array of [document types => count] $complete, //is index completely formed? 
$time; //date index was generated - - public function __construct($path=SEARCH_INDEX_PATH) { - global $CFG, $db; - - $this->path = $path; - - //test to see if there is a valid index on disk, at the specified path - try { - $test_index = new Zend_Search_Lucene($this->path, false); - $validindex = true; - } catch(Exception $e) { - $validindex = false; - } //catch - - //retrieve file system info about the index if it is valid - if ($validindex) { - $this->size = display_size(get_directory_size($this->path)); - $index_dir = get_directory_list($this->path, '', false, false); - $this->filecount = count($index_dir); - $this->indexcount = $test_index->count(); - } else { - $this->size = 0; - $this->filecount = 0; - $this->indexcount = 0; - } //else - - $db_exists = false; //for now - - //get all the current tables in moodle - $admin_tables = $db->MetaTables(); - - //TODO: use new IndexDBControl class for database checks? - - //check if our search table exists - if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) { - //retrieve database information if it does - $db_exists = true; - - //total documents - $this->dbcount = count_records(SEARCH_DATABASE_TABLE); - - //individual document types - $types = search_get_document_types(); - sort($types); - - foreach($types as $type) { - $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type); - $this->types[$type] = (int)$c; - } //foreach - } else { - $this->dbcount = 0; - $this->types = array(); - } //else - - //check if the busy flag is set - if ($CFG->search_indexer_busy == '1') { - $this->complete = false; - } else { - $this->complete = true; - } //if - - //get the last run date for the indexer - if ($this->valid() && $CFG->search_indexer_run_date) { - $this->time = $CFG->search_indexer_run_date; - } else { - $this->time = 0; - } //else + + public function __construct($path = SEARCH_INDEX_PATH) { + global $CFG, $db; + + $this->path = $path; + + //test to see if there is a valid index on disk, at the specified path 
+ try { + $test_index = new Zend_Search_Lucene($this->path, false); + $validindex = true; + } catch(Exception $e) { + $validindex = false; + } //catch + + //retrieve file system info about the index if it is valid + if ($validindex) { + $this->size = display_size(get_directory_size($this->path)); + $index_dir = get_directory_list($this->path, '', false, false); + $this->filecount = count($index_dir); + $this->indexcount = $test_index->count(); + } + else { + $this->size = 0; + $this->filecount = 0; + $this->indexcount = 0; + } + + $db_exists = false; //for now + + //get all the current tables in moodle + $admin_tables = $db->MetaTables(); + + //TODO: use new IndexDBControl class for database checks? + + //check if our search table exists + if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) { + //retrieve database information if it does + $db_exists = true; + + //total documents + $this->dbcount = count_records(SEARCH_DATABASE_TABLE); + + //individual document types + $types = search_get_document_types(); + sort($types); + + foreach($types as $type) { + $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type); + $this->types[$type] = (int)$c; + } + } + else { + $this->dbcount = 0; + $this->types = array(); + } + + //check if the busy flag is set + if ($CFG->search_indexer_busy == '1') { + $this->complete = false; + } + else { + $this->complete = true; + } + + //get the last run date for the indexer + if ($this->valid() && $CFG->search_indexer_run_date) { + $this->time = $CFG->search_indexer_run_date; + } + else { + $this->time = 0; + } } //__construct - - //returns false on error, and the error message via referenced variable $err - public function valid(&$err=null) { - $err = array(); - $ret = true; - - if (!$this->is_valid_dir()) { - $err['dir'] = 'Index directory either contains an invalid index, or nothing at all.'; - $ret = false; - } //if - - if (!$this->is_valid_db()) { - $err['db'] = 'Database table is not present, or contains no index 
records.'; - $ret = false; - } //if - - if (!$this->complete) { - $err['index'] = 'Indexing was not successfully completed, please restart it.'; - $ret = false; - } //if - - return $ret; + + /** + * returns false on error, and the error message via referenced variable $err + * + */ + public function valid(&$err = null) { + $err = array(); + $ret = true; + + if (!$this->is_valid_dir()) { + $err['dir'] = get_string('invalidindexerror', 'search'); + $ret = false; + } + + if (!$this->is_valid_db()) { + $err['db'] = get_string('emptydatabaseerror', 'search'); + $ret = false; + } + + if (!$this->complete) { + $err['index'] = get_string('uncompleteindexingerror','search'); + $ret = false; + } + + return $ret; } //valid - - //is the index dir valid + + /** + * is the index dir valid + * + */ public function is_valid_dir() { - if ($this->filecount > 0) { - return true; - } else { - return false; - } //else + if ($this->filecount > 0) { + return true; + } + else { + return false; + } } //is_valid_dir - - //is the db table valid + + /** + * is the db table valid + * + */ public function is_valid_db() { - if ($this->dbcount > 0) { - return true; - } else { - return false; - } //else + if ($this->dbcount > 0) { + return true; + } + else { + return false; + } } //is_valid_db - - //shorthand get method for the class variables + + /** + * shorthand get method for the class variables + * + */ public function __get($var) { - if (in_array($var, array_keys(get_class_vars(get_class($this))))) { - return $this->$var; - } //if + if (in_array($var, array_keys(get_class_vars(get_class($this))))) { + return $this->$var; + } } //__get - } //IndexInfo +} //IndexInfo - /* DB Index control class - * - * Used to control the search index database table - * */ +/* +* DB Index control class +* +* Used to control the search index database table +**/ +class IndexDBControl { - class IndexDBControl { - //does the table exist? + /** + * does the table exist? 
+ * + */ public function checkTableExists() { - global $CFG, $db; - - $table = SEARCH_DATABASE_TABLE; - $tables = $db->MetaTables(); - - if (in_array($CFG->prefix.$table, $tables)) { - return true; - } else { - return false; - } //else + global $CFG, $db; + + $table = SEARCH_DATABASE_TABLE; + $tables = $db->MetaTables(); + if (in_array($CFG->prefix.$table, $tables)) { + return true; + } + else { + return false; + } } //checkTableExists - //is our database setup valid? + /** + * is our database setup valid? + * + */ public function checkDB() { - global $CFG, $db; - - $sqlfile = "$CFG->dirroot/search/db/$CFG->dbtype.sql"; - $ret = false; - - if ($this->checkTableExists()) { - execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false); - } //if + global $CFG, $db; + + $sqlfile = "$CFG->dirroot/blocks/search/db/$CFG->dbtype.sql"; + $ret = false; + if ($this->checkTableExists()) { + execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false); + } - ob_start(); //turn output buffering on - to hide modify_database() output - $ret = modify_database($sqlfile, '', false); - ob_end_clean(); //chuck the buffer and resume normal operation + //turn output buffering on - to hide modify_database() output + ob_start(); + $ret = modify_database($sqlfile, '', false); - return $ret; + //chuck the buffer and resume normal operation + ob_end_clean(); + return $ret; } //checkDB - //add a document record to the table + /** + * add a document record to the table + * @param document must be a Lucene SearchDocument instance + */ public function addDocument($document=null) { - global $db; - - if ($document == null) { - return false; - } //if - - //object to insert into db - $doc->doctype = $document->doctype; - $doc->docid = $document->docid; - $doc->title = search_escape_string($document->title); - $doc->url = search_escape_string($document->url); - $doc->update = time(); - $doc->docdate = $document->date; - $doc->courseid = $document->course_id; - $doc->groupid = 
$document->group_id; - - //insert summary into db - $id = insert_record(SEARCH_DATABASE_TABLE, $doc); - - return $id; + global $db, $CFG; + + if ($document == null) { + return false; + } + + // object to insert into db + $doc->doctype = $document->doctype; + $doc->docid = $document->docid; + $doc->itemtype = $document->itemtype; + $doc->title = search_escape_string($document->title); + $doc->url = search_escape_string($document->url); + $doc->update = time(); + $doc->docdate = $document->date; + $doc->courseid = $document->course_id; + $doc->groupid = $document->group_id; + + //insert summary into db + $id = insert_record(SEARCH_DATABASE_TABLE, $doc); + + return $id; } //addDocument - //remove a document record from the index + /** + * remove a document record from the index + * @param document must be a Lucene document instance, or at least a dbid enveloppe + */ public function delDocument($document) { - global $db; - - delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid); + global $db; + + delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid); } //delDocument - } //IndexControl +} //IndexControl ?> \ No newline at end of file diff --git a/search/lib.php b/search/lib.php index 8cdd62c9df925..b9e6f9195c667 100644 --- a/search/lib.php +++ b/search/lib.php @@ -1,113 +1,156 @@ dataroot/search"); - define('SEARCH_DATABASE_TABLE', 'search_documents'); - - //document types that can be searched - //define('SEARCH_TYPE_NONE', 'none'); - define('SEARCH_TYPE_WIKI', 'wiki'); - define('SEARCH_TYPE_FORUM', 'forum'); - define('SEARCH_TYPE_GLOSSARY', 'glossary'); - define('SEARCH_TYPE_RESOURCE', 'resource'); - - //returns all the document type constants - function search_get_document_types($prefix='SEARCH_TYPE') { +/* +* Author: Michael Champanis +* +* This file must not contain any PHP 5, because it is used to test for PHP 5 +* itself, and needs to be able to be executed on PHP 4 installations. 
+* +* Reviewed by: Valery Fremaux (2007) +* - adding techproject search capabilities +* - adding full internationalization +**/ + +/* +// function reference +function search_get_document_types($prefix = 'SEARCH_TYPE_') { +function search_get_additional_modules() { +function search_shorten_url($url, $length=30) { +function search_escape_string($str) { +function search_check_php5($feedback = false) { +function search_stopwatch($cli = false) { +function search_pexit($str = "") { +*/ + +define('SEARCH_INDEX_PATH', "$CFG->dataroot/search"); +define('SEARCH_DATABASE_TABLE', 'search_documents'); + +//document types that can be searched +//define('SEARCH_TYPE_NONE', 'none'); +define('SEARCH_TYPE_WIKI', 'wiki'); +define('PATH_FOR_SEARCH_TYPE_WIKI', 'mod/wiki'); +define('SEARCH_TYPE_FORUM', 'forum'); +define('PATH_FOR_SEARCH_TYPE_FORUM', 'mod/forum'); +define('SEARCH_TYPE_GLOSSARY', 'glossary'); +define('PATH_FOR_SEARCH_TYPE_GLOSSARY', 'mod/glossary'); +define('SEARCH_TYPE_RESOURCE', 'resource'); +define('PATH_FOR_SEARCH_TYPE_RESOURCE', 'mod/resource'); +define('SEARCH_TYPE_TECHPROJECT', 'techproject'); +define('PATH_FOR_SEARCH_TYPE_TECHPROJECT', 'mod/techproject'); +define('SEARCH_TYPE_DATA', 'data'); +define('PATH_FOR_SEARCH_TYPE_DATA', 'mod/data'); +define('SEARCH_TYPE_CHAT', 'chat'); +define('PATH_FOR_SEARCH_TYPE_CHAT', 'mod/chat'); + +/** +* returns all the document type constants +* @param prefix a pattern for recognizing constants +* @return an array of type labels +*/ +function search_get_document_types($prefix = 'SEARCH_TYPE_') { $ret = array(); - - foreach (get_defined_constants() as $key=>$value) { - if (substr($key, 0, strlen($prefix)) == $prefix) { - $ret[$key] = $value; - } //if - } //foreach - + foreach (get_defined_constants() as $key => $value) { + if (preg_match("/^{$prefix}/", $key)){ + $ret[$key] = $value; + } + } sort($ret); - return $ret; - } //search_get_document_types - - // additional virtual modules to index - // - // By adding 'moo' to the extras 
array, an additional document type - // documents/moo_document.php will be indexed - this allows for - // virtual modules to be added to the index, i.e. non-module specific - // information. - function search_get_additional_modules() { +} //search_get_document_types + +/** +* additional virtual modules to index +* +* By adding 'moo' to the extras array, an additional document type +* documents/moo_document.php will be indexed - this allows for +* virtual modules to be added to the index, i.e. non-module specific +* information. +*/ +function search_get_additional_modules() { $extras = array(/* additional keywords go here */); $ret = array(); - foreach($extras as $extra) { - $temp->name = $extra; - $ret[] = clone($temp); - } //foreach - + $temp->name = $extra; + $ret[] = clone($temp); + } return $ret; - } //search_get_additional_modules - - //shortens a url so it can fit on the results page - function search_shorten_url($url, $length=30) { +} //search_get_additional_modules + +/** +* shortens a url so it can fit on the results page +* @param url the url +* @param length the size limit we want +*/ +function search_shorten_url($url, $length=30) { return substr($url, 0, $length)."..."; - } //search_shorten_url - - function search_escape_string($str) { +} //search_shorten_url + +/** +* a local function for escaping +* @param str the string to escape +* @return the escaped string +*/ +function search_escape_string($str) { global $CFG; switch ($CFG->dbfamily) { - case 'mysql': - $s = mysql_real_escape_string($str); - break; - case 'postgres': - $s = pg_escape_string($str); - break; - default: - $s = addslashes($str); - } //switch - + case 'mysql': + $s = mysql_real_escape_string($str); + break; + case 'postgres': + $s = pg_escape_string($str); + break; + default: + $s = addslashes($str); + } return $s; - } //search_escape_string - - //get a real php 5 version number, using 5.0.0 arbitrarily - function search_check_php5($feedback=false) { +} //search_escape_string + +/** 
+* get a real php 5 version number, using 5.0.0 arbitrarily +* @param feedback if true, prints a feedback message to output. +* @return true if version of PHP is high enough +*/ +function search_check_php5($feedback = false) { if (!check_php_version("5.0.0")) { - if ($feedback) { - $phpversion = phpversion(); - print_heading("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); - } //if - - return false; - } else { + if ($feedback) { + print_heading(get_string('versiontoolow', 'search')); + } + return false; + } + else { return true; - } //else - } //search_check_php5 - - //simple timer function, outputs result on 2nd call - function search_stopwatch($cli = false) { + } +} //search_check_php5 + +/** +* simple timer function, on first call, records a current microtime stamp, outputs result on 2nd call +* @param cli an output formatting switch +* @return void +*/ +function search_stopwatch($cli = false) { if (!empty($GLOBALS['search_script_start_time'])) { - if (!$cli) print ''; - print round(microtime(true) - $GLOBALS['search_script_start_time'], 6).' seconds'; - if (!$cli) print ''; - - unset($GLOBALS['search_script_start_time']); - } else { - $GLOBALS['search_script_start_time'] = microtime(true); - } //else - } //search_stopwatch - - //print and exit (for debugging) - function search_pexit($str = "") { + if (!$cli) print ''; + print round(microtime(true) - $GLOBALS['search_script_start_time'], 6).' '.get_string('seconds', 'search'); + if (!$cli) print ''; + unset($GLOBALS['search_script_start_time']); + } + else { + $GLOBALS['search_script_start_time'] = microtime(true); + } +} //search_stopwatch + +/** +* print and exit (for debugging) +* @param str a variable to explore +* @return void +*/ +function search_pexit($str = "") { if (is_array($str) or is_object($str)) { - print_r($str); + print_r($str); } else if ($str) { - print $str."
Admin: There appears to be no search index. Please create an index.
\n"; - } //if +print ' '; +print_string('documents', 'search'); +print '.'; - print '' . get_string('noindexmessage', 'search') . '' . get_string('createanindex', 'search')."
\n"; +} - print_simple_box_end(); - - if ($sq->is_valid()) { - print_simple_box_start('center', '50%', 'white', 10); +?> + +is_valid()) { + print_box_start(); + search_stopwatch(); $hit_count = $sq->count(); - + print "Starting index update (updates)...\n"); - - if ($mods = get_records_select('modules')) { - $mods = array_merge($mods, search_get_additional_modules()); - - foreach ($mods as $mod) { - $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; - $get_document_function = $mod->name.'_single_document'; - $delete_function = $mod->name.'_delete'; - $db_names_function = $mod->name.'_db_names'; - $updates = array(); - - if (file_exists($class_file)) { - require_once($class_file); - - if (function_exists($delete_function) and function_exists($db_names_function) and function_exists($get_document_function)) { - mtrace("Checking $mod->name module for updates."); - $values = $db_names_function(); - - //TODO: check 'in' syntax with other RDBMS' (add and update.php as well) - $sql = "select id, ".$values[0]." as docid from ".$values[1]. - " where ".$values[3]." > $indexdate". 
- " and id in (select docid from ".SEARCH_DATABASE_TABLE.")"; - - $records = get_records_sql($sql); - - if (is_array($records)) { - foreach($records as $record) { - $updates[] = $delete_function($record->docid); - } //foreach - } //if - - foreach ($updates as $update) { - ++$update_count; - - //delete old document - $doc = $index->find("+docid:$update +doctype:$mod->name"); - - //get the record, should only be one - foreach ($doc as $thisdoc) { - mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)"); - - $dbcontrol->delDocument($thisdoc); - $index->delete($thisdoc->id); - } //foreach - - //add new modified document back into index - $add = $get_document_function($update); - - //object to insert into db - $dbid = $dbcontrol->addDocument($add); - - //synchronise db with index - $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); - - mtrace(" Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)"); - - $index->addDocument($add); - } //foreach - - mtrace("Finished $mod->name.\n"); - } //if - } //if - } //foreach - } //if - - //commit changes - $index->commit(); - - //update index date - set_config("search_indexer_run_date", time()); - - mtrace("Finished $update_count updates."); +} + +require_once("$CFG->dirroot/search/indexlib.php"); + +$index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); +$dbcontrol = new IndexDBControl(); +$update_count = 0; +$indexdate = $CFG->search_indexer_update_date; +$startupdatedate = time(); + +mtrace("
Starting index update (updates)...\n"); + +if ($mods = get_records_select('modules')) { + $mods = array_merge($mods, search_get_additional_modules()); + + foreach ($mods as $mod) { + $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + $get_document_function = $mod->name.'_single_document'; + $delete_function = $mod->name.'_delete'; + $db_names_function = $mod->name.'_db_names'; + $updates = array(); + + if (file_exists($class_file)) { + require_once($class_file); + + //if both required functions exist + if (function_exists($delete_function) and function_exists($db_names_function) and function_exists($get_document_function)) { + mtrace("Checking $mod->name module for updates."); + $valuesArray = $db_names_function(); + if ($valuesArray){ + foreach($valuesArray as $values){ + + $where = (isset($values[5])) ? 'AND ('.$values[5].')' : ''; + $itemtypes = ($values[4] != '*') ? " AND itemtype = '{$values[4]}' " : '' ; + + //TODO: check 'in' syntax with other RDBMS' (add and update.php as well) + $table = SEARCH_DATABASE_TABLE; + $query = " + SELECT + docid, + itemtype + FROM + {$CFG->prefix}{$table} + WHERE + doctype = '{$mod->name}' + $itemtypes + "; + $docIds = get_records_sql_menu($query); + $docIdList = ($docIds) ? 
implode("','", array_keys($docIds)) : '' ; + + $query = " + SELECT + id, + {$values[0]} as docid + FROM + {$CFG->prefix}{$values[1]} + WHERE + {$values[3]} > {$indexdate} AND + id IN ('{$docIdList}') + $where + "; + $records = get_records_sql($query); + if (is_array($records)) { + foreach($records as $record) { + $updates[] = $delete_function($record->docid, $docIds[$record->docid]); + } + } + } + + foreach ($updates as $update) { + ++$update_count; + + //delete old document + $doc = $index->find("+docid:{$update->id} +doctype:{$mod->name} +itemtype:{$update->itemtype}"); + + //get the record, should only be one + foreach ($doc as $thisdoc) { + mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)"); + $dbcontrol->delDocument($thisdoc); + $index->delete($thisdoc->id); + } + + //add new modified document back into index + $add = $get_document_function($update->id, $update->itemtype); + + //object to insert into db + $dbid = $dbcontrol->addDocument($add); + + //synchronise db with index + $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); + mtrace(" Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)"); + $index->addDocument($add); + } + } + else{ + mtrace("No types to update.\n"); + } + mtrace("Finished $mod->name.\n"); + } + } + } +} + +//commit changes +$index->commit(); + +//update index date +set_config("search_indexer_update_date", $startupdatedate); + +mtrace("Finished $update_count updates."); ?> \ No newline at end of file