diff --git a/search/LISEZMOI.txt b/search/LISEZMOI.txt new file mode 100644 index 0000000000000..8780ee3f74102 --- /dev/null +++ b/search/LISEZMOI.txt @@ -0,0 +1,89 @@ +Cette distribution partielle contient une refonte du moteur de +recherche global de Moodle. + +Le moteur de recherche est capable d'indexer et de rechercher +des informations dans un grand nombre de contenus stockés +dans la plate-forme à travers la manipulation des activités et +des blocs. + +Le moteur de recherche procède à une première indexation des +ressources disponibles par action de l'administrateur. Une fois +cette indexation effectuée, le moteur maintient régulièrement les +index, en ajoutant les nouvelles entrées et en nettoyant les +entrées obsolètes. + +La recherche permet d'obtenir des références d'accès au contexte +qui diffuse cette information, au nom de l'utilisateur courant. +Le filtrage des résultats enlève de la liste des réponses toute +ressource que la situation de l'utilisateur empêcherait de voir +s'il y accédait dans son contexte habituel. + +Mise en oeuvre +############## + +Pour déployer le moteur : + + +* Copie de fichiers + +1. Ajouter les deux librairies fournies aux librairies de Moodle +2. Ecraser le répertoire "search" par le répertoire fourni +3. Ecraser le bloc "blocks/search" par le bloc fourni. + +* Installation logique + +4. Aller dans les notifications administratives et dérouler la procédure d'installation/mise à jour du bloc. L'installation crée la table image +des documents indexés et utilisés dans le module search. + +5. Insérer un nouveau bloc de recherche globale dans la plate-forme + +6. Effectuer une recherche vide (en administrateur) + +7. Aller sur la page des statistiques + +8. Activer l'indexation (indexersplash.php). Attention, si la plate-forme contient beaucoup de contenus cette indexation peut être TRES LONGUE. + +Pour effectuer des recherches, une fois la première indexation terminée, retourner au bloc de recherche et tenter une recherche. 
+ +Eléments pris en charge +####################### + +Dans l'état actuel, les éléments indexés par le moteur sont : + +- les entrées de forum +- les fiches de base de données +- les commentaires sur fiches de données +- les entrées de glossaire +- les commentaires sur entrées de glossaire +- les ressources natives Moodle +- les ressources physiques de type MSWord +- les ressources physiques de type PDF +- les ressources physiques de type fichier texte (.txt) +- les ressources physiques de type HTML (.htm et .html) +- les ressources physiques de type XML (.xml) +- les ressources physiques de type (Microsoft) Powerpoint (.ppt) +- les pages de wiki +- les entités de projet technique +- les sessions de chat + +Extensions +########## + +L'API du moteur de recherche permet désormais : + +- l'indexation de contenus de blocs. +- l'indexation de modules contenant une information complexe ou de plusieurs types distincts +- la sécurisation des informations indexées lors des extractions de résultats +- l'indexation de tout module tiers par ajout d'un fichier php calibré +- l'indexation de toute nouvelle ressource physique par ajout d'un fichier php calibré + +Extensions futures +################## + +- De nouvelles prises en charge de contenus tels que les attachements des forums, les attachements des glossaires, ainsi que d'autres modules non encore +implémentés. + +- l'extension mnet de la recherche dans un réseau de Moodle interconnectés. + + + diff --git a/search/READMETOO.txt b/search/READMETOO.txt new file mode 100644 index 0000000000000..e6c0983ed786d --- /dev/null +++ b/search/READMETOO.txt @@ -0,0 +1,90 @@ +This partial distribution contains a complete review of the +Global Search Engine of Moodle. + +The Global Search Engine stores indexes about a huge quantity +of information from within modules, blocks or resources stored +by Moodle either in the database or the file system. + +The administrator initially indexes the existing content. 
Once this +first initialization is performed, the search engine maintains the indexes +regularly, adding new entries, deleting obsolete ones or updating +those that have changed. + +Search produces links for accessing the information in the same +context in which it is usually accessed, from the current user's point of view. +Results filtering removes from the results any link to information the +current user would not be allowed to access in a normal situation. + +Deployment +########## + +To set up the engine: + + +* File copy + +1. Add both additional libraries provided in the distribution to Moodle's libraries +2. Replace the "search" directory with the new one +3. Replace the "blocks/search" directory with the new one. + +* Logical install + +4. Browse to the administrative notification screen and let the +install/update process run. The install process creates the Moodle +table needed for backing the identities of the indexed documents. + +5. Go to the block administration panel and set up the Global Search +block once. This will initialize useful parameters for the global search engine. + +6. Insert a new Global Search block somewhere in a course or top-level screen. + +7. Launch an empty search (you must be administrator). + +8. Go to the statistics screen. + +9. Activate indexing (indexersplash.php). Beware: if your Moodle has +a large amount of content, the indexing process may take a VERY LONG time. + +To search, go back to the search block and try a query. 
+ +Handled information for indexing +################################ + +In its current state, the engine indexes the following information: + +- forum posts +- database records (using textual fields only) +- database comments +- glossary entries +- glossary comments on entries +- Moodle native resources +- physical MSWord files as resources (.doc) +- physical Powerpoint files as resources (.ppt) +- physical PDF files as resources +- physical text files as resources (.txt) +- physical html files as resources (.htm and .html) +- physical xml files as resources (.xml) +- wiki pages +- techproject descriptions +- chat sessions + +Extensions +########## + +The reviewed search engine API allows: + +- indexing of block contents +- indexing of modules or blocks containing a complex information model +- securing the access to the results +- indexing any third-party module by adding a calibrated php script +- indexing any new physical file type by adding a calibrated php script + +Future extensions +################# + +- More content should become indexable, such as forum and glossary attachments, as well as the contents of other standard modules not handled yet. + +- extending the search capability to an mnet network information space. 
+ + + diff --git a/search/add.php b/search/add.php index 285c9d948c8a7..6d45795507be4 100644 --- a/search/add.php +++ b/search/add.php @@ -1,103 +1,144 @@ dirroot/search/lib.php"); - - require_login(); - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } - - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if - - //check for php5 (lib.php) - if (!search_check_php5()) { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* Asynchronous adder for new indexable contents +* +* Major chages in this review is passing the xxxx_db_names return to +* multiple arity to handle multiple document types modules +*/ + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +require_login(); + +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} + +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} + +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if - - require_once("$CFG->dirroot/search/indexlib.php"); - - $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); - $dbcontrol = new IndexDBControl(); - $addition_count = 0; - - $indexdate = $CFG->search_indexer_run_date; - - mtrace('
Starting index update (additions)...');
-  mtrace('Index size before: '.$CFG->search_index_size."\n");
-
-  //get all modules
-  if ($mods = get_records_select('modules')) {
-  //append virtual modules onto array
-  $mods = array_merge($mods, search_get_additional_modules());
-
-  foreach ($mods as $mod) {
-    //build include file and function names
-    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
-    $db_names_function = $mod->name.'_db_names';
-    $get_document_function = $mod->name.'_single_document';
-    $additions = array();
-
-    if (file_exists($class_file)) {
-      require_once($class_file);
-
-      //if both required functions exist
-      if (function_exists($db_names_function) and function_exists($get_document_function)) {
-        mtrace("Checking $mod->name module for additions.");
-        $values = $db_names_function();
-        $where = (isset($values[4])) ? $values[4] : '';
-
-        //select records in MODULE table, but not in SEARCH_DATABASE_TABLE
-        $sql =  "select id, ".$values[0]." as docid from ".$values[1].
-                " where id not in".
-                " (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')".
-                " and ".$values[2]." > $indexdate".
-                " $where";
-
-        $records = get_records_sql($sql);
-
-        //foreach record, build a module specific search document using the get_document function
-        if (is_array($records)) {
-          foreach($records as $record) {
-            $additions[] = $get_document_function($record->id);
-          } //foreach
-        } //if
-
-        //foreach document, add it to the index and database table
-        foreach ($additions as $add) {
-          ++$addition_count;
-
-          //object to insert into db
-          $dbid = $dbcontrol->addDocument($add);
-
-          //synchronise db with index
-          $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
-
-          mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
-
-          $index->addDocument($add);
-        } //foreach
-
-        mtrace("Finished $mod->name.\n");
-      } //if
-    } //if
-  } //foreach
-  } //if
-
-  //commit changes
-  $index->commit();
-
-  //update index date and size
-  set_config("search_indexer_run_date", time());
-  set_config("search_index_size", (int)$CFG->search_index_size + (int)$addition_count);
-
-  //print some additional info
-  mtrace("Added $addition_count documents.");
-  mtrace('Index size after: '.$index->count().'
'); +} + +require_once("$CFG->dirroot/search/indexlib.php"); + +$index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); +$dbcontrol = new IndexDBControl(); +$addition_count = 0; +$startindextime = time(); + +$indexdate = $CFG->search_indexer_run_date; + +mtrace('
Starting index update (additions)...');
+mtrace('Index size before: '.$CFG->search_index_size."\n");
+
+// Walk through every installed module (plus the virtual modules declared by the
+// search engine) and index the records that are not yet in the search table.
+if ($mods = get_records_select('modules')) {
+
+    // append virtual modules onto array
+    $mods = array_merge($mods, search_get_additional_modules());
+
+    // an unset or empty last run date must not produce a broken "field > " comparison
+    $lastrun = (int)$indexdate;
+
+    foreach ($mods as $mod) {
+        // build include file and function names
+        $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+        $db_names_function = $mod->name.'_db_names';
+        $get_document_function = $mod->name.'_single_document';
+        $additions = array();
+
+        // a module without a document wrapper file cannot be indexed
+        if (!file_exists($class_file)) {
+            continue;
+        }
+        require_once($class_file);
+
+        // both API functions are required
+        if (!function_exists($db_names_function) or !function_exists($get_document_function)) {
+            continue;
+        }
+
+        mtrace("Checking $mod->name module for additions.");
+        $valuesArray = $db_names_function();
+        if ($valuesArray) {
+            // one entry per document type handled by the module
+            foreach ($valuesArray as $values) {
+                // $values[5] is an optional extra SQL condition, $values[4] the item type;
+                // '*' (or a missing entry) means that no item type filter is applied
+                $where = (isset($values[5])) ? 'AND ('.$values[5].')' : '';
+                $itemtype = (isset($values[4])) ? $values[4] : '*';
+                $itemtypes = ($itemtype != '*') ? " AND itemtype = '{$itemtype}' " : '' ;
+
+                // docids already recorded in the search table for this module
+                $table = SEARCH_DATABASE_TABLE;
+                $query = "
+                    SELECT 
+                        docid,
+                        itemtype 
+                    FROM 
+                        {$CFG->prefix}{$table}
+                    WHERE 
+                        doctype = '{$mod->name}'
+                        $itemtypes
+                ";
+                $docIds = get_records_sql_menu($query);
+
+                // only exclude indexed ids when there are some: NOT IN ('') compares the id
+                // column with an empty string, which PostgreSQL rejects for a numeric column
+                $notindexed = ($docIds) ? "id NOT IN ('".implode("','", array_keys($docIds))."') AND" : '';
+
+                // select records in MODULE table, but not in SEARCH_DATABASE_TABLE
+                $query =  "
+                    SELECT id, 
+                        {$values[0]} as docid 
+                    FROM 
+                        {$CFG->prefix}{$values[1]} 
+                    WHERE 
+                        $notindexed
+                        {$values[2]} > {$lastrun}
+                        $where
+                ";
+                $records = get_records_sql($query);
+
+                // foreach record, build a module specific search document using the get_document function
+                if (is_array($records)) {
+                    foreach ($records as $record) {
+                        $add = $get_document_function($record->docid, $itemtype);
+                        // some documents may not be indexable
+                        if ($add) {
+                            $additions[] = $add;
+                        }
+                    }
+                }
+            }
+
+            // foreach document, add it to the index and database table
+            foreach ($additions as $add) {
+                ++$addition_count;
+
+                // object to insert into db
+                $dbid = $dbcontrol->addDocument($add);
+
+                // synchronise db with index
+                $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
+
+                mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
+
+                $index->addDocument($add);
+            }
+        }
+        else {
+            mtrace("No types to add.\n");
+        }
+        mtrace("Finished $mod->name.\n");
+    }
+}
+
+// commit changes
+$index->commit();
+
+// update index date and size; the stored date is the time this run started
+// (presumably so that records created during the run are seen by the next one)
+set_config("search_indexer_run_date", $startindextime);
+set_config("search_index_size", (int)$CFG->search_index_size + (int)$addition_count);
+
+// print some additional info
+mtrace("Added $addition_count documents.");
+mtrace('Index size after: '.$index->count().'
'); ?> \ No newline at end of file diff --git a/search/delete.php b/search/delete.php index 8b86305e12901..3183e7ddb2f62 100644 --- a/search/delete.php +++ b/search/delete.php @@ -1,94 +1,132 @@ dirroot/search/lib.php"); - - require_login(); - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } - - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if - - //check for php5 (lib.php) - if (!search_check_php5()) { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* Asynchronous index cleaner +* +* Major chages in this review is passing the xxxx_db_names return to +* multiple arity to handle multiple document types modules +*/ + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +require_login(); + +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} + +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} //if + +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if - - require_once("$CFG->dirroot/search/indexlib.php"); - - $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); - $dbcontrol = new IndexDBControl(); - $deletion_count = 0; - - mtrace('
Starting clean-up of removed records...');
-  mtrace('Index size before: '.$CFG->search_index_size."\n");
-
-  if ($mods = get_records_select('modules')) {
-  $mods = array_merge($mods, search_get_additional_modules());
-
-  foreach ($mods as $mod) {
-    //build function names
-    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
-    $delete_function = $mod->name.'_delete';
-    $db_names_function = $mod->name.'_db_names';
-    $deletions = array();
-
-    if (file_exists($class_file)) {
-      require_once($class_file);
-
-      if (function_exists($delete_function) and function_exists($db_names_function)) {
-        mtrace("Checking $mod->name module for deletions.");
-        $values = $db_names_function();
-
-        $sql = "select id, docid from ".SEARCH_DATABASE_TABLE.
-                " where doctype like '$mod->name'".
-                " and docid not in".
-                " (select ".$values[0]." from ".$values[1].")";
-
-        $records = get_records_sql($sql);
-
-        //build an array of all the deleted records
-        if (is_array($records)) {
-          foreach($records as $record) {
-            $deletions[] = $delete_function($record->docid);
-          } //foreach
-        } //if
-
-        foreach ($deletions as $delete) {
-          //find the specific document in the index, using it's docid and doctype as keys
-          $doc = $index->find("+docid:$delete +doctype:$mod->name");
-
-          //get the record, should only be one
-          foreach ($doc as $thisdoc) {
-            ++$deletion_count;
-            mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
-
-            //remove it from index and database table
-            $dbcontrol->delDocument($thisdoc);
-            $index->delete($thisdoc->id);
-          } //foreach
-        } //foreach
-
-        mtrace("Finished $mod->name.\n");
-      } //if
-    } //if
-  } //foreach
-  } //if
-
-  //commit changes
-  $index->commit();
-
-  //update index date and index size
-  set_config("search_indexer_run_date", time());
-  set_config("search_index_size", (int)$CFG->search_index_size - (int)$deletion_count);
-
-  mtrace("Finished $deletion_count removals.");
-  mtrace('Index size after: '.$index->count().'
'); +} + +require_once("$CFG->dirroot/search/indexlib.php"); + +$index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); +$dbcontrol = new IndexDBControl(); +$deletion_count = 0; +$startcleantime = time(); + +mtrace('
Starting clean-up of removed records...');
+mtrace('Index size before: '.$CFG->search_index_size."\n");
+
+// Walk through every installed module (plus the virtual modules declared by the
+// search engine) and remove from the index the documents whose source record is gone.
+if ($mods = get_records_select('modules')) {
+    $mods = array_merge($mods, search_get_additional_modules());
+
+    foreach ($mods as $mod) {
+        // build include file and function names
+        $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+        $delete_function = $mod->name.'_delete';
+        $db_names_function = $mod->name.'_db_names';
+        $deletions = array();
+
+        // a module without a document wrapper file has nothing in the index
+        if (!file_exists($class_file)) {
+            continue;
+        }
+        require_once($class_file);
+
+        // both API functions are required
+        if (!function_exists($delete_function) or !function_exists($db_names_function)) {
+            continue;
+        }
+
+        mtrace("Checking $mod->name module for deletions.");
+        $valuesArray = $db_names_function();
+        if ($valuesArray) {
+            // one entry per document type handled by the module
+            foreach ($valuesArray as $values) {
+                // $values[5] is an optional SQL condition, $values[4] the item type;
+                // '*' (or a missing entry) means that no item type filter is applied
+                $where = (isset($values[5])) ? 'WHERE '.$values[5] : '';
+                $itemtype = (isset($values[4])) ? $values[4] : '*';
+                $itemtypes = ($itemtype != '*') ? " itemtype = '{$itemtype}' AND " : '' ;
+
+                // records that still exist in the module table
+                $query = "
+                    SELECT 
+                        id,
+                        {$values[0]}
+                    FROM 
+                        {$CFG->prefix}{$values[1]}
+                        $where
+                ";
+                $docIds = get_records_sql($query);
+                $docIdList = ($docIds) ? implode("','", array_keys($docIds)) : '' ;
+
+                // indexed documents of this module that no longer match an existing record
+                $table = SEARCH_DATABASE_TABLE;
+                $query = "
+                    SELECT 
+                        id, 
+                        docid 
+                    FROM 
+                        {$CFG->prefix}{$table}
+                    WHERE 
+                        doctype = '{$mod->name}' AND 
+                        $itemtypes
+                        docid not in ('{$docIdList}')
+                ";
+                $records = get_records_sql($query);
+
+                // build an array of all the deleted records
+                if (is_array($records)) {
+                    foreach ($records as $record) {
+                        $deletions[] = $delete_function($record->docid, $itemtype);
+                    }
+                }
+            }
+
+            foreach ($deletions as $delete) {
+                // find the specific document in the index, using its docid, doctype and itemtype as keys
+                $doc = $index->find("+docid:{$delete->id} +doctype:$mod->name +itemtype:{$delete->itemtype}");
+
+                // get the record, should only be one
+                foreach ($doc as $thisdoc) {
+                    ++$deletion_count;
+                    mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+
+                    // remove it from index and database table
+                    $dbcontrol->delDocument($thisdoc);
+                    $index->delete($thisdoc->id);
+                }
+            }
+        }
+        else {
+            mtrace("No types to delete.\n");
+        }
+        mtrace("Finished $mod->name.\n");
+    }
+}
+
+// commit changes
+$index->commit();
+
+// update clean-up date and index size; the stored date is the time this run started
+set_config("search_indexer_cleanup_date", $startcleantime);
+set_config("search_index_size", (int)$CFG->search_index_size - (int)$deletion_count);
+
+mtrace("Finished $deletion_count removals.");
+mtrace('Index size after: '.$index->count().'
'); ?> \ No newline at end of file diff --git a/search/documents/chat_document.php b/search/documents/chat_document.php new file mode 100644 index 0000000000000..f3196d7da02a1 --- /dev/null +++ b/search/documents/chat_document.php @@ -0,0 +1,271 @@ +dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/chat/lib.php"); + +/* +* a class for representing searchable information +* +**/ +class ChatTrackSearchDocument extends SearchDocument { + + /** + * constructor + * + */ + public function __construct(&$chatsession, $chat_module_id, $course_id, $group_id, $context_id) { + // generic information; required + $doc->docid = $chat_module_id.'-'.$chatsession['sessionstart'].'-'.$chatsession['sessionend']; + $doc->documenttype = SEARCH_TYPE_CHAT; + $doc->itemtype = 'session'; + $doc->contextid = $context_id; + + $duration = $chatsession['sessionend'] - $chatsession['sessionstart']; + // we cannot call userdate with relevant locale at indexing time. + $doc->title = get_string('chatreport', 'chat').' '.get_string('openedon', 'search').' TT_'.$chatsession['sessionstart'].'_TT ('.get_string('duration', 'search').' 
: '.get_string('numseconds', '', $duration).')'; + $doc->date = $chatsession['sessionend']; + + //remove '(ip.ip.ip.ip)' from chat author list + $doc->author = preg_replace('/\(.*?\)/', '', $chatsession['authors']); + $doc->contents = $chatsession['content']; + $doc->url = chat_make_link($chat_module_id, $chatsession['sessionstart'], $chatsession['sessionend']); + + // module specific information; optional + $data->chat = $chat_module_id; + + // construct the parent class + parent::__construct($doc, $data, $course_id, $group_id, 0, PATH_FOR_SEARCH_TYPE_CHAT); + } //constructor +} //ChatTrackSearchDocument + + +/** +* constructs a valid link to a chat content +* @param cm_id the chat course module +* @param start the start time of the session +* @param end the end time of the session +* @return a well formed link to session display +*/ +function chat_make_link($cm_id, $start, $end) { + global $CFG; + + return $CFG->wwwroot.'/mod/chat/report.php?id='.$cm_id.'&start='.$start.'&end='.$end; +} //chat_make_link + +/** +* fetches all the records for a given session and assemble them as a unique track +* we revamped here the code of report.php for making sessions, but without any output. +* note that we should collect sessions "by groups" if groupmode() is SEPARATEGROUPS. +* @param chat_id the id of the chat record +* @param fromtime if not 0, only messages with timestamp >= fromtime are considered +* @param totime if not 0, only messages with timestamp <= totime are considered +* @return an array of objects representing the chat sessions. +*/ +function chat_get_session_tracks($chat_id, $fromtime = 0, $totime = 0) { + global $CFG; + + $chat = get_record('chat', 'id', $chat_id); + $course = get_record('course', 'id', $chat->course); + // the course module must be looked up with the id of the chat module, as the other functions of this file do + $coursemodule = get_field('modules', 'id', 'name', 'chat'); + $cm = get_record('course_modules', 'course', $course->id, 'module', $coursemodule, 'instance', $chat->id); + $groupmode = groupmode($course, $cm); + + $fromtimeclause = ($fromtime) ? "AND timestamp >= {$fromtime}" : ''; + $totimeclause = ($totime) ? 
"AND timestamp <= {$totime}" : ''; + $tracks = array(); + $messages = get_records_select('chat_messages', "chatid = '{$chat_id}' $fromtimeclause $totimeclause", "timestamp DESC"); + if ($messages){ + // splits discussions against groups + // NOTE(review): the function comment says sessions are collected by groups when groupmode is SEPARATEGROUPS, but this test splits by group in every other mode - confirm the intended condition + $groupedMessages = array(); + if ($groupmode != SEPARATEGROUPS){ + foreach($messages as $aMessage){ + $groupedMessages[$aMessage->groupid][] = $aMessage; + } + } + else{ + $groupedMessages[-1] = &$messages; + } + $sessiongap = 5 * 60; // 5 minutes silence means a new session + $sessionend = 0; + $sessionstart = 0; + $sessionusers = array(); + $lasttime = time(); + + foreach ($groupedMessages as $groupId => $messages) { // We are walking BACKWARDS through the messages + $messagesleft = count($messages); + foreach ($messages as $message) { // We are walking BACKWARDS through the messages + $messagesleft --; // Countdown + + if ($message->system) { + continue; + } + // we are within a session track + if ((($lasttime - $message->timestamp) < $sessiongap) and $messagesleft) { // Same session + if (count($tracks) > 0){ + if ($message->userid) { // Remember user and count messages + $tracks[count($tracks) - 1]->sessionusers[$message->userid] = $message->userid; + // update last track (if exists) record appending content (remember : we go backwards) + } + $tracks[count($tracks) - 1]->content .= ' '.$message->message; + $tracks[count($tracks) - 1]->sessionstart = $message->timestamp; + } + } + // we initiate a new session track (backwards) + else { + $track = new Object(); + $track->sessionend = $message->timestamp; + $track->sessionstart = $message->timestamp; + $track->content = $message->message; + // reset the accumulator of users + $track->sessionusers = array(); + $track->sessionusers[$message->userid] = $message->userid; + $track->groupid = $groupId; + $tracks[] = $track; + } + $lasttime = $message->timestamp; + } + } + } + return $tracks; +} //chat_get_session_tracks + +/** +* part of search engine API +* +*/ +function 
chat_iterator() { + $chatrooms = get_records('chat'); + return $chatrooms; +} //chat_iterator + +/** +* part of search engine API +* +*/ +function chat_get_content_for_index(&$chat) { + $documents = array(); + $course = get_record('course', 'id', $chat->course); + $coursemodule = get_field('modules', 'id', 'name', 'chat'); + $cm = get_record('course_modules', 'course', $course->id, 'module', $coursemodule, 'instance', $chat->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + + // getting records for indexing + $sessionTracks = chat_get_session_tracks($chat->id); + if ($sessionTracks){ + foreach($sessionTracks as $aTrackId => $aTrack) { + foreach($aTrack->sessionusers as $aUserId){ + $user = get_record('user', 'id', $aUserId); + $aTrack->authors = ($user) ? $user->firstname.' '.$user->lastname : '' ; + $documents[] = new ChatTrackSearchDocument(get_object_vars($aTrack), $cm->id, $chat->course, $aTrack->groupid, $context->id); + } + } + } + return $documents; +} //chat_get_content_for_index + +/** +* returns a single data search document based on a chat_session id +* chat session id is a text composite identifier made of : +* - the chat id +* - the timestamp when the session starts +* - the timestamp when the session ends +* @param id the multipart chat session id +* @param itemtype the type of information (session is the only type) +* @return the search document built from the first matching session track, nothing if no track is found +*/ +function chat_single_document($id, $itemtype) { + list($chat_id, $sessionstart, $sessionend) = split('-', $id); + $chat = get_record('chat', 'id', $chat_id); + $course = get_record('course', 'id', $chat->course); + $coursemodule = get_field('modules', 'id', 'name', 'chat'); + $cm = get_record('course_modules', 'course', $course->id, 'module', $coursemodule, 'instance', $chat->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + + // should be only one + // NOTE(review): both bounds are $sessionstart, so only messages stamped exactly at the session start are fetched - confirm whether $sessionend was intended + $tracks = chat_get_session_tracks($chat->id, $sessionstart, $sessionstart); + if ($tracks){ + $aTrack = $tracks[0]; + // hand the document back to the caller instead of storing it in a local array that was never returned + return new 
ChatTrackSearchDocument(get_object_vars($aTrack), $cm->id, $chat->course, $aTrack->groupid, $context->id); + } +} //chat_single_document + +/** +* dummy delete function that packs id with itemtype. +* this was here for a reason, but I can't remember it at the moment. +* +*/ +function chat_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //chat_delete + +/** +* returns the var names needed to build a sql query for addition/deletions +* // TODO chat indexable records are virtual. Should proceed in a special way +*/ +function chat_db_names() { + //[primary id], [table name], [time created field name], [time modified field name] + return null; +} //chat_db_names + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. +* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param itemtype the information subclassing (useful for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by entry_type. In chats, this id +* points out a session history which is a close sequence of messages. 
/**
* this function handles the access policy to contents indexed as searchable documents. If this
* function does not exist, the search engine assumes access is allowed.
* When this point is reached, we already know that :
* - user is legitimate in the surrounding context
* - user may be guest and guest access is allowed to the module
* - the function may perform local checks within the module information logic
* @param path the access path to the module script code
* @param itemtype the information subclassing (useful for complex modules, defaults to 'standard')
* @param this_id the item id within the information class denoted by itemtype. In chats, this id
* is the composite "chatid-start-end" key of a session history.
* @param user the user record denoting the user who searches
* @param group_id the current group used by the user when searching
* @param context_id the id of the context record the document was indexed in
* @return true if access is allowed, false elsewhere
*/
function chat_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){
    global $CFG;

    include_once("{$CFG->dirroot}/{$path}/lib.php");

    // the composite key comes in as $this_id (the original code parsed an undefined $id);
    // explode() replaces split(), which no longer exists in PHP 7+
    $parts = explode('-', $this_id);
    $chat_id = $parts[0];

    // get the chat session and all related stuff; any failed lookup denies access
    $chat = get_record('chat', 'id', $chat_id);
    if (!$chat) {
        return false;
    }
    $course = get_record('course', 'id', $chat->course);
    $module_context = get_record('context', 'id', $context_id);
    if (!$course || !$module_context) {
        return false;
    }
    $cm = get_record('course_modules', 'id', $module_context->instanceid);
    if (!$cm) {
        return false;
    }
    if (!$cm->visible && !has_capability('moodle/course:viewhiddenactivities', $module_context)) {
        return false;
    }

    //group consistency check : checks the following situations about groups
    // trap if user is not same group and groups are separated
    // the result is not used below; the call is kept as in the original code
    $current_group = get_current_group($course->id);
    if ((groupmode($course) == SEPARATEGROUPS) && !ismember($group_id) && !has_capability('moodle/site:accessallgroups', $module_context)) {
        return false;
    }

    //ownership check : checks the following situations about user
    // trap if user cannot read the chat logs
    // TODO : typically may be stored into indexing cache
    if (!has_capability('mod/chat:readlog', $module_context)) {
        return false;
    }

    return true;
} //chat_check_text_access

/**
* callback for chat_link_post_processing : formats one captured timestamp
* @param matches the regexp match, index 1 holding the digits of the timestamp
* @return the timestamp formatted by userdate()
*/
function chat_link_userdate_callback($matches){
    return userdate((int)$matches[1]);
} //chat_link_userdate_callback

/**
* this call back is called when displaying the link for some last post processing.
* Every TT_<timestamp>_TT marker in the title is replaced by the formatted date.
* @param title the raw title
* @return the title with all timestamp markers replaced
*/
function chat_link_post_processing($title){
    setlocale(LC_TIME, substr(current_language(), 0, 2));
    // the /e modifier was removed in PHP 7 and evaluated captured text as code;
    // a callback does the same job safely. The capture is limited to digits so that
    // several markers in one title are matched one by one.
    $title = preg_replace_callback('/TT_(\d+)_TT/', 'chat_link_userdate_callback', $title);
    return $title;
} //chat_link_post_processing
require_once("$CFG->dirroot/search/documents/document.php");
require_once("$CFG->dirroot/mod/data/lib.php");

/*
* a class for representing searchable information (data records)
*
**/
class DataSearchDocument extends SearchDocument {

    /**
    * constructor
    * @param record the data record as an array; id, title, timemodified, userid,
    * content, dataid and groupid are read
    * @param course_id the course the database belongs to
    * @param context_id the context the record is indexed in
    */
    public function __construct(&$record, $course_id, $context_id) {
        // generic information; required
        $doc = new stdClass();
        $doc->docid = $record['id'];
        $doc->documenttype = SEARCH_TYPE_DATA;
        $doc->itemtype = 'record';
        $doc->contextid = $context_id;

        $doc->title = $record['title'];
        $doc->date = $record['timemodified'];
        // the author name comes from the user record; a failed lookup gives an empty author
        $user = (!empty($record['userid'])) ? get_record('user', 'id', $record['userid']) : false;
        $doc->author = ($user) ? $user->firstname.' '.$user->lastname : '';
        $doc->contents = $record['content'];
        $doc->url = data_make_link($record['dataid'], $record['id']);

        // module specific information; optional
        // $data->params = serialize(@$record['params']); may be useful
        $data = new stdClass();
        $data->database = $record['dataid'];

        // construct the parent class
        parent::__construct($doc, $data, $course_id, $record['groupid'], $record['userid'], PATH_FOR_SEARCH_TYPE_DATA);
    } //constructor
} //DataSearchDocument

/*
* a class for representing searchable information (comments on data records)
*
**/
class DataCommentSearchDocument extends SearchDocument {

    /**
    * constructor
    * @param comment the comment as an array; id, title, modified, userid, content,
    * recordid, dataid and groupid are read
    * @param course_id the course the database belongs to
    * @param context_id the context the comment is indexed in
    */
    public function __construct(&$comment, $course_id, $context_id) {
        // generic information; required
        $doc = new stdClass();
        $doc->docid = $comment['id'];
        $doc->documenttype = SEARCH_TYPE_DATA;
        $doc->itemtype = 'comment';
        $doc->contextid = $context_id;

        $doc->title = get_string('commenton', 'search').' '.$comment['title'];
        $doc->date = $comment['modified'];
        // comment rows carry a userid, not an author string: resolve the name the same
        // way DataSearchDocument does
        $user = (!empty($comment['userid'])) ? get_record('user', 'id', $comment['userid']) : false;
        $doc->author = ($user) ? $user->firstname.' '.$user->lastname : '';
        $doc->contents = $comment['content'];
        // the database id travels in the comment array (the original read an undefined $data_id)
        $doc->url = data_make_link($comment['dataid'], $comment['recordid']);

        // module specific information; optional
        $data = new stdClass();
        $data->database = $comment['dataid'];

        // construct the parent class
        parent::__construct($doc, $data, $course_id, $comment['groupid'], $comment['userid'], PATH_FOR_SEARCH_TYPE_DATA);
    } //constructor
} //DataCommentSearchDocument

/**
* constructs a valid link to a data record content
* @param database_id the database reference
* @param record_id the record reference
* @return a valid url to access the information, as a string
*/
function data_make_link($database_id, $record_id) {
    global $CFG;

    return $CFG->wwwroot.'/mod/data/view.php?d='.$database_id.'&rid='.$record_id;
} //data_make_link

/**
* joins the five content columns of a data_content row into one searchable string
* @param datum a data_content row (content, content1 .. content4 are read)
* @return the space separated text, with a trailing space
*/
function data_search_flatten_content($datum) {
    return $datum->content.' '.$datum->content1.' '.$datum->content2.' '.$datum->content3.' '.$datum->content4.' ';
} //data_search_flatten_content

/**
* fetches all the records for a given database
* @param database_id the database
* @param typematch a comma separated list of field types that should be considered for searching, or *
* @return an array keyed by record id; each entry is an array of texts where '_first' holds
* the first matching field (in field id order) and the others are keyed by field name
*/
function data_get_records($database_id, $typematch = '*') {
    global $CFG;

    // the id is interpolated into SQL below, so force it to an integer
    $database_id = (int)$database_id;
    $fieldset = get_records('data_fields', 'dataid', $database_id);
    $query = "
        SELECT
            c.*
        FROM
            {$CFG->prefix}data_content as c,
            {$CFG->prefix}data_records as r
        WHERE
            c.recordid = r.id AND
            r.dataid = {$database_id}
        ORDER BY
            c.fieldid
    ";
    $data = get_records_sql($query);
    $records = array();
    if (!$data) {
        return $records;
    }
    foreach ($data as $aDatum) {
        $field = (isset($fieldset[$aDatum->fieldid])) ? $fieldset[$aDatum->fieldid] : null;
        if ($typematch != '*') {
            // keep only the content whose field type is listed in $typematch
            if (!$field || !preg_match('/\b'.preg_quote($field->type, '/').'\b/', $typematch)) {
                continue;
            }
        }
        $text = data_search_flatten_content($aDatum);
        if (!isset($records[$aDatum->recordid])) {
            $records[$aDatum->recordid]['_first'] = $text;
        } else {
            $fieldname = ($field) ? $field->name : 'field'.$aDatum->fieldid;
            $records[$aDatum->recordid][$fieldname] = $text;
        }
    }
    return $records;
} //data_get_records

/**
* fetches all the comments for a given database
* @param database_id the database
* @return an array of objects representing the data record comments, or false when none
*/
function data_get_comments($database_id) {
    global $CFG;

    $database_id = (int)$database_id;
    // restrict to the requested database: without this every database would
    // index the comments of all the others
    $query = "
        SELECT
            c.id,
            r.groupid,
            c.userid,
            c.recordid,
            c.content,
            c.created,
            c.modified,
            r.dataid
        FROM
            {$CFG->prefix}data_comments as c,
            {$CFG->prefix}data_records as r
        WHERE
            c.recordid = r.id AND
            r.dataid = {$database_id}
    ";
    $comments = get_records_sql($query);
    return $comments;
} //data_get_comments


/**
* part of search engine API
* @return all the database instances, as returned by get_records('data')
*/
function data_iterator() {
    $databases = get_records('data');
    return $databases;
} //data_iterator

/**
* part of search engine API
* @param database the database instance
* @return an array of searchable documents
*/
function data_get_content_for_index(&$database) {

    $documents = array();
    $recordTitles = array();
    $coursemodule = get_field('modules', 'id', 'name', 'data');
    $cm = get_record('course_modules', 'course', $database->course, 'module', $coursemodule, 'instance', $database->id);
    if (!$cm) {
        return $documents;
    }
    $context = get_context_instance(CONTEXT_MODULE, $cm->id);

    // getting records for indexing
    $records_content = data_get_records($database->id, 'text');
    if ($records_content){
        foreach ($records_content as $aRecordId => $fields) {

            // extract title as first record in order
            $first = $fields['_first'];
            unset($fields['_first']);

            // concatenates all other texts, starting afresh for every record
            $content = '';
            foreach ($fields as $aField) {
                $content .= ' '.$aField;
            }
            if (strlen($content) > 0) {
                $recordMetaData = get_record('data_records', 'id', $aRecordId);
                if (!$recordMetaData) {
                    continue;
                }
                $recordMetaData->title = $first;
                $recordTitles[$aRecordId] = $first;
                $recordMetaData->content = $content;
                // the constructor takes its first argument by reference
                $recordVars = get_object_vars($recordMetaData);
                $documents[] = new DataSearchDocument($recordVars, $database->course, $context->id);
            }
        }
    }

    // getting comments for indexing
    $records_comments = data_get_comments($database->id);
    if ($records_comments){
        foreach ($records_comments as $aComment) {
            // titles are only known for the records indexed above
            $aComment->title = (isset($recordTitles[$aComment->recordid])) ? $recordTitles[$aComment->recordid] : '';
            $commentVars = get_object_vars($aComment);
            $documents[] = new DataCommentSearchDocument($commentVars, $database->course, $context->id);
        }
    }
    return $documents;
} //data_get_content_for_index

/**
* returns a single data search document based on a data entry id
* @param id the id of the record or of the comment
* @param itemtype the type of the information ('record' or 'comment')
* @return a single searchable document, or null when it cannot be built
*/
function data_single_document($id, $itemtype) {

    // the id is interpolated into a SQL fragment below
    $id = (int)$id;
    $coursemodule = get_field('modules', 'id', 'name', 'data');

    if ($itemtype == 'record'){
        // get main record
        $recordMetaData = get_record('data_records', 'id', $id);
        if (!$recordMetaData) {
            return null;
        }
        // get context
        $record_course = get_field('data', 'course', 'id', $recordMetaData->dataid);
        $cm = get_record('course_modules', 'course', $record_course, 'module', $coursemodule, 'instance', $recordMetaData->dataid);
        if (!$cm) {
            return null;
        }
        $context = get_context_instance(CONTEXT_MODULE, $cm->id);
        // compute text : the field type is read from data_fields, the same way
        // data_get_records() does, and contents are taken in field id order
        $fieldset = get_records('data_fields', 'dataid', $recordMetaData->dataid);
        $recordData = get_records_select('data_content', "recordid = $id", 'fieldid');
        $first = '';
        $accumulator = '';
        $titleFound = false;
        if ($recordData){
            foreach ($recordData as $aDatum) {
                if (!isset($fieldset[$aDatum->fieldid]) || $fieldset[$aDatum->fieldid]->type != 'text') {
                    continue;
                }
                $text = data_search_flatten_content($aDatum);
                if (!$titleFound) {
                    // first text field gives the title
                    $first = $text;
                    $titleFound = true;
                } else {
                    $accumulator .= $text;
                }
            }
        }
        // add extra fields
        $recordMetaData->title = $first;
        $recordMetaData->content = $accumulator;
        // make document
        $recordVars = get_object_vars($recordMetaData);
        return new DataSearchDocument($recordVars, $record_course, $context->id);
    }

    if ($itemtype == 'comment'){
        // get main records
        $comment = get_record('data_comments', 'id', $id);
        if (!$comment) {
            return null;
        }
        $record = get_record('data_records', 'id', $comment->recordid);
        if (!$record) {
            return null;
        }
        // get context (from the parent record; the original read an undefined $recordMetaData here)
        $record_course = get_field('data', 'course', 'id', $record->dataid);
        $cm = get_record('course_modules', 'course', $record_course, 'module', $coursemodule, 'instance', $record->dataid);
        if (!$cm) {
            return null;
        }
        $context = get_context_instance(CONTEXT_MODULE, $cm->id);
        // add extra fields
        $comment->title = get_field('search_document', 'title', 'docid', $record->id, 'itemtype', 'record');
        $comment->dataid = $record->dataid;
        $comment->groupid = $record->groupid;
        // make document
        $commentVars = get_object_vars($comment);
        return new DataCommentSearchDocument($commentVars, $record_course, $context->id);
    }

    mtrace('Error : bad or missing item type');
    return null;
} //data_single_document

/**
* dummy delete function that packs id with itemtype.
* @param info the document id
* @param itemtype the information subclass of the document
* @return an object holding both values as ->id and ->itemtype
*/
function data_delete($info, $itemtype) {
    // create the holder explicitly; assigning a property on an undefined
    // variable is an Error on PHP 8
    $object = new stdClass();
    $object->id = $info;
    $object->itemtype = $itemtype;
    return $object;
} //data_delete

/**
* returns the var names needed to build a sql query for addition/deletions
* @return one array per indexed table, the fifth element being the item type
*/
function data_db_names() {
    //[primary id], [table name], [time created field name], [time modified field name]
    return array(
        array('id', 'data_records', 'timecreated', 'timemodified', 'record'),
        array('id', 'data_comments', 'created', 'modified', 'comment')
    );
} //data_db_names
/**
* this function handles the access policy to contents indexed as searchable documents. If this
* function does not exist, the search engine assumes access is allowed.
* When this point is reached, we already know that :
* - user is legitimate in the surrounding context
* - user may be guest and guest access is allowed to the module
* - the function may perform local checks within the module information logic
* @param path the access path to the module script code
* @param itemtype the information subclassing (useful for complex modules, defaults to 'standard')
* @param this_id the item id within the information class denoted by itemtype. In databases, this id
* points out an indexed data record or a comment on a record.
* @param user the user record denoting the user who searches
* @param group_id the current group used by the user when searching
* @param context_id the id of the context record the document was indexed in
* @return true if access is allowed, false elsewhere
*/
function data_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){
    global $CFG;

    // get the database object and all related stuff; any failed lookup denies access
    if ($itemtype == 'record'){
        $record = get_record('data_records', 'id', $this_id);
    }
    elseif ($itemtype == 'comment'){
        $comment = get_record('data_comments', 'id', $this_id);
        $record = ($comment) ? get_record('data_records', 'id', $comment->recordid) : false;
    }
    else{
        // we do not know what type of information is required
        return false;
    }
    if (!$record) {
        return false;
    }
    $data = get_record('data', 'id', $record->dataid);
    if (!$data) {
        return false;
    }
    $course = get_record('course', 'id', $data->course);
    $module_context = get_record('context', 'id', $context_id);
    if (!$course || !$module_context) {
        return false;
    }
    // the course module id is read from instanceid, as chat_check_text_access does
    // on the same table (the original read a property named instance here)
    $cm = get_record('course_modules', 'id', $module_context->instanceid);
    if (!$cm) {
        return false;
    }
    if (!$cm->visible && !has_capability('moodle/course:viewhiddenactivities', $module_context)) {
        return false;
    }

    // shortcut used by the approval, minimum-entries and opening-period checks below
    $is_manager = isteacher($data->course) || has_capability('mod/data:manageentries', $module_context);

    //group consistency check : checks the following situations about groups
    // trap if user is not same group and groups are separated
    // the result is not used below; the call is kept as in the original code
    $current_group = get_current_group($course->id);
    if ((groupmode($course) == SEPARATEGROUPS) && !ismember($group_id) && !has_capability('moodle/site:accessallgroups', $module_context)) {
        return false;
    }

    //ownership check : checks the following situations about user
    // trap if user is not owner and cannot see other's entries
    if ($itemtype == 'record'){
        if ($user->id != $record->userid && !has_capability('mod/data:viewentry', $module_context) && !has_capability('mod/data:manageentries', $module_context)) {
            return false;
        }
    }

    //approval check
    // trap if unapproved and has not approval capabilities
    // TODO : report a potential capability lack of : mod/data:approve
    $approval = get_field('data_records', 'approved', 'id', $record->id);
    if (!$approval && !$is_manager) {
        return false;
    }

    //minimum records to view check
    // trap if too few records
    // TODO : report a potential capability lack of : mod/data:viewhiddenentries
    $recordsAmount = count_records('data_records', 'dataid', $data->id);
    if ($data->requiredentriestoview > $recordsAmount && !$is_manager) {
        return false;
    }

    //opening periods check
    // trap if user has not capability to see hidden records and date is out of opening range
    // TODO : report a potential capability lack of : mod/data:viewhiddenentries
    $now = usertime(time());
    if ($data->timeviewfrom > 0 && $now < $data->timeviewfrom && !$is_manager) {
        return false;
    }
    if ($data->timeviewto > 0 && $now > $data->timeviewto && !$is_manager) {
        return false;
    }

    return true;
} // data_check_text_access
$this->addField(Zend_Search_Lucene_Field::Text('author', $doc->author)); - $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents)); - $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url)); - $this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date)); - - //additional data added on a per-module basis - $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data))); - - $this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $document_type)); - $this->addField(Zend_Search_Lucene_Field::Keyword('course_id', $course_id)); - $this->addField(Zend_Search_Lucene_Field::Keyword('group_id', $group_id)); +abstract class SearchDocument extends Zend_Search_Lucene_Document { + public function __construct(&$doc, &$data, $course_id, $group_id, $user_id, $path) { + //document identification and indexing + $this->addField(Zend_Search_Lucene_Field::Keyword('docid', $doc->docid)); + //document type : the name of the Moodle element that manages it + $this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $doc->documenttype)); + //allows subclassing information from complex modules. + $this->addField(Zend_Search_Lucene_Field::Keyword('itemtype', $doc->itemtype)); + //caches the course context. + $this->addField(Zend_Search_Lucene_Field::Keyword('course_id', $course_id)); + //caches the originator's group. + $this->addField(Zend_Search_Lucene_Field::Keyword('group_id', $group_id)); + //caches the originator if any + $this->addField(Zend_Search_Lucene_Field::Keyword('user_id', $user_id)); + // caches the context of this information. i-e, the context in which this information + // is being produced/attached. Speeds up the "check for access" process as context in + // which the information resides (a course, a module, a block, the site) is stable. 
+ $this->addField(Zend_Search_Lucene_Field::UnIndexed('context_id', $doc->contextid)); + + //data for document + $this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title)); + $this->addField(Zend_Search_Lucene_Field::Text('author', $doc->author)); + $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents)); + $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url)); + $this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date)); + + //additional data added on a per-module basis + $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data))); + + // adding a path allows the document to know where to find specific library calls + // for checking access to a module or block content. The Lucene records should only + // be responsible to bring back to that call sufficient and consistent information + // in order to perform the check. + $this->addField(Zend_Search_Lucene_Field::UnIndexed('path', $path)); + /* + // adding a capability set required for viewing. -1 if no capability required. + // the capability required for viewing is depending on the local situation + // of the document. each module should provide this information when pushing + // out search document structure. Although capability model should be kept flat + // there is no exclusion some module or block developpers use logical combinations + // of multiple capabilities in their code. This possibility should be left open here. 
+ $this->addField(Zend_Search_Lucene_Field::UnIndexed('capabilities', $caps)); + */ } //constructor - } //SearchDocument +} //SearchDocument ?> \ No newline at end of file diff --git a/search/documents/forum_document.php b/search/documents/forum_document.php index e447a53f44c21..a3b13d40a9277 100644 --- a/search/documents/forum_document.php +++ b/search/documents/forum_document.php @@ -1,135 +1,269 @@ dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/forum/lib.php"); - require_once("$CFG->dirroot/search/documents/document.php"); - require_once("$CFG->dirroot/mod/forum/lib.php"); +/* +* a class for representing searchable information +* +**/ +class ForumSearchDocument extends SearchDocument { - class ForumSearchDocument extends SearchDocument { - public function __construct(&$post, $forum_id, $course_id, $group_id) { - // generic information - $doc->docid = $post['id']; - $doc->title = $post['subject']; - $doc->author = $post['firstname']." ".$post['lastname']; - $doc->contents = $post['message']; - $doc->date = $post['created']; + /** + * constructor + * + */ + public function __construct(&$post, $forum_id, $course_id, $itemtype, $context_id) { + // generic information + $doc->docid = $post['id']; + $doc->documenttype = SEARCH_TYPE_FORUM; + $doc->itemtype = $itemtype; + $doc->contextid = $context_id; - $doc->url = forum_make_link($post['discussion'], $post['id']); - - // module specific information - $data->forum = $forum_id; - $data->discussion = $post['discussion']; - - parent::__construct($doc, $data, SEARCH_TYPE_FORUM, $course_id, $group_id); + $doc->title = $post['subject']; + $doc->author = $post['firstname']." 
".$post['lastname']; + $doc->contents = $post['message']; + $doc->date = $post['created']; + $doc->url = forum_make_link($post['discussion'], $post['id']); + + // module specific information + $data->forum = $forum_id; + $data->discussion = $post['discussion']; + + parent::__construct($doc, $data, $course_id, $post['groupid'], $post['userid'], PATH_FOR_SEARCH_TYPE_FORUM); } //constructor - } //ForumSearchDocument +} //ForumSearchDocument - function forum_make_link($discussion_id, $post_id) { +/** +* constructs a valid link to a chat content +* @param discussion_id the discussion +* @param post_id the id of a single post +* @return a well formed link to forum message display +*/ +function forum_make_link($discussion_id, $post_id) { global $CFG; + return $CFG->wwwroot.'/mod/forum/discuss.php?d='.$discussion_id.'#'.$post_id; - } //forum_make_link - - function forum_iterator() { - //no @ = Undefined index: 82 in moodle/lib/datalib.php on line 2671 - return @get_all_instances_in_courses("forum", get_courses()); - } //forum_iterator - - function forum_get_content_for_index(&$forum) { - $documents = array(); - if (!$forum) return $documents; - - $posts = forum_get_discussions_fast($forum->id); - if (!$posts) return $documents; - - while (!$posts->EOF) { - $post = $posts->fields; - - if (is_array($post)) { - if (strlen($post['message']) > 0 && ($post['deleted'] != 1)) { - $documents[] = new ForumSearchDocument($post, $forum->id, $forum->course, $post['groupid']); - } //if +} //forum_make_link - if ($children = forum_get_child_posts_fast($post['id'], $forum->id)) { - while (!$children->EOF) { - $child = $children->fields; +/** +* search standard API +* +*/ +function forum_iterator() { + $forums = get_records('forum'); + return $forums; +} //forum_iterator - if (strlen($child['message']) > 0 && ($child['deleted'] != 1)) { - $documents[] = new ForumSearchDocument($child, $forum->id, $forum->course, $post['groupid']); - } //if +/** +* search standard API +* @param forum a 
forum instance +* @return an array of searchable documents +*/ +function forum_get_content_for_index(&$forum) { - $children->MoveNext(); - } //foreach - } //if - } //if + $documents = array(); + if (!$forum) return $documents; - $posts->MoveNext(); - } //foreach + $posts = forum_get_discussions_fast($forum->id); + if (!$posts) return $documents; - return $documents; - } //forum_get_content_for_index + $coursemodule = get_field('modules', 'id', 'name', 'forum'); + $cm = get_record('course_modules', 'course', $forum->course, 'module', $coursemodule, 'instance', $forum->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); - //returns a single forum search document based on a forum_entry id - function forum_single_document($id) { - $posts = get_recordset('forum_posts', 'id', $id); - $post = $posts->fields; + foreach($posts as $aPost) { + $aPost->itemtype = 'head'; + if ($aPost) { + if (strlen($aPost->message) > 0) { + $documents[] = new ForumSearchDocument(get_object_vars($aPost), $forum->id, $forum->course, 'head', $context->id); + } + if ($children = forum_get_child_posts_fast($aPost->id, $forum->id)) { + foreach($children as $aChild) { + $aChild->itemtype = 'post'; + if (strlen($aChild->message) > 0) { + $documents[] = new ForumSearchDocument(get_object_vars($child), $forum->id, $forum->course, 'post', $context->id); + } + } + } + } + } + return $documents; +} //forum_get_content_for_index - $discussions = get_recordset('forum_discussions', 'id', $post['discussion']); - $discussion = $discussions->fields; +/** +* returns a single forum search document based on a forum entry id +* @param id an id for a single information stub +* @param itemtype the type of information +*/ +function forum_single_document($id, $itemtype) { - $forums = get_recordset('forum', 'id', $discussion['forum']); - $forum = $forums->fields; + // both known item types are posts so get them the same way + $post = get_record('forum_posts', 'id', $id); + $discussion = 
get_record('forum_discussions', 'id', $post->discussion); + $coursemodule = get_field('modules', 'id', 'name', 'forum'); + $cm = get_record('course_modules', 'course', $discussion->course, 'module', $coursemodule, 'instance', $discussion->forum); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + return new ForumSearchDocument(get_object_vars($post), $discussion->forum, $discussion->course, $itemtype, $context->id); +} //forum_single_document - return new ForumSearchDocument($post, $forum['id'], $forum['course'], $post['groupid']); - } //forum_single_document +/** +* dummy delete function that aggregates id with itemtype. +* this was here for a reason, but I can't remember it at the moment. +* +*/ +function forum_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //forum_delete - function forum_delete($info) { - return $info; - } //forum_delete - - //returns the var names needed to build a sql query for addition/deletions - function forum_db_names() { +/** +* returns the var names needed to build a sql query for addition/deletions +* +*/ +function forum_db_names() { //[primary id], [table name], [time created field name], [time modified field name] - return array('id', 'forum_posts', 'created', 'modified'); - } //forum_db_names + return array( + array('id', 'forum_posts', 'created', 'modified', 'head', 'parent = 0'), + array('id', 'forum_posts', 'created', 'modified', 'post', 'parent != 0') + ); +} //forum_db_names - //reworked faster version from /mod/forum/lib.php - function forum_get_discussions_fast($forum) { +/** +* reworked faster version from /mod/forum/lib.php +* @param forum_id a forum identifier +* @return an array of posts +*/ +function forum_get_discussions_fast($forum_id) { global $CFG, $USER; - + $timelimit=''; - if (!empty($CFG->forum_enabletimedposts)) { - if (!((isadmin() and !empty($CFG->admineditalways)) || isteacher(get_field('forum', 'course', 'id', $forum)))) { - $now = time(); - 
$timelimit = " AND ((d.timestart = 0 OR d.timestart <= '$now') AND (d.timeend = 0 OR d.timeend > '$now')"; - if (!empty($USER->id)) { - $timelimit .= " OR d.userid = '$USER->id'"; + if (!((isadmin() and !empty($CFG->admineditalways)) || isteacher(get_field('forum', 'course', 'id', $forum_id)))) { + $now = time(); + $timelimit = " AND ((d.timestart = 0 OR d.timestart <= '$now') AND (d.timeend = 0 OR d.timeend > '$now')"; + if (!empty($USER->id)) { + $timelimit .= " OR d.userid = '$USER->id'"; + } + $timelimit .= ')'; } - $timelimit .= ')'; - } } + + $query = " + SELECT + p.id, + p.subject, + p.discussion, + p.message, + p.created, + d.groupid, + p.userid, + u.firstname, + u.lastname + FROM + {$CFG->prefix}forum_discussions d + JOIN + {$CFG->prefix}forum_posts p + ON + p.discussion = d.id + JOIN + {$CFG->prefix}user u + ON + p.userid = u.id + WHERE + d.forum = '{$forum_id}' AND + p.parent = 0 + $timelimit + ORDER BY + d.timemodified DESC + "; + return get_records_sql($query); +} //forum_get_discussions_fast - return get_recordset_sql("SELECT p.id, p.subject, p.discussion, p.message, - p.deleted, d.groupid, u.firstname, u.lastname - FROM {$CFG->prefix}forum_discussions d - JOIN {$CFG->prefix}forum_posts p ON p.discussion = d.id - JOIN {$CFG->prefix}user u ON p.userid = u.id - WHERE d.forum = '$forum' - AND p.parent = 0 - $timelimit - ORDER BY d.timemodified DESC"); - } //forum_get_discussions_fast - - //reworked faster version from /mod/forum/lib.php - function forum_get_child_posts_fast($parent, $forumid) { +/** +* reworked faster version from /mod/forum/lib.php +* @param parent the id of the first post within the discussion +* @param forum_id the forum identifier +* @return an array of posts +*/ +function forum_get_child_posts_fast($parent, $forum_id) { global $CFG; + + $query = " + SELECT + p.id, + p.subject, + p.discussion, + p.message, + p.created, + {$forum_id} AS forum, + p.userid, + u.firstname, + u.lastname + FROM + {$CFG->prefix}forum_posts p + LEFT JOIN + 
{$CFG->prefix}user u + ON + p.userid = u.id + WHERE + p.parent = '{$parent}' + ORDER BY + p.created ASC + "; + return get_records_sql($query); +} //forum_get_child_posts_fast + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. +* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param itemtype the information subclassing (usefull for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by itemtype. In forums, this id +* points out the individual post. +* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @return true if access is allowed, false elsewhere +*/ +function forum_check_text_access($path, $itemtype, $this_id, $user, $group_id){ + global $CFG; + + include_once("{$CFG->dirroot}/{$path}/lib.php"); + + // get the glossary object and all related stuff + $post = get_record('forum_posts', 'id', $this_id); + $dicussion = get_record('forum_discussion', 'id', $post->discussion); + $course = get_record('course', 'id', $discussion->course); + $context_module = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $context_module->instanceid); + if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $context_module)) return false; + + // approval check : entries should be approved for being viewed, or belongs to the user + if (!$post->mailed && !has_capability('mod/forum:viewhiddentimeposts')) return false; - return get_recordset_sql("SELECT p.id, p.subject, p.discussion, p.message, p.deleted, - $forumid AS 
forum, u.firstname, u.lastname - FROM {$CFG->prefix}forum_posts p - LEFT JOIN {$CFG->prefix}user u ON p.userid = u.id - WHERE p.parent = '$parent' - ORDER BY p.created ASC"); - } //forum_get_child_posts_fast + // group check : entries should be in accessible groups + $current_group = get_current_group($course->id); + if ((groupmode($course, $cm) == SEPARATEGROUPS) && ($group_id != $current_group) && !has_capability('mod/forum:viewdiscussionsfromallgroups')) return false; + + return true; +} //forum_check_text_access ?> \ No newline at end of file diff --git a/search/documents/glossary_document.php b/search/documents/glossary_document.php index bfa692544be5a..da9e5e78fad2c 100644 --- a/search/documents/glossary_document.php +++ b/search/documents/glossary_document.php @@ -1,88 +1,235 @@ dirroot/search/documents/document.php"); - - class GlossarySearchDocument extends SearchDocument { - public function __construct(&$entry, $glossary_id, $course_id, $group_id) { - // generic information; required - $doc->docid = $entry['id']; - $doc->title = $entry['concept']; - $doc->date = $entry['timecreated']; - - $user = get_recordset('user', 'id', $entry['userid'])->fields; - - $doc->author = $user['firstname'].' 
'.$user['lastname']; - $doc->contents = $entry['definition']; - $doc->url = glossary_make_link($entry['id']); - - // module specific information; optional - $data->glossary = $glossary_id; - - // construct the parent class - parent::__construct($doc, $data, SEARCH_TYPE_GLOSSARY, $course_id, $group_id); +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* document handling for glossary activity module +* This file contains a mapping between a glossary entry and it's indexable counterpart, +* +* Functions for iterating and retrieving the necessary records are now also included +* in this file, rather than mod/glossary/lib.php +**/ + +require_once("$CFG->dirroot/search/documents/document.php"); + +/* +* a class for representing searchable information +* +**/ +class GlossarySearchDocument extends SearchDocument { + + /** + * document constructor + * + */ + public function __construct(&$entry, $course_id, $context_id) { + // generic information; required + $doc->docid = $entry['id']; + $doc->documenttype = SEARCH_TYPE_GLOSSARY; + $doc->itemtype = 'standard'; + $doc->contextid = $context_id; + + $doc->title = $entry['concept']; + $doc->date = $entry['timecreated']; + + if ($entry['userid']) + $user = get_record('user', 'id', $entry['userid']); + $doc->author = ($user ) ? $user->firstname.' 
'.$user->lastname : '' ; + $doc->contents = strip_tags($entry['definition']); + $doc->url = glossary_make_link($entry['id']); + + // module specific information; optional + $data->glossary = $entry['glossaryid']; + + // construct the parent class + parent::__construct($doc, $data, $course_id, -1, $entry['userid'], PATH_FOR_SEARCH_TYPE_GLOSSARY); } //constructor - } //GlossarySearchDocument - - function glossary_make_link($entry_id) { +} //GlossarySearchDocument + +/* +* a class for representing searchable information +* +**/ +class GlossaryCommentSearchDocument extends SearchDocument { + + /** + * document constructor + * + */ + public function __construct(&$entry, $glossary_id, $course_id, $context_id) { + // generic information; required + $doc->docid = $entry['id']; + $doc->documenttype = SEARCH_TYPE_GLOSSARY; + $doc->itemtype = 'comment'; + $doc->contextid = $context_id; + + $doc->title = get_string('commenton', 'search') . ' ' . $entry['concept']; + $doc->date = $entry['timemodified']; + + if ($entry['userid']) + $user = get_record('user', 'id', $entry['userid']); + $doc->author = ($user ) ? $user->firstname.' 
'.$user->lastname : '' ; + $doc->contents = strip_tags($entry['entrycomment']); + $doc->url = glossary_make_link($entry['entryid']); + + // module specific information; optional + $data->glossary = $glossary_id; + + // construct the parent class + parent::__construct($doc, $data, $course_id, -1, $entry['userid'], PATH_FOR_SEARCH_TYPE_GLOSSARY); + } //constructor +} //GlossaryCommentSearchDocument + +/** +* constructs valid access links to information +* @param entry_id the id of the glossary entry +* @return a full featured link element as a string +*/ +function glossary_make_link($entry_id) { global $CFG; //links directly to entry - //return $CFG->wwwroot.'/mod/glossary/showentry.php?eid='.$entry_id; + // return $CFG->wwwroot.'/mod/glossary/showentry.php?eid='.$entry_id; - //preserve glossary pop-up, be careful where you place your ' and "s + // TOO LONG URL + // Suggestion : bounce on popup within the glossarie's showentry page + // preserve glossary pop-up, be careful where you place your ' and "s //this function is meant to return a url that is placed between href='[url here]' - return "$CFG->wwwroot/mod/glossary/showentry.php?eid=$entry_id' onclick='return openpopup(\"/mod/glossary/showentry.php?eid=$entry_id\", \"entry\", \"menubar=0,location=0,scrollbars,resizable,width=600,height=450\", 0);"; - } //glossary_make_link + return "$CFG->wwwroot/mod/glossary/showentry.php?eid=$entry_id' onclick='return openpopup(\"/mod/glossary/showentry.php?eid=$entry_id\", \"entry\", DEFAULT_POPUP_SETTINGS, 0);"; +} //glossary_make_link + +/** +* part of search engine API +* +*/ +function glossary_iterator() { + $glossaries = get_records('glossary'); + return $glossaries; +} //glossary_iterator + +/** +* part of search engine API +* @glossary a glossary instance +* @return an array of searchable documents +*/ +function glossary_get_content_for_index(&$glossary) { + + // get context + $coursemodule = get_field('modules', 'id', 'name', 'glossary'); + $cm = 
get_record('course_modules', 'course', $glossary->course, 'module', $coursemodule, 'instance', $glossary->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); - function glossary_iterator() { - return get_all_instances_in_courses("glossary", get_courses()); - } //glossary_iterator - - function glossary_get_content_for_index(&$glossary) { $documents = array(); - - $entries = get_recordset('glossary_entries', 'glossaryid', $glossary->id); - - while (!$entries->EOF) { - $entry = $entries->fields; - - if ($entry and strlen($entry['definition']) > 0) { - $documents[] = new GlossarySearchDocument($entry, $glossary->id, $glossary->course, -1); - } //if - - $entries->MoveNext(); - } //foreach - + $entryIds = array(); + // index entries + $entries = get_records('glossary_entries', 'glossaryid', $glossary->id); + if ($entries){ + foreach($entries as $entry) { + $concepts[$entry->id] = $entry->concept; + if (strlen($entry->definition) > 0) { + $entryIds[] = $entry->id; + $documents[] = new GlossarySearchDocument(get_object_vars($entry), $glossary->course, $context->id); + } + } + } + + // index comments + if (count($entryIds)){ + $entryIdList = implode(',', $entryIds); + $comments = get_records_list('glossary_comments', 'entryid', $entryIdList); + if ($comments){ + foreach($comments as $comment) { + if (strlen($comment->entrycomment) > 0) { + $comment->concept = $concepts[$comment->entryid]; + $documents[] = new GlossaryCommentSearchDocument(get_object_vars($comment), $glossary->id, $glossary->course, $context->id); + } + } + } + } return $documents; - } //glossary_get_content_for_index - - //returns a single glossary search document based on a glossary_entry id - function glossary_single_document($id) { - $entries = get_recordset('glossary_entries', 'id', $id); - $entry = $entries->fields; - - $glossaries = get_recordset('glossary', 'id', $entry['glossaryid']); - $glossary = $glossaries->fields; - - return new GlossarySearchDocument($entry, $entry['glossaryid'], 
$glossary['course'], -1); - } //glossary_single_document - - //dummy delete function that converts docid from the search table to itself.. - //this was here for a reason, but I can't remember it at the moment. - function glossary_delete($info) { - return $info; - } //glossary_delete - - //returns the var names needed to build a sql query for addition/deletions - function glossary_db_names() { +} //glossary_get_content_for_index + +/** +* part of search engine API +* @param id the glossary entry identifier +* @itemtype the type of information +* @return a single search document based on a glossary entry +*/ +function glossary_single_document($id, $itemtype) { + if ($itemtype == 'standard'){ + $entry = get_record('glossary_entries', 'id', $id); + } + elseif ($itemtype == 'comment'){ + $comment = get_record('glossary_comments', 'id', $id); + $entry = get_record('glossary_entries', 'id', $comment->entryid); + } + $glossary_course = get_field('glossary', 'course', 'id', $entry->glossaryid); + $coursemodule = get_field('modules', 'id', 'name', 'glossary'); + $cm = get_record('course_modules', 'course', $glossary_course, 'module', $coursemodule, 'instance', $entry->glossaryid); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + if ($itemtype == 'standard'){ + return new GlossarySearchDocument(get_object_vars($entry), $glossary_course, $context->id); + } + elseif ($itemtype == 'comment'){ + return new GlossaryCommentSearchDocument(get_object_vars($comment), $entry->glossaryid, $glossary_course, $context->id); + } +} //glossary_single_document + +/** +* dummy delete function that packs id with itemtype. +* this was here for a reason, but I can't remember it at the moment. 
+* +*/ +function glossary_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //glossary_delete + +/** +* returns the var names needed to build a sql query for addition/deletions +* +*/ +function glossary_db_names() { //[primary id], [table name], [time created field name], [time modified field name] - return array('id', 'glossary_entries', 'timecreated', 'timemodified'); - } //glossary_db_names + return array( + array('id', 'glossary_entries', 'timecreated', 'timemodified', 'standard'), + array('id', 'glossary_comments', 'timemodified', 'timemodified', 'comment') + ); +} //glossary_db_names + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. +* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param itemtype the information subclassing (usefull for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by itemtype. In glossaries, this id +* points out the indexed glossary item. 
+* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @return true if access is allowed, false elsewhere +*/ +function glossary_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){ + global $CFG; + + // get the glossary object and all related stuff + $entry = get_record('glossary_entries', 'id', $id); + $glossary = get_record('glossary', 'id', $entry->glossaryid); + $course = get_record('course', 'id', $glossary->course); + $module_context = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $module_context->instance); + if (!$cm->visible && !has_capability('moodle/course:viewhiddenactivities', $module_context)) return false; + + //approval check : entries should be approved for being viewed, or belongs to the user unless the viewer can approve them or manage them + if (!$entry->approved && $user != $entry->userid && !has_capability('mod/glossary:approve', $module_context) && !has_capability('mod/glossary:manageentries', $module_context)) return false; + + return true; +} //glossary_check_text_access ?> \ No newline at end of file diff --git a/search/documents/physical_doc.php b/search/documents/physical_doc.php new file mode 100644 index 0000000000000..3260451f5a7dd --- /dev/null +++ b/search/documents/physical_doc.php @@ -0,0 +1,47 @@ +id)) return; + + // just call pdftotext over stdout and capture the output + if (!empty($CFG->block_search_word_to_text_cmd)){ + if (!file_exists("{$CFG->dirroot}/{$CFG->block_search_word_to_text_cmd}")){ + mtrace('Error with MSWord to text converter command : exectuable not found.'); + } + else{ + $file = $CFG->dataroot.'/'.$resource->course.'/'.$resource->reference; + $text_converter_cmd = "{$CFG->dirroot}/{$CFG->block_search_word_to_text_cmd} $file"; + if ($CFG->block_search_word_to_text_env){ + putenv($CFG->block_search_word_to_text_env); + } + $result = shell_exec($text_converter_cmd); + if 
($result){ + return mb_convert_encoding($result, 'UTF8', 'auto'); + } + else{ + mtrace('Error with MSWord to text converter command : execution failed.'); + return ''; + } + } + } + else { + mtrace('Error with MSWord to text converter command : command not set up. Execute once search block configuration.'); + return ''; + } +} +?> \ No newline at end of file diff --git a/search/documents/physical_htm.php b/search/documents/physical_htm.php new file mode 100644 index 0000000000000..256dd365fc361 --- /dev/null +++ b/search/documents/physical_htm.php @@ -0,0 +1,39 @@ +id)) return; + + // just get text + $text = implode('', file("{$CFG->dataroot}/{$resource->course}/($resource->reference)")); + + // extract keywords and other interesting meta information and put it back as real content for indexing + if (preg_match('/(.*)]*)>(.*)/is',$text, $matches)){ + $prefix = $matches[1]; + $meta_attributes = $matches[2]; + $suffix = $matches{3]; + if (preg_match('/name="(keywords|description)"/i', $attributes)){ + preg_match('/content="[^"]+"/i', $attributes, $matches); + $text = $prefix.' '.$matches[1].' 
'.$suffix; + } + } + // filter all html tags + // $text = clean_text($text, FORMAT_PLAIN); + // NOTE : this is done in ResourceSearchDocument __constructor + + if (!empty($CFG->block_search_limit_index_body)){ + $text = shorten($text, $CFG->block_search_limit_index_body); + } + return $text; +} +?> \ No newline at end of file diff --git a/search/documents/physical_html.php b/search/documents/physical_html.php new file mode 100644 index 0000000000000..8455e709b40f3 --- /dev/null +++ b/search/documents/physical_html.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search/documents/physical_pdf.php b/search/documents/physical_pdf.php new file mode 100644 index 0000000000000..12765b06863e3 --- /dev/null +++ b/search/documents/physical_pdf.php @@ -0,0 +1,41 @@ +id)) return; + + // just call pdftotext over stdout and capture the output + if (!empty($CFG->block_search_pdf_to_text_cmd)){ + preg_match("/^\S+/", $CFG->block_search_pdf_to_text_cmd, $matches); + if (!file_exists("{$CFG->dirroot}/{$matches[0]}")){ + mtrace('Error with pdf to text converter command : exectuable not found.'); + } + else{ + $file = $CFG->dataroot.'/'.$resource->course.'/'.$resource->reference; + $text_converter_cmd = "{$CFG->dirroot}/{$CFG->block_search_pdf_to_text_cmd} $file -"; + $result = shell_exec($text_converter_cmd); + if ($result){ + return $result; + } + else{ + mtrace('Error with pdf to text converter command : execution failed.'); + return ''; + } + } + } + else { + mtrace('Error with pdf to text converter command : command not set up. 
Execute once search block configuration.'); + return ''; + } +} +?> \ No newline at end of file diff --git a/search/documents/physical_ppt.php b/search/documents/physical_ppt.php new file mode 100644 index 0000000000000..c9ceb55e08253 --- /dev/null +++ b/search/documents/physical_ppt.php @@ -0,0 +1,80 @@ +id)) return; + + $text = implode('', file("{$CFG->dataroot}/{$resource->course}/{$resource->reference}")); + + $remains = $text; + $fragments = array(); + while (preg_match('/\x00\x9F\x0F\x04.{9}(......)(.*)/s', $remains, $matches)){ + $unpacked = unpack("ncode/Llength", $matches[1]); + $sequencecode = $unpacked['code']; + $length = $unpacked['length']; + // print "length : ".$length." ; segment type : ".sprintf("%x", $sequencecode)."
"; + $followup = $matches[2]; + // local system encoding sequence + if ($sequencecode == 0xA80F){ + $aFragment = substr($followup, 0, $length); + $remains = substr($followup, $length); + $fragments[] = $aFragment; + } + // denotes unicode encoded sequence + elseif ($sequencecode == 0xA00F){ + $aFragment = substr($followup, 0, $length); + // $aFragment = mb_convert_encoding($aFragment, 'UTF-16', 'UTF-8'); + $aFragment = preg_replace('/\xA0\x00\x19\x20/s', "'", $aFragment); // some quotes + $aFragment = preg_replace('/\x00/s', "", $aFragment); + $remains = substr($followup, $length); + $fragments[] = $aFragment; + } + else{ + $remains = $followup; + } + } + $indextext = implode(' ', $fragments); + $indextext = preg_replace('/\x19\x20/', "'", $indextext); // some quotes + $indextext = preg_replace('/\x09/', '', $indextext); // some extra chars + $indextext = preg_replace('/\x0D/', "\n", $indextext); // some quotes + $indextext = preg_replace('/\x0A/', "\n", $indextext); // some quotes + $indextextprint = implode('
', $fragments); + + $logppt = fopen("C:/php5/logs/pptlog", "w"); + fwrite($logppt, $indextext); + fclose($logppt); + + if (!empty($CFG->block_search_limit_index_body)){ + $indextext = shorten($text, $CFG->block_search_limit_index_body); + } + + $indextext = mb_convert_encoding($indextext, 'UTF8', 'auto'); + return $indextext; +} +?> \ No newline at end of file diff --git a/search/documents/physical_txt.php b/search/documents/physical_txt.php new file mode 100644 index 0000000000000..1ef3fd8d379ee --- /dev/null +++ b/search/documents/physical_txt.php @@ -0,0 +1,24 @@ +id)) return; + + // just try to get text empirically from ppt binary flow + $text = implode('', file("{$CFG->dataroot}/{$resource->course}/{$resource->reference}")); + if (!empty($CFG->block_search_limit_index_body)){ + $text = shorten($text, $CFG->block_search_limit_index_body); + } + return $text; +} +?> \ No newline at end of file diff --git a/search/documents/physical_xml.php b/search/documents/physical_xml.php new file mode 100644 index 0000000000000..64162a252b01e --- /dev/null +++ b/search/documents/physical_xml.php @@ -0,0 +1,28 @@ +id)) return; + + // just get text + $text = implode('', file("{$CFG->dataroot}/{$resource->course}/($resource->reference)")); + + // filter out all xml tags + $text = preg_replace("/<[^>]*>/", ' ', $text); + + if (!empty($CFG->block_search_limit_index_body)){ + $text = shorten($text, $CFG->block_search_limit_index_body); + } + return $text; +} +?> \ No newline at end of file diff --git a/search/documents/resource_document.php b/search/documents/resource_document.php index bf1f6dd44dc7a..3bc782b0f29e4 100644 --- a/search/documents/resource_document.php +++ b/search/documents/resource_document.php @@ -1,86 +1,307 @@ dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/resource/lib.php"); - class ResourceSearchDocument extends SearchDocument { - public function __construct(&$resource) { 
- // generic information; required - $doc->docid = $resource['id']; - $doc->title = strip_tags($resource['name']); - $doc->date = $resource['timemodified']; +/* +* a class for representing searchable information +* +**/ +class ResourceSearchDocument extends SearchDocument { + public function __construct(&$resource, $context_id) { + // generic information; required + $doc->docid = $resource['trueid']; + $doc->documenttype = SEARCH_TYPE_RESOURCE; + $doc->itemtype = $resource['type']; + $doc->contextid = $context_id; - $doc->author = ''; - $doc->contents = strip_tags($resource['summary']).' '.strip_tags($resource['alltext']); - $doc->url = resource_make_link($resource['id']); - - // module specific information; optional - $data = array(); - - // construct the parent class - parent::__construct($doc, $data, SEARCH_TYPE_RESOURCE, $resource['course'], -1); + $doc->title = strip_tags($resource['name']); + $doc->date = $resource['timemodified']; + $doc->author = ''; + $doc->contents = strip_tags($resource['summary']).' 
'.strip_tags($resource['alltext']); + $doc->url = resource_make_link($resource['id']); + + // module specific information; optional + $data = array(); + + // construct the parent class + parent::__construct($doc, $data, $resource['course'], 0, 0, PATH_FOR_SEARCH_TYPE_RESOURCE); } //constructor - } //ResourceSearchDocument +} //ResourceSearchDocument - function resource_make_link($resource_id) { +/** +* constructs valid access links to information +* @param resourceId the of the resource +* @return a full featured link element as a string +*/ +function resource_make_link($resource_id) { global $CFG; - return $CFG->wwwroot.'/mod/resource/view.php?r='.$resource_id; - } //resource_make_link + + return $CFG->wwwroot.'/mod/resource/view.php?id='.$resource_id; +} //resource_make_link - function resource_iterator() { +/** +* part of standard API +* +*/ +function resource_iterator() { //trick to leave search indexer functionality intact, but allow //this document to only use the below function to return info //to be searched return array(true); } //resource_iterator - //this function does not need a content iterator, returns all the info - //itself; remember to fake the iterator array though - function resource_get_content_for_index(&$notneeded) { +/** +* part of standard API +* this function does not need a content iterator, returns all the info +* itself; +* @param notneeded to comply API, remember to fake the iterator array though +* @return an array of searchable documents +*/ +function resource_get_content_for_index(&$notneeded) { + global $CFG; + + // starting with Moodle native resources $documents = array(); + $query = " + SELECT + id as trueid, + r.* + FROM + {$CFG->prefix}resource as r + WHERE + alltext != '' AND + alltext != ' ' AND + alltext != ' ' AND + type != 'file' + "; + $resources = get_records_sql($query); - $resources = get_recordset_sql('SELECT * - FROM {$CFG->prefix}resource - WHERE alltext NOT LIKE "" - AND alltext NOT LIKE " " - AND alltext NOT LIKE 
" " - AND TYPE != "file"'); + foreach($resources as $aResource){ + $coursemodule = get_field('modules', 'id', 'name', 'resource'); + $cm = get_record('course_modules', 'course', $aResource->course, 'module', $coursemodule, 'instance', $aResource->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + $aResource->id = $cm->id; + $documents[] = new ResourceSearchDocument(get_object_vars($aResource), $context->id); + mtrace("finished $aResource->name"); + } - while (!$resources->EOF) { - $resource = $resources->fields; + // special physical files handling + /** + * this sequence searches for a compatible physical stream handler for getting a text + * equivalence for the content. + * + */ + if (@$CFG->block_search_enable_file_indexing){ + $query = " + SELECT + r.id as trueid, + cm.id as id, + r.course as course, + r.name as name, + r.summary as summary, + r.alltext as alltext, + r.reference as reference, + r.type as type, + r.timemodified as timemodified + FROM + {$CFG->prefix}resource as r, + {$CFG->prefix}course_modules as cm, + {$CFG->prefix}modules as m + WHERE + r.type = 'file' AND + cm.instance = r.id AND + cm.course = r.course AND + cm.module = m.id AND + m.name = 'resource' + "; + $resources = get_records_sql($query); + + // invokes external content extractor if exists. 
+ foreach($resources as $aResource){ + // fetches a physical indexable document and adds it to documents passed by ref + $coursemodule = get_field('modules', 'id', 'name', 'resource'); + $cm = get_record('course_modules', 'id', $aResource->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + resource_get_physical_file($aResource, $context->id, false, $documents); + } + } + return $documents; +} //resource_get_content_for_index - if ($resource) { - $documents[] = new ResourceSearchDocument($resource); - } //if +/** +* get text from a physical file +* @param resource a resource for which to fetch some representative text +* @param getsingle if true, returns a single search document, elsewhere return the array +* given as documents increased by one +* @param documents the array of documents, by ref, where to add the new document. +* @return a search document when unique or false. +*/ +function resource_get_physical_file(&$resource, $context_id, $getsingle, &$documents = null){ + global $CFG; + + // cannot index empty references + if (empty($resource->reference)) return false; - $resources->MoveNext(); - } //foreach + // cannot index remote resources + if (resource_is_url($resource->reference)){ + mtrace("Cannot index remote URLs."); + return false; + } - return $documents; - } //resource_get_content_for_index + $fileparts = pathinfo($resource->reference); + // cannot index unknown or masked types + if (empty($fileparts['extension'])) { + return false; + } + + // cannot index non existent file + $file = "{$CFG->dataroot}/{$resource->course}/{$resource->reference}"; + if (!file_exists($file)){ + mtrace("Missing resource file $file : will not be indexed."); + return false; + } + + $ext = strtolower($fileparts['extension']); - //returns a single resource search document based on a resource_entry id - function resource_single_document($id) { - $resources = get_recordset_sql('SELECT * - FROM {$CFG->prefix}resource - WHERE alltext NOT LIKE "" - AND alltext NOT 
LIKE " " - AND alltext NOT LIKE " " - AND TYPE != "file", - AND id = '.$id); + // cannot index unallowed or unhandled types + if (!preg_match("/\b$ext\b/i", $CFG->block_search_filetypes)) { + mtrace($fileparts['extension'] . ' is not an allowed extension for indexing'); + return false; + } + if (file_exists($CFG->dirroot.'/search/documents/physical_'.$ext.'.php')){ + include_once($CFG->dirroot.'/search/documents/physical_'.$ext.'.php'); + $function_name = 'get_text_for_indexing_'.$ext; + $resource->alltext = $function_name($resource); + if (!empty($resource->alltext)){ + if ($getsingle){ + return new ResourceSearchDocument(get_object_vars($resource)); + } + else{ + $documents[] = new ResourceSearchDocument(get_object_vars($resource), $context_id); + } + mtrace("finished file $resource->name as {$resource->reference}"); + } + } + else{ + mtrace("fulltext handler not found for $ext type"); + } + return false; +} - $resource = $resources->fields; +/** +* part of standard API. +* returns a single resource search document based on a resource_entry id +* @param id the id of the accessible document +* @return a searchable object or null if failure +*/ +function resource_single_document($id, $itemtype) { + global $CFG; + + // rewriting with legacy moodle databse API + $query = " + SELECT + r.id as trueid, + cm.id as id, + r.course as course, + r.name as name, + r.summary as summary, + r.alltext as alltext, + r.reference as reference, + r.type as type, + r.timemodified as timemodified + FROM + {$CFG->prefix}resource as r, + {$CFG->prefix}course_modules as cm, + {$CFG->prefix}modules as m + WHERE + cm.instance = r.id AND + cm.course = r.course AND + cm.module = m.id AND + m.name = 'resource' AND + ((r.type != 'file' AND + r.alltext != '' AND + r.alltext != ' ' AND + r.alltext != ' ') OR + r.type = 'file') AND + r.id = '{$id}' + "; + $resource = get_record_sql($query); - return new ResourceSearchDocument($resource); - } //resource_single_document + if ($resource){ + 
$coursemodule = get_field('modules', 'id', 'name', 'resource'); + $cm = get_record('course_modules', 'id', $resource->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + if ($resource->type == 'file' && @$CFG->block_search_enable_file_indexing){ + $document = resource_get_physical_file($resource, true, $context->id); + if (!$document) mtrace("Warning : this document {$resource->name} will not be indexed"); + return $document; + } + else{ + return new ResourceSearchDocument(get_object_vars($resource), $context->id); + } + } + mtrace("null resource"); + return null; +} //resource_single_document - function resource_delete($info) { - return $info; - } //resource_delete +/** +* dummy delete function that aggregates id with itemtype. +* this was here for a reason, but I can't remember it at the moment. +* +*/ +function resource_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //resource_delete - //returns the var names needed to build a sql query for addition/deletions - function resource_db_names() { +/** +* returns the var names needed to build a sql query for addition/deletions +* +*/ +function resource_db_names() { //[primary id], [table name], [time created field name], [time modified field name], [additional where conditions for sql] - return array('id', 'resource', 'timemodified', 'timemodified', "WHERE alltext NOT LIKE '' AND alltext NOT LIKE ' ' AND alltext NOT LIKE ' ' AND TYPE != 'file'"); - } //resource_db_names + return array(array('id', 'resource', 'timemodified', 'timemodified', '*', " (alltext != '' AND alltext != ' ' AND alltext != ' ' AND TYPE != 'file') OR TYPE = 'file' ")); +} //resource_db_names + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. 
+* @param path the access path to the module script code +* @param itemtype the information subclassing (usefull for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by itemtype. In resources, this id +* points to the resource record and not to the module that shows it. +* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @return true if access is allowed, false elsewhere +*/ +function resource_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){ + global $CFG; + + include_once("{$CFG->dirroot}/{$path}/lib.php"); + + $r = get_record('resource', 'id', $this_id); + $module_context = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $module_context->instance); + //check if found course module is visible + if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $module_context)){ + return false; + } + + return true; +} //resource_check_text_access ?> \ No newline at end of file diff --git a/search/documents/techproject_document.php b/search/documents/techproject_document.php new file mode 100644 index 0000000000000..b1f2bc5231f08 --- /dev/null +++ b/search/documents/techproject_document.php @@ -0,0 +1,278 @@ +dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/techproject/lib.php"); + +/** +* a class for representing searchable information +* +*/ +class TechprojectEntrySearchDocument extends SearchDocument { + + /** + * constructor + * + */ + public function __construct(&$entry, $course_id, $context_id) { + // generic information + $doc->docid = $entry['id']; + $doc->documenttype = SEARCH_TYPE_TECHPROJECT; + $doc->itemtype = $entry['entry_type']; + $doc->contextid = $context_id; + + + $doc->title = $entry['abstract']; + $doc->author = ($entry['userid']) ? 
$entry['author'] : ''; + $doc->contents = strip_tags($entry['description']); + $doc->date = ''; + + $doc->url = techproject_make_link($entry['projectid'], $entry['id'], $entry['entry_type'], $entry['groupid']); + + // module specific information + $data->techproject = $entry['projectid']; + + parent::__construct($doc, $data, $course_id, $entry['groupid'], $entry['userid'], PATH_FOR_SEARCH_TYPE_TECHPROJECT); + } //constructor +} //TechprojectEntrySearchDocument + +/** +* constructs a valid link to a description detail +* +*/ +function techproject_make_link($techproject_id, $entry_id, $entry_type, $group_id) { + global $CFG; + return $CFG->wwwroot.'/mod/techproject/view.php?view=view_detail&id='.$techproject_id.'&objectId='.$entry_id.'&objectClass='.$entry_type.'&group='.$group_id; +} //techproject_make_link + +/** +* search standard API +* +*/ +function techproject_iterator() { + $techprojects = get_records('techproject'); + return $techprojects; +} //techproject_iterator + +/** +* search standard API +* @param techproject a techproject instance +* @return an array of collected searchable documents +*/ +function techproject_get_content_for_index(&$techproject) { + $documents = array(); + if (!$techproject) return $documents; + + $requirements = techproject_get_entries($techproject->id, 'requirement'); + $specifications = techproject_get_entries($techproject->id, 'specification'); + $tasks = techproject_get_tasks($techproject->id); + $milestones = techproject_get_entries($techproject->id, 'milestone'); + $deliverables = techproject_get_entries($techproject->id, 'deliverable'); + $coursemodule = get_field('modules', 'id', 'name', 'techproject'); + $cm = get_record('course_modules', 'course', $techproject->course, 'module', $coursemodule, 'instance', $techproject->id); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + + $entries = array_merge($requirements, $specifications, $milestones, $deliverables); + foreach($entries as $anEntry) { + if ($anEntry) { + 
if (strlen($anEntry->description) > 0) { + $documents[] = new TechprojectEntrySearchDocument(get_object_vars($anEntry), $techproject->course, $context->id); + } + } + } + + foreach($tasks as $aTask) { + if ($aTask) { + if (strlen($aTask->description) > 0) { + if ($aTask->assignee){ + $user = get_record('user', 'id', $aTask->assignee); + $aTask->author = $user->firstname.' '.$user->lastname; + } + $documents[] = new TechprojectEntrySearchDocument(get_object_vars($aTask), $techproject->course, $context->id); + } + } + } + return $documents; +} //techproject_get_content_for_index + +/** +* returns a single techproject search document based on a techproject_entry id and itemtype +* +*/ +function techproject_single_document($id, $itemtype) { + switch ($itemtype){ + case 'requirement':{ + $entry = get_record('techproject_requirement', 'id', $id); + break; + } + case 'specification':{ + $entry = get_record('techproject_specification', 'id', $id); + break; + } + case 'milestone':{ + $entry = get_record('techproject_milestone', 'id', $id); + break; + } + case 'deliverable':{ + $entry = get_record('techproject_deliverable', 'id', $id); + break; + } + case 'task':{ + $entry = get_record('techproject_task', 'id', $id); + if ($entry->assignee){ + $user = get_record('user', 'id', $entry->assignee); + $entry->author = $user->firstname.' 
'.$user->lastname; + } + break; + } + } + $techprojet_course = get_field('techproject', 'course', 'id', $entry->projectid); + $coursemodule = get_field('modules', 'id', 'name', 'techproject'); + $cm = get_record('course_modules', 'course', $techproject_course, 'module', $coursemodule, 'instance', $entry->projectid); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + $entry->type = $itemtype; + $techproject = get_record('techproject', 'id', $requirement->projectid); + return new TechprojectEntrySearchDocument(get_object_vars($anEntry), $techproject->course, $context->id); +} //techproject_single_document + +/** +* dummy delete function that packs id with itemtype. +* this was here for a reason, but I can't remember it at the moment. +* +*/ +function techproject_delete($info, $itemtype) { + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //techproject_delete + +/** +* returns the var names needed to build a sql query for addition/deletions +* +*/ +// TODO : what should we do there ? 
+function techproject_db_names() { + //[primary id], [table name], [time created field name], [time modified field name] + return array( + array('id', 'techproject_requirement', 'created', 'modified', 'requirement'), + array('id', 'techproject_specification', 'created', 'modified', 'specification'), + array('id', 'techproject_task', 'created', 'modified', 'task'), + array('id', 'techproject_milestone', 'created', 'modified', 'milestone'), + array('id', 'techproject_deliverable', 'created', 'modified', 'deliverable') + ); +} //techproject_db_names + +/** +* get a complete list of entries of one particular type +* @param techprojectId the project instance +* @param type the entity type +* @return an array of records +*/ +function techproject_get_entries($techproject_id, $type) { + global $CFG; + + $query = " + SELECT + e.id, + e.abstract, + e.description, + e.projectid, + e.groupid, + e.userid, + '$type' AS entry_type + FROM + {$CFG->prefix}techproject_{$type} AS e + WHERE + e.projectid = '{$techproject_id}' + "; + return get_records_sql($query); +} //techproject_get_entries + +/** +* get the task list for a project instance +* @param techprojectId the project +* @return an array of records that represent tasks +*/ +function techproject_get_tasks($techproject_id) { + global $CFG; + + $query = " + SELECT + t.id, + t.abstract, + t.description, + t.projectid, + t.groupid, + t.owner as userid, + u.firstname, + u.lastname, + 'task' as entry_type + FROM + {$CFG->prefix}techproject_task AS t + LEFT JOIN + {$CFG->prefix}user AS u + ON + t.owner = u.id + WHERE + t.projectid = '{$techproject_id}' + ORDER BY + t.taskstart ASC + "; + return get_records_sql($query); +} //techproject_get_tasks + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. 
+* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param entry_type the information subclassing (usefull for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by entry_type. In techprojects, this id +* points to the techproject instance in which all resources are indexed. +* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @return true if access is allowed, false elsewhere +*/ +function techproject_check_text_access($path, $entry_type, $this_id, $user, $group_id, $context_id){ + global $CFG; + + include_once("{$CFG->dirroot}/{$path}/lib.php"); + + // get the techproject object and all related stuff + $techproject = get_record('techproject', 'id', $this_id); + $course = get_record('course', 'id', $techproject->course); + $module_context = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $module_context->instance); + if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $module_context)) return false; + + //group consistency check : checks the following situations about groups + // if user is guest check access capabilities for guests : + // guests can see default project, and other records if groups are liberal + // TODO : change guestsallowed in a capability + if (isguest() && $techproject->guestsallowed){ + if ($group_id && groupmode($course, $cm) == SEPARATEGROUPS) + return false; + return true; + } + + // trap if user is not same group and groups are separated + $current_group = get_current_group($course->id); + if ((groupmode($course) == SEPARATEGROUPS) && $group_id != $current_group && $group_id) return false; + 
+ //trap if ungroupedsees is off in strict access mode and user is not teacher + if ((groupmode($course) == SEPARATEGROUPS) && !$techproject->ungroupedsees && !$group_id && isteacher($user->id)) return false; + + return true; +} //techproject_check_text_access + +?> \ No newline at end of file diff --git a/search/documents/wiki_document.php b/search/documents/wiki_document.php index ea1662e143776..145111c03f12e 100644 --- a/search/documents/wiki_document.php +++ b/search/documents/wiki_document.php @@ -1,158 +1,246 @@ title = wikipage->pagename - * - * Functions for iterating and retrieving the necessary records are now also included - * in this file, rather than mod/wiki/lib.php - * */ - - require_once("$CFG->dirroot/search/documents/document.php"); - require_once("$CFG->dirroot/mod/wiki/lib.php"); - - /* All the $doc->___ fields are required by the base document class! - * Each and every module that requires search functionality must correctly - * map their internal fields to the five $doc fields (id, title, author, contents - * and url). Any module specific data can be added to the $data object, which is - * serialised into a binary field in the index. 
- * */ - class WikiSearchDocument extends SearchDocument { - public function __construct(&$page, $wiki_id, $course_id, $group_id) { - // generic information; required - $doc->docid = $page->id; - $doc->title = $page->pagename; - $doc->date = $page->timemodified; - - //remove '(ip.ip.ip.ip)' from wiki author field - $doc->author = preg_replace('/\(.*?\)/', '', $page->author); - $doc->contents = $page->content; - $doc->url = wiki_make_link($wiki_id, $page->pagename, $page->version); - - // module specific information; optional - $data->version = $page->version; - $data->wiki = $wiki_id; - - // construct the parent class - parent::__construct($doc, $data, SEARCH_TYPE_WIKI, $course_id, $group_id); +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* document handling for wiki activity module +* This file contains the mapping between a wiki page and it's indexable counterpart, +* e.g. searchdocument->title = wikipage->pagename +* +* Functions for iterating and retrieving the necessary records are now also included +* in this file, rather than mod/wiki/lib.php +**/ + +require_once("$CFG->dirroot/search/documents/document.php"); +require_once("$CFG->dirroot/mod/wiki/lib.php"); + +/* +* All the $doc->___ fields are required by the base document class! +* Each and every module that requires search functionality must correctly +* map their internal fields to the five $doc fields (id, title, author, contents +* and url). Any module specific data can be added to the $data object, which is +* serialised into a binary field in the index. 
+**/ +class WikiSearchDocument extends SearchDocument { + public function __construct(&$page, $wiki_id, $course_id, $group_id, $user_id, $context_id) { + // generic information; required + $doc->docid = $page['id']; + $doc->documenttype = SEARCH_TYPE_WIKI; + $doc->itemtype = 'standard'; + $doc->contextid = $context_id; + + $doc->title = $page['pagename']; + $doc->date = $page['timemodified']; + //remove '(ip.ip.ip.ip)' from wiki author field + $doc->author = preg_replace('/\(.*?\)/', '', $page['author']); + $doc->contents = $page['content']; + $doc->url = wiki_make_link($wiki_id, $page['pagename'], $page['version']); + + // module specific information; optional + $data->version = $page['version']; + $data->wiki = $wiki_id; + + // construct the parent class + parent::__construct($doc, $data, $course_id, $group_id, $user_id, PATH_FOR_SEARCH_TYPE_WIKI); } //constructor - } //WikiSearchDocument - - function wiki_name_convert($str) { +} //WikiSearchDocument + +/** +* converts a page name to cope Wiki constraints. Transforms spaces in plus. 
+* @param str the name to convert +* @return the converted name +*/ +function wiki_name_convert($str) { return str_replace(' ', '+', $str); - } //wiki_name_convert - - function wiki_make_link($wiki_id, $title, $version) { +} //wiki_name_convert + +/** +* constructs a valid link to a wiki content +* @param wikiId +* @param title +* @param version +*/ +function wiki_make_link($wikiId, $title, $version) { global $CFG; - return $CFG->wwwroot.'/mod/wiki/view.php?wid='.$wiki_id.'&page='.wiki_name_convert($title).'&version='.$version; - } //wiki_make_link - //rescued and converted from ewikimoodlelib.php - //retrieves latest version of a page - function wiki_get_latest_page(&$entry, $pagename, $version=0) { + return $CFG->wwwroot.'/mod/wiki/view.php?wid='.$wikiId.'&page='.wiki_name_convert($title).'&version='.$version; +} //wiki_make_link + +/** +* rescued and converted from ewikimoodlelib.php +* retrieves latest version of a page +* @param entry the wiki object as a reference +* @param pagename the name of the page known by the wiki engine +* @param version +*/ +function wiki_get_latest_page(&$entry, $pagename, $version = 0) { $pagename = "'".addslashes($pagename)."'"; - + if ($version > 0 and is_int($version)) { - $version = "AND (version=$version)"; + $version = "AND (version=$version)"; } else { - $version = ''; - } //else - + $version = ''; + } + $select = "(pagename=$pagename) AND wiki=".$entry->id." 
$version "; $sort = 'version DESC'; - + //change this to recordset_select, as per http://docs.moodle.org/en/Datalib_Notes if ($result_arr = get_records_select('wiki_pages', $select, $sort, '*', 0, 1)) { - foreach ($result_arr as $obj) { - $result_obj = $obj; - } //foreach - } //if - + foreach ($result_arr as $obj) { + $result_obj = $obj; + } + } + if (isset($result_obj)) { - $result_obj->meta = @unserialize($result_obj->meta); - return $result_obj; + $result_obj->meta = @unserialize($result_obj->meta); + return $result_obj; } else { - return false; - } //else - } //wiki_get_latest_page - - //fetches all pages, including old versions - function wiki_get_pages(&$entry) { + return false; + } +} //wiki_get_latest_page + +/** +* fetches all pages, including old versions +* @param entry the wiki object as a reference +* @return an array of record objects that represents pages of this wiki object +*/ +function wiki_get_pages(&$entry) { return get_records('wiki_pages', 'wiki', $entry->id); - } //wiki_get_pages - - //fetches all the latest versions of all the pages - function wiki_get_latest_pages(&$entry) { - //== (My)SQL for this - /* select * from wiki_pages - inner join - (select wiki_pages.pagename, max(wiki_pages.version) as ver - from wiki_pages group by pagename) as a - on ((wiki_pages.version = a.ver) and - (wiki_pages.pagename like a.pagename)) */ +} //wiki_get_pages + +/** +* fetches all the latest versions of all the pages +* +*/ +function wiki_get_latest_pages(&$entry) { + //== (My)SQL for this + /* select * from wiki_pages + inner join + (select wiki_pages.pagename, max(wiki_pages.version) as ver + from wiki_pages group by pagename) as a + on ((wiki_pages.version = a.ver) and + (wiki_pages.pagename like a.pagename)) */ $pages = array(); - + //http://moodle.org/bugs/bug.php?op=show&bugid=5877&pos=0 - //if ($ids = get_records('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename')) { - if ($rs = get_recordset('wiki_pages', 'wiki', $entry->id, '', 'distinct 
pagename')) { - $ids = $rs->GetRows(); - //-- - foreach ($ids as $id) { - $pages[] = wiki_get_latest_page($entry, $id[0]); - } //foreach - } else { - return false; - } //else - + if ($ids = get_records('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename')) { + if ($pagesets = get_records('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename')) { + foreach ($pagesets as $aPageset) { + $pages[] = wiki_get_latest_page($entry, $aPageset->id); + } + } else { + return false; + } + } return $pages; - } //wiki_get_latest_pages - - function wiki_iterator() { - return get_all_instances_in_courses("wiki", get_courses()); - } //wiki_iterator +} //wiki_get_latest_pages + +/** +* part of search engine API +* +*/ +function wiki_iterator() { + $wikis = get_records('wiki'); + return $wikis; +} //wiki_iterator + +/** +* part of search engine API +* @param wiki a wiki instance +* @return an array of searchable deocuments +*/ +function wiki_get_content_for_index(&$wiki) { - function wiki_get_content_for_index(&$wiki) { $documents = array(); - $entries = wiki_get_entries($wiki); foreach($entries as $entry) { - //all pages - //$pages = wiki_get_pages($entry); - - //latest pages - $pages = wiki_get_latest_pages($entry); - - if (is_array($pages)) { - foreach($pages as $page) { - if (strlen($page->content) > 0) { - $documents[] = new WikiSearchDocument($page, $entry->wikiid, $entry->course, $entry->groupid); - } //if - } //foreach - } //if - } //foreach - + $coursemodule = get_field('modules', 'id', 'name', 'wiki'); + $cm = get_record('course_modules', 'course', $entry->course, 'module', $coursemodule, 'instance', $entry->wikiid); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + + //all pages + //$pages = wiki_get_pages($entry); + + //latest pages + $pages = wiki_get_latest_pages($entry); + if (is_array($pages)) { + foreach($pages as $page) { + if (strlen($page->content) > 0) { + $documents[] = new WikiSearchDocument(get_object_vars($page), $entry->wikiid, 
$entry->course, $entry->groupid, $page->userid, $context->id); + } + } + } + } return $documents; - } //wiki_get_content_for_index - - //returns a single wiki search document based on a wiki_entry id - function wiki_single_document($id) { - $pages = get_recordset('wiki_pages', 'id', $id); - $page = $pages->fields; - - $entries = get_recordset('wiki_entries', 'id', $page['wiki']); - $entry = $entries->fields; - - return new WikiSearchDocument($page, $entry['wikiid'], $entry['course'], $entry['groupid']); - } //wiki_single_document - - function wiki_delete($info) { - return $info; - } //wiki_delete - - //returns the var names needed to build a sql query for addition/deletions - function wiki_db_names() { +} //wiki_get_content_for_index + +/** +* returns a single wiki search document based on a wiki page id +* @param id the id of the wiki page (wiki_pages.id) +* @param itemtype the type of information (standard); not used by this function +* @return a searchable document +*/ +function wiki_single_document($id, $itemtype) { + $page = get_record('wiki_pages', 'id', $id); + $entry = get_record('wiki_entries', 'id', $page->wiki); + $coursemodule = get_field('modules', 'id', 'name', 'wiki'); + $cm = get_record('course_modules', 'course', $entry->course, 'module', $coursemodule, 'instance', $entry->wikiid); + $context = get_context_instance(CONTEXT_MODULE, $cm->id); + return new WikiSearchDocument(get_object_vars($page), $entry->wikiid, $entry->course, $entry->groupid, $page->userid, $context->id); +} //wiki_single_document + +/** +* dummy delete function that packs id with itemtype. +* this was here for a reason, but I can't remember it at the moment. 
+* @return an object holding the given id and itemtype +*/ +function wiki_delete($info, $itemtype) { + // create the container explicitly: assigning a property on an undefined variable fails on recent PHP + $object = new stdClass; + $object->id = $info; + $object->itemtype = $itemtype; + return $object; +} //wiki_delete + +//returns the var names needed to build a sql query for addition/deletions +function wiki_db_names() { //[primary id], [table name], [time created field name], [time modified field name] - return array('id', 'wiki_pages', 'created', 'lastmodified'); - } //wiki_db_names - + return array(array('id', 'wiki_pages', 'created', 'lastmodified', 'standard')); +} //wiki_db_names + +/** +* this function handles the access policy to contents indexed as searchable documents. If this +* function does not exist, the search engine assumes access is allowed. +* When this point is reached, we already know that : +* - user is legitimate in the surrounding context +* - user may be guest and guest access is allowed to the module +* - the function may perform local checks within the module information logic +* @param path the access path to the module script code +* @param itemtype the information subclassing (useful for complex modules, defaults to 'standard') +* @param this_id the item id within the information class denoted by itemtype. In wikis, this id +* points out the indexed wiki page. 
+* @param user the user record denoting the user who searches +* @param group_id the current group used by the user when searching +* @param context_id the id of the context record attached to the wiki course module +* @return true if access is allowed, false otherwise +*/ +function wiki_check_text_access($path, $itemtype, $this_id, $user, $group_id, $context_id){ + global $CFG; + + // get the wiki object and all related stuff + // the indexed wiki page id is received as $this_id + $page = get_record('wiki_pages', 'id', $this_id); + $entry = get_record('wiki_entries', 'id', $page->wiki); + $course = get_record('course', 'id', $entry->course); + $module_context = get_record('context', 'id', $context_id); + $cm = get_record('course_modules', 'id', $module_context->instance); + if (!$cm->visible and !has_capability('moodle/course:viewhiddenactivities', $module_context)) return false; + + //group consistency check : checks the following situations about groups + // trap if user is not same group and groups are separated + $current_group = get_current_group($course->id); + if ((groupmode($course) == SEPARATEGROUPS) && $group_id != $current_group && !has_capability('moodle/site:accessallgroups', $module_context)) return false; + + return true; +} //wiki_check_text_access ?> \ No newline at end of file diff --git a/search/index.php b/search/index.php index 3bf5495bc8de2..264892e33df34 100644 --- a/search/index.php +++ b/search/index.php @@ -1,8 +1,9 @@ \ No newline at end of file diff --git a/search/indexer.php b/search/indexer.php index c1e18abbb6793..bd231d7ae2676 100644 --- a/search/indexer.php +++ b/search/indexer.php @@ -1,176 +1,197 @@ dirroot/search/lib.php"); - - //only administrators can index the moodle installation, because access to all pages is required - require_login(); - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } - - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if - - //confirmation flag to prevent accidental reindexing (indexersplash.php is the correct entry point) - $sure = 
strtolower(optional_param('areyousure', '', PARAM_ALPHA)); - - if ($sure != 'yes') { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* The indexer logic - +* +* Look through each installed module's or block's search document class file (/search/documents) +* for necessary search functions, and if they're present add the content to the index. +* Repeat this for blocks. +* +* Because the iterator/retrieval functions are now stored in /search/documents/_document.php, +* /mod/mod/lib.php doesn't have to be modified - and thus the search module becomes quite +* self-sufficient. URL's are now stored in the index, stopping us from needing to require +* the class files to generate a results page. +* +* Along with the index data, each document's summary gets stored in the database +* and synchronised to the index (flat file) via the primary key ('id') which is mapped +* to the 'dbid' field in the index +* */ + +//this'll take some time, set up the environment +@set_time_limit(0); +@ob_implicit_flush(true); +@ob_end_flush(); + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +//only administrators can index the moodle installation, because access to all pages is required +require_login(); + +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} + +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} //if + +//confirmation flag to prevent accidental reindexing (indexersplash.php is the correct entry point) +$sure = strtolower(optional_param('areyousure', '', PARAM_ALPHA)); + +if ($sure != 'yes') { mtrace("
Sorry, you need to confirm indexing via indexersplash.php"
           .". (Back to query page).
"); exit(0); - } //if +} //if - //check for php5 (lib.php) - if (!search_check_php5()) { +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if +} - //php5 found, continue including php5-only files - //require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); - require_once("$CFG->dirroot/search/indexlib.php"); +//php5 found, continue including php5-only files +//require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); +require_once("$CFG->dirroot/search/indexlib.php"); - mtrace('
Server Time: '.date('r',time())."\n");
+mtrace('');
+mtrace('
Server Time: '.date('r',time())."\n");
 
-  if ($CFG->search_indexer_busy == '1') {
+if ($CFG->search_indexer_busy == '1') {
     //means indexing was not finished previously
     mtrace("Warning: Indexing was not successfully completed last time, restarting.\n");
-  } //if
+}
 
-  //turn on busy flag
-  set_config('search_indexer_busy', '1');
+//turn on busy flag
+set_config('search_indexer_busy', '1');
 
-  //paths
-  $index_path = SEARCH_INDEX_PATH;
-  $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
-  $dbcontrol = new IndexDBControl();
+//paths
+$index_path = SEARCH_INDEX_PATH;
+$index_db_file = "{$CFG->dirroot}/search/db/$CFG->dbtype.sql";
+$dbcontrol = new IndexDBControl();
 
-  //setup directory in data root
-  if (!file_exists($index_path)) {
+//setup directory in data root
+if (!file_exists($index_path)) {
     mtrace("Data directory ($index_path) does not exist, attempting to create.");
     if (!mkdir($index_path)) {
-      search_pexit("Error creating data directory at: $index_path. Please correct.");
-    } else {
-      mtrace("Directory successfully created.");
-    } //else
-  } else {
+        search_pexit("Error creating data directory at: $index_path. Please correct.");
+    } 
+    else {
+        mtrace("Directory successfully created.");
+    } 
+} 
+else {
     mtrace("Using $index_path as data directory.");
-  } //else
+} 
 
-  $index = new Zend_Search_Lucene($index_path, true);
+$index = new Zend_Search_Lucene($index_path, true);
 
-  if (!$dbcontrol->checkDB()) {
+if (!$dbcontrol->checkDB()) {
     search_pexit("Database error. Please check settings/files.");
-  } //if
-
-  //begin timer
-  search_stopwatch();
-  mtrace("Starting activity modules\n");
-
-  //the presence of the required search functions -
-  // * mod_iterator
-  // * mod_get_content_for_index
-  //are the sole basis for including a module in the index at the moment.
-
-  if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
-    //add virtual modules onto the back of the array
-    $mods = array_merge($mods, search_get_additional_modules());
-
-    foreach ($mods as $mod) {
-      $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
-
-      if (file_exists($class_file)) {
-        include_once($class_file);
-
-        //build function names
-        $iter_function = $mod->name.'_iterator';
-        $index_function = $mod->name.'_get_content_for_index';
-
-        $counter = 0;
-        $doc = new stdClass;
-
-        if (function_exists($index_function) && function_exists($iter_function)) {
-          mtrace("Processing module function $index_function ...");
-
-          foreach ($iter_function() as $i) {
-            $documents = $index_function($i);
-
-            //begin transaction
-
-            foreach($documents as $document) {
-              $counter++;
-
-              //object to insert into db
-              $dbid = $dbcontrol->addDocument($document);
-
-              //synchronise db with index
-              $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
-
-              //add document to index
-              $index->addDocument($document);
-
-              //commit every x new documents, and print a status message
-              if (($counter%2000) == 0) {
+}
+
+//begin timer
+search_stopwatch();
+mtrace("Starting activity modules\n");
+
+//the presence of the required search functions -
+// * mod_iterator
+// * mod_get_content_for_index
+//are the sole basis for including a module in the index at the moment.
+$searchables = array();
+
+// collects modules
+if ($mods = get_records('modules', '', '', '', 'id,name')) {
+    $searchables = array_merge($searchables, $mods);
+}
+mtrace(count($searchables).' modules found.');
+  
+// collects blocks as well, since indexable information may also be found in blocks
+if ($blocks = get_records('block', '', '', '', 'id,name')) {
+    // prepend the "block_" prefix to discriminate document type plugins
+    foreach(array_keys($blocks) as $aBlockId){
+        $blocks[$aBlockId]->name = 'block_'.$blocks[$aBlockId]->name;
+    }
+    $searchables = array_merge($searchables, $blocks);
+    mtrace(count($blocks).' blocks found.');
+}
+  
+//add virtual modules onto the back of the array
+$searchables = array_merge($searchables, search_get_additional_modules());
+if ($searchables){
+    foreach ($searchables as $mod) {
+        $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+     
+        if (file_exists($class_file)) {
+            include_once($class_file);
+
+            //build function names
+            $iter_function = $mod->name.'_iterator';
+            $index_function = $mod->name.'_get_content_for_index';
+            $counter = 0;
+            if (function_exists($index_function) && function_exists($iter_function)) {
+                mtrace("Processing module function $index_function ...");
+                $sources = $iter_function();
+                if ($sources){
+                    foreach ($sources as $i) {
+                        $documents = $index_function($i);
+              
+                        //begin transaction
+                        if ($documents){
+                            foreach($documents as $document) {
+                                $counter++;
+                                
+                                //object to insert into db
+                                $dbid = $dbcontrol->addDocument($document);
+                                
+                                //synchronise db with index
+                                $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
+                                
+                                //add document to index
+                                $index->addDocument($document);
+                                
+                                //commit every x new documents, and print a status message
+                                if (($counter % 2000) == 0) {
+                                    $index->commit();
+                                    mtrace(".. $counter");
+                                } 
+                            }
+                        }
+                        //end transaction
+                    }
+                }
+        
+                //commit left over documents, and finish up
                 $index->commit();
-                mtrace(".. $counter");
-              } //if
-            } //foreach
-
-            //end transaction
-
-          } //foreach
-
-          //commit left over documents, and finish up
-          $index->commit();
-
-          mtrace("-- $counter documents indexed");
-          mtrace("done.\n");
-        } //if
-      } //if
-    } //foreach
-  } //if
-
-  //finished modules
-  mtrace('Finished activity modules');
-  search_stopwatch();
-
-  //now blocks...
-  //
-
-  mtrace(".
Back to query page."); - mtrace('
'); - - //finished, turn busy flag off - set_config("search_indexer_busy", "0"); - - //mark the time we last updated - set_config("search_indexer_run_date", time()); - - //and the index size - set_config("search_index_size", (int)$index->count()); + + mtrace("-- $counter documents indexed"); + mtrace("done.\n"); + } + } + } +} + +//finished modules +mtrace('Finished activity modules'); +search_stopwatch(); + +mtrace(".
Back to query page."); +mtrace('
'); + +//finished, turn busy flag off +set_config("search_indexer_busy", "0"); + +//mark the time we last updated +set_config("search_indexer_run_date", time()); + +//and the index size +set_config("search_index_size", (int)$index->count()); ?> \ No newline at end of file diff --git a/search/indexersplash.php b/search/indexersplash.php index 913e2ae669b33..058118677df4e 100644 --- a/search/indexersplash.php +++ b/search/indexersplash.php @@ -1,33 +1,39 @@ dirroot/search/lib.php"); +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); - require_login(); +require_login(); - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} - //check for php5 (lib.php) - if (!search_check_php5()) { +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if +} - require_once("$CFG->dirroot/search/indexlib.php"); - $indexinfo = new IndexInfo(); +require_once("$CFG->dirroot/search/indexlib.php"); +$indexinfo = new IndexInfo(); - if ($indexinfo->valid()) { +if ($indexinfo->valid()) { mtrace("
The data directory ($indexinfo->path) contains $indexinfo->filecount files, and\n"
           ."there are ".$indexinfo->dbcount." records in the search_documents table.\n"
           ."\n"
@@ -42,7 +48,8 @@
           ."Test indexing or "
           ."Continue indexing or Back to query page."
           ."
"); - } else { +} +else { header('Location: indexer.php?areyousure=yes'); - } //else +} ?> \ No newline at end of file diff --git a/search/indexlib.php b/search/indexlib.php index cc479e866270d..32df62f386783 100644 --- a/search/indexlib.php +++ b/search/indexlib.php @@ -1,15 +1,20 @@ dirroot/search/lib.php"); - require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); - - class IndexInfo { +/* +* Author: Michael Champanis +* +* Reviewed by: Valery Fremaux (2007) +* +* Index info class +* +* Used to retrieve information about an index. +* Has methods to check for valid database and data directory, +* and the index itself. +**/ + +require_once("$CFG->dirroot/search/lib.php"); +require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + +class IndexInfo { private $path, //index data directory $size, //size of directory (i.e. the whole index) $filecount, //number of files @@ -18,193 +23,226 @@ class IndexInfo { $types, //array of [document types => count] $complete, //is index completely formed? $time; //date index was generated - - public function __construct($path=SEARCH_INDEX_PATH) { - global $CFG, $db; - - $this->path = $path; - - //test to see if there is a valid index on disk, at the specified path - try { - $test_index = new Zend_Search_Lucene($this->path, false); - $validindex = true; - } catch(Exception $e) { - $validindex = false; - } //catch - - //retrieve file system info about the index if it is valid - if ($validindex) { - $this->size = display_size(get_directory_size($this->path)); - $index_dir = get_directory_list($this->path, '', false, false); - $this->filecount = count($index_dir); - $this->indexcount = $test_index->count(); - } else { - $this->size = 0; - $this->filecount = 0; - $this->indexcount = 0; - } //else - - $db_exists = false; //for now - - //get all the current tables in moodle - $admin_tables = $db->MetaTables(); - - //TODO: use new IndexDBControl class for database checks? 
- - //check if our search table exists - if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) { - //retrieve database information if it does - $db_exists = true; - - //total documents - $this->dbcount = count_records(SEARCH_DATABASE_TABLE); - - //individual document types - $types = search_get_document_types(); - sort($types); - - foreach($types as $type) { - $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type); - $this->types[$type] = (int)$c; - } //foreach - } else { - $this->dbcount = 0; - $this->types = array(); - } //else - - //check if the busy flag is set - if ($CFG->search_indexer_busy == '1') { - $this->complete = false; - } else { - $this->complete = true; - } //if - - //get the last run date for the indexer - if ($this->valid() && $CFG->search_indexer_run_date) { - $this->time = $CFG->search_indexer_run_date; - } else { - $this->time = 0; - } //else + + public function __construct($path = SEARCH_INDEX_PATH) { + global $CFG, $db; + + $this->path = $path; + + //test to see if there is a valid index on disk, at the specified path + try { + $test_index = new Zend_Search_Lucene($this->path, false); + $validindex = true; + } catch(Exception $e) { + $validindex = false; + } //catch + + //retrieve file system info about the index if it is valid + if ($validindex) { + $this->size = display_size(get_directory_size($this->path)); + $index_dir = get_directory_list($this->path, '', false, false); + $this->filecount = count($index_dir); + $this->indexcount = $test_index->count(); + } + else { + $this->size = 0; + $this->filecount = 0; + $this->indexcount = 0; + } + + $db_exists = false; //for now + + //get all the current tables in moodle + $admin_tables = $db->MetaTables(); + + //TODO: use new IndexDBControl class for database checks? 
+ + //check if our search table exists + if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) { + //retrieve database information if it does + $db_exists = true; + + //total documents + $this->dbcount = count_records(SEARCH_DATABASE_TABLE); + + //individual document types + $types = search_get_document_types(); + sort($types); + + foreach($types as $type) { + $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type); + $this->types[$type] = (int)$c; + } + } + else { + $this->dbcount = 0; + $this->types = array(); + } + + //check if the busy flag is set + if ($CFG->search_indexer_busy == '1') { + $this->complete = false; + } + else { + $this->complete = true; + } + + //get the last run date for the indexer + if ($this->valid() && $CFG->search_indexer_run_date) { + $this->time = $CFG->search_indexer_run_date; + } + else { + $this->time = 0; + } } //__construct - - //returns false on error, and the error message via referenced variable $err - public function valid(&$err=null) { - $err = array(); - $ret = true; - - if (!$this->is_valid_dir()) { - $err['dir'] = 'Index directory either contains an invalid index, or nothing at all.'; - $ret = false; - } //if - - if (!$this->is_valid_db()) { - $err['db'] = 'Database table is not present, or contains no index records.'; - $ret = false; - } //if - - if (!$this->complete) { - $err['index'] = 'Indexing was not successfully completed, please restart it.'; - $ret = false; - } //if - - return $ret; + + /** + * returns false on error, and the error message via referenced variable $err + * + */ + public function valid(&$err = null) { + $err = array(); + $ret = true; + + if (!$this->is_valid_dir()) { + $err['dir'] = get_string('invalidindexerror', 'search'); + $ret = false; + } + + if (!$this->is_valid_db()) { + $err['db'] = get_string('emptydatabaseerror', 'search'); + $ret = false; + } + + if (!$this->complete) { + $err['index'] = get_string('uncompleteindexingerror','search'); + $ret = false; + } + + return $ret; } 
//valid - - //is the index dir valid + + /** + * is the index dir valid + * + */ public function is_valid_dir() { - if ($this->filecount > 0) { - return true; - } else { - return false; - } //else + if ($this->filecount > 0) { + return true; + } + else { + return false; + } } //is_valid_dir - - //is the db table valid + + /** + * is the db table valid + * + */ public function is_valid_db() { - if ($this->dbcount > 0) { - return true; - } else { - return false; - } //else + if ($this->dbcount > 0) { + return true; + } + else { + return false; + } } //is_valid_db - - //shorthand get method for the class variables + + /** + * shorthand get method for the class variables + * + */ public function __get($var) { - if (in_array($var, array_keys(get_class_vars(get_class($this))))) { - return $this->$var; - } //if + if (in_array($var, array_keys(get_class_vars(get_class($this))))) { + return $this->$var; + } } //__get - } //IndexInfo +} //IndexInfo - /* DB Index control class - * - * Used to control the search index database table - * */ +/* +* DB Index control class +* +* Used to control the search index database table +**/ +class IndexDBControl { - class IndexDBControl { - //does the table exist? + /** + * does the table exist? + * + */ public function checkTableExists() { - global $CFG, $db; - - $table = SEARCH_DATABASE_TABLE; - $tables = $db->MetaTables(); - - if (in_array($CFG->prefix.$table, $tables)) { - return true; - } else { - return false; - } //else + global $CFG, $db; + + $table = SEARCH_DATABASE_TABLE; + $tables = $db->MetaTables(); + if (in_array($CFG->prefix.$table, $tables)) { + return true; + } + else { + return false; + } } //checkTableExists - //is our database setup valid? + /** + * is our database setup valid? 
+ * + */ public function checkDB() { - global $CFG, $db; - - $sqlfile = "$CFG->dirroot/search/db/$CFG->dbtype.sql"; - $ret = false; - - if ($this->checkTableExists()) { - execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false); - } //if + global $CFG, $db; + + $sqlfile = "$CFG->dirroot/blocks/search/db/$CFG->dbtype.sql"; + $ret = false; + if ($this->checkTableExists()) { + execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false); + } - ob_start(); //turn output buffering on - to hide modify_database() output - $ret = modify_database($sqlfile, '', false); - ob_end_clean(); //chuck the buffer and resume normal operation + //turn output buffering on - to hide modify_database() output + ob_start(); + $ret = modify_database($sqlfile, '', false); - return $ret; + //chuck the buffer and resume normal operation + ob_end_clean(); + return $ret; } //checkDB - //add a document record to the table + /** + * add a document record to the table + * @param document must be a Lucene SearchDocument instance + */ public function addDocument($document=null) { - global $db; - - if ($document == null) { - return false; - } //if - - //object to insert into db - $doc->doctype = $document->doctype; - $doc->docid = $document->docid; - $doc->title = search_escape_string($document->title); - $doc->url = search_escape_string($document->url); - $doc->update = time(); - $doc->docdate = $document->date; - $doc->courseid = $document->course_id; - $doc->groupid = $document->group_id; - - //insert summary into db - $id = insert_record(SEARCH_DATABASE_TABLE, $doc); - - return $id; + global $db, $CFG; + + if ($document == null) { + return false; + } + + // object to insert into db + $doc->doctype = $document->doctype; + $doc->docid = $document->docid; + $doc->itemtype = $document->itemtype; + $doc->title = search_escape_string($document->title); + $doc->url = search_escape_string($document->url); + $doc->update = time(); + $doc->docdate = $document->date; + $doc->courseid = 
$document->course_id; + $doc->groupid = $document->group_id; + + //insert summary into db + $id = insert_record(SEARCH_DATABASE_TABLE, $doc); + + return $id; } //addDocument - //remove a document record from the index + /** + * remove a document record from the index + * @param document must be a Lucene document instance, or at least a dbid enveloppe + */ public function delDocument($document) { - global $db; - - delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid); + global $db; + + delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid); } //delDocument - } //IndexControl +} //IndexControl ?> \ No newline at end of file diff --git a/search/lib.php b/search/lib.php index 8cdd62c9df925..b9e6f9195c667 100644 --- a/search/lib.php +++ b/search/lib.php @@ -1,113 +1,156 @@ dataroot/search"); - define('SEARCH_DATABASE_TABLE', 'search_documents'); - - //document types that can be searched - //define('SEARCH_TYPE_NONE', 'none'); - define('SEARCH_TYPE_WIKI', 'wiki'); - define('SEARCH_TYPE_FORUM', 'forum'); - define('SEARCH_TYPE_GLOSSARY', 'glossary'); - define('SEARCH_TYPE_RESOURCE', 'resource'); - - //returns all the document type constants - function search_get_document_types($prefix='SEARCH_TYPE') { +/* +* Author: Michael Champanis +* +* This file must not contain any PHP 5, because it is used to test for PHP 5 +* itself, and needs to be able to be executed on PHP 4 installations. 
+* +* Reviewed by: Valery Fremaux (2007) +* - adding techproject search capabilities +* - adding full internationalization +**/ + +/* +// function reference +function search_get_document_types($prefix = 'SEARCH_TYPE_') { +function search_get_additional_modules() { +function search_shorten_url($url, $length=30) { +function search_escape_string($str) { +function search_check_php5($feedback = false) { +function search_stopwatch($cli = false) { +function search_pexit($str = "") { +*/ + +define('SEARCH_INDEX_PATH', "$CFG->dataroot/search"); +define('SEARCH_DATABASE_TABLE', 'search_documents'); + +//document types that can be searched +//define('SEARCH_TYPE_NONE', 'none'); +define('SEARCH_TYPE_WIKI', 'wiki'); +define('PATH_FOR_SEARCH_TYPE_WIKI', 'mod/wiki'); +define('SEARCH_TYPE_FORUM', 'forum'); +define('PATH_FOR_SEARCH_TYPE_FORUM', 'mod/forum'); +define('SEARCH_TYPE_GLOSSARY', 'glossary'); +define('PATH_FOR_SEARCH_TYPE_GLOSSARY', 'mod/glossary'); +define('SEARCH_TYPE_RESOURCE', 'resource'); +define('PATH_FOR_SEARCH_TYPE_RESOURCE', 'mod/resource'); +define('SEARCH_TYPE_TECHPROJECT', 'techproject'); +define('PATH_FOR_SEARCH_TYPE_TECHPROJECT', 'mod/techproject'); +define('SEARCH_TYPE_DATA', 'data'); +define('PATH_FOR_SEARCH_TYPE_DATA', 'mod/data'); +define('SEARCH_TYPE_CHAT', 'chat'); +define('PATH_FOR_SEARCH_TYPE_CHAT', 'mod/chat'); + +/** +* returns all the document type constants +* @param prefix a pattern for recognizing constants +* @return an array of type labels +*/ +function search_get_document_types($prefix = 'SEARCH_TYPE_') { $ret = array(); - - foreach (get_defined_constants() as $key=>$value) { - if (substr($key, 0, strlen($prefix)) == $prefix) { - $ret[$key] = $value; - } //if - } //foreach - + foreach (get_defined_constants() as $key => $value) { + if (preg_match("/^{$prefix}/", $key)){ + $ret[$key] = $value; + } + } sort($ret); - return $ret; - } //search_get_document_types - - // additional virtual modules to index - // - // By adding 'moo' to the extras 
array, an additional document type - // documents/moo_document.php will be indexed - this allows for - // virtual modules to be added to the index, i.e. non-module specific - // information. - function search_get_additional_modules() { +} //search_get_document_types + +/** +* additional virtual modules to index +* +* By adding 'moo' to the extras array, an additional document type +* documents/moo_document.php will be indexed - this allows for +* virtual modules to be added to the index, i.e. non-module specific +* information. +*/ +function search_get_additional_modules() { $extras = array(/* additional keywords go here */); $ret = array(); - foreach($extras as $extra) { - $temp->name = $extra; - $ret[] = clone($temp); - } //foreach - + $temp->name = $extra; + $ret[] = clone($temp); + } return $ret; - } //search_get_additional_modules - - //shortens a url so it can fit on the results page - function search_shorten_url($url, $length=30) { +} //search_get_additional_modules + +/** +* shortens a url so it can fit on the results page +* @param url the url +* @param length the size limit we want +*/ +function search_shorten_url($url, $length=30) { return substr($url, 0, $length)."..."; - } //search_shorten_url - - function search_escape_string($str) { +} //search_shorten_url + +/** +* a local function for escaping +* @param str the string to escape +* @return the escaped string +*/ +function search_escape_string($str) { global $CFG; switch ($CFG->dbfamily) { - case 'mysql': - $s = mysql_real_escape_string($str); - break; - case 'postgres': - $s = pg_escape_string($str); - break; - default: - $s = addslashes($str); - } //switch - + case 'mysql': + $s = mysql_real_escape_string($str); + break; + case 'postgres': + $s = pg_escape_string($str); + break; + default: + $s = addslashes($str); + } return $s; - } //search_escape_string - - //get a real php 5 version number, using 5.0.0 arbitrarily - function search_check_php5($feedback=false) { +} //search_escape_string + +/** 
+* get a real php 5 version number, using 5.0.0 arbitrarily +* @param feedback if true, prints a feedback message to output. +* @return true if version of PHP is high enough +*/ +function search_check_php5($feedback = false) { if (!check_php_version("5.0.0")) { - if ($feedback) { - $phpversion = phpversion(); - print_heading("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); - } //if - - return false; - } else { + if ($feedback) { + print_heading(get_string('versiontoolow', 'search')); + } + return false; + } + else { return true; - } //else - } //search_check_php5 - - //simple timer function, outputs result on 2nd call - function search_stopwatch($cli = false) { + } +} //search_check_php5 + +/** +* simple timer function, on first call, records a current microtime stamp, outputs result on 2nd call +* @param cli an output formatting switch +* @return void +*/ +function search_stopwatch($cli = false) { if (!empty($GLOBALS['search_script_start_time'])) { - if (!$cli) print ''; - print round(microtime(true) - $GLOBALS['search_script_start_time'], 6).' seconds'; - if (!$cli) print ''; - - unset($GLOBALS['search_script_start_time']); - } else { - $GLOBALS['search_script_start_time'] = microtime(true); - } //else - } //search_stopwatch - - //print and exit (for debugging) - function search_pexit($str = "") { + if (!$cli) print ''; + print round(microtime(true) - $GLOBALS['search_script_start_time'], 6).' '.get_string('seconds', 'search'); + if (!$cli) print ''; + unset($GLOBALS['search_script_start_time']); + } + else { + $GLOBALS['search_script_start_time'] = microtime(true); + } +} //search_stopwatch + +/** +* print and exit (for debugging) +* @param str a variable to explore +* @return void +*/ +function search_pexit($str = "") { if (is_array($str) or is_object($str)) { - print_r($str); + print_r($str); } else if ($str) { - print $str."
"; - } //if - + print $str."
"; + } exit(0); - } //search_pexit +} //search_pexit ?> diff --git a/search/query.php b/search/query.php index f83cc6fec36dd..97526fd180098 100644 --- a/search/query.php +++ b/search/query.php @@ -1,43 +1,48 @@ dirroot/search/lib.php"); - - if ($CFG->forcelogin) { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* The query page - accepts a user-entered query string and returns results. +* +* Queries are boolean-aware, e.g.: +* +* '+' term required +* '-' term must not be present +* '' (no modifier) term's presence increases rank, but isn't required +* 'field:' search this field +* +* Examples: +* +* 'earthquake +author:michael' +* Searches for documents written by 'michael' that contain 'earthquake' +* +* 'earthquake +doctype:wiki' +* Search all wiki pages for 'earthquake' +* +* '+author:helen +author:foster' +* All articles written by Helen Foster +* +*/ + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +if ($CFG->forcelogin) { require_login(); - } +} - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} - $adv = new Object(); +$adv = new Object(); - //check for php5, but don't die yet (see line 52) - if ($check = search_check_php5()) { - require_once("$CFG->dirroot/search/querylib.php"); +// check for php5, but don't die yet (see line 52) +if ($check = search_check_php5()) { + require_once("{$CFG->dirroot}/search/querylib.php"); $page_number = optional_param('page', -1, PARAM_INT); $pages = ($page_number == -1) ? 
false : true; @@ -45,254 +50,288 @@ $query_string = optional_param('query_string', '', PARAM_CLEAN); if ($pages && isset($_SESSION['search_advanced_query'])) { - //if both are set, then we are busy browsing through the result pages of an advanced query - $adv = unserialize($_SESSION['search_advanced_query']); - } else if ($advanced) { - //otherwise we are dealing with a new advanced query - unset($_SESSION['search_advanced_query']); - session_unregister('search_advanced_query'); - - //chars to strip from strings (whitespace) - $chars = " \t\n\r\0\x0B,-+"; - - //retrieve advanced query variables - $adv->mustappear = trim(optional_param('mustappear', '', PARAM_CLEAN), $chars); - $adv->notappear = trim(optional_param('notappear', '', PARAM_CLEAN), $chars); - $adv->canappear = trim(optional_param('canappear', '', PARAM_CLEAN), $chars); - $adv->module = optional_param('module', '', PARAM_CLEAN); - $adv->title = trim(optional_param('title', '', PARAM_CLEAN), $chars); - $adv->author = trim(optional_param('author', '', PARAM_CLEAN), $chars); - } //else + // if both are set, then we are busy browsing through the result pages of an advanced query + $adv = unserialize($_SESSION['search_advanced_query']); + } + else if ($advanced) { + // otherwise we are dealing with a new advanced query + unset($_SESSION['search_advanced_query']); + session_unregister('search_advanced_query'); + + // chars to strip from strings (whitespace) + $chars = " \t\n\r\0\x0B,-+"; + + // retrieve advanced query variables + $adv->mustappear = trim(optional_param('mustappear', '', PARAM_CLEAN), $chars); + $adv->notappear = trim(optional_param('notappear', '', PARAM_CLEAN), $chars); + $adv->canappear = trim(optional_param('canappear', '', PARAM_CLEAN), $chars); + $adv->module = optional_param('module', '', PARAM_CLEAN); + $adv->title = trim(optional_param('title', '', PARAM_CLEAN), $chars); + $adv->author = trim(optional_param('author', '', PARAM_CLEAN), $chars); + } if ($advanced) { - //parse the 
advanced variables into a query string - //TODO: move out to external query class (QueryParse?) - - $query_string = ''; - - //get all available module types - $module_types = array_merge(array('All'), array_values(search_get_document_types())); - $adv->module = in_array($adv->module, $module_types) ? $adv->module : 'All'; - - //convert '1 2' into '+1 +2' for required words field - if (strlen(trim($adv->mustappear)) > 0) { - $query_string = ' +'.implode(' +', preg_split("/[\s,;]+/", $adv->mustappear)); - } //if - - //convert '1 2' into '-1 -2' for not wanted words field - if (strlen(trim($adv->notappear)) > 0) { - $query_string .= ' -'.implode(' -', preg_split("/[\s,;]+/", $adv->notappear)); - } //if - - //this field is left untouched, apart from whitespace being stripped - if (strlen(trim($adv->canappear)) > 0) { - $query_string .= ' '.implode(' ', preg_split("/[\s,;]+/", $adv->canappear)); - } //if - - //add module restriction - if ($adv->module != 'All') { - $query_string .= ' +doctype:'.$adv->module; - } //if - - //create title search string - if (strlen(trim($adv->title)) > 0) { - $query_string .= ' +title:'.implode(' +title:', preg_split("/[\s,;]+/", $adv->title)); - } //if - - //create author search string - if (strlen(trim($adv->author)) > 0) { - $query_string .= ' +author:'.implode(' +author:', preg_split("/[\s,;]+/", $adv->author)); - } //if - - //save our options if the query is valid - if (!empty($query_string)) { - $_SESSION['search_advanced_query'] = serialize($adv); - } //if - } //if - - //normalise page number + //parse the advanced variables into a query string + //TODO: move out to external query class (QueryParse?) + + $query_string = ''; + + // get all available module types + $module_types = array_merge(array('all'), array_values(search_get_document_types())); + $adv->module = in_array($adv->module, $module_types) ? 
$adv->module : 'all'; + + // convert '1 2' into '+1 +2' for required words field + if (strlen(trim($adv->mustappear)) > 0) { + $query_string = ' +'.implode(' +', preg_split("/[\s,;]+/", $adv->mustappear)); + } + + // convert '1 2' into '-1 -2' for not wanted words field + if (strlen(trim($adv->notappear)) > 0) { + $query_string .= ' -'.implode(' -', preg_split("/[\s,;]+/", $adv->notappear)); + } + + // this field is left untouched, apart from whitespace being stripped + if (strlen(trim($adv->canappear)) > 0) { + $query_string .= ' '.implode(' ', preg_split("/[\s,;]+/", $adv->canappear)); + } + + // add module restriction + $doctypestr = get_string('doctype', 'search'); + $titlestr = get_string('title', 'search'); + $authorstr = get_string('author', 'search'); + if ($adv->module != 'all') { + $query_string .= " +{$doctypestr}:".$adv->module; + } + + // create title search string + if (strlen(trim($adv->title)) > 0) { + $query_string .= " +{$titlestr}:".implode(" +{$titlestr}:", preg_split("/[\s,;]+/", $adv->title)); + } + + // create author search string + if (strlen(trim($adv->author)) > 0) { + $query_string .= " +{$authorstr}:".implode(" +{$authorstr}:", preg_split("/[\s,;]+/", $adv->author)); + } + + // save our options if the query is valid + if (!empty($query_string)) { + $_SESSION['search_advanced_query'] = serialize($adv); + } + } + + // normalise page number if ($page_number < 1) { - $page_number = 1; - } //if + $page_number = 1; + } //run the query against the index $sq = new SearchQuery($query_string, $page_number, 10, false); - } //if +} - if (!$site = get_site()) { +if (!$site = get_site()) { redirect("index.php"); - } //if +} - $strsearch = "Search"; //get_string(); - $strquery = "Enter your search query"; //get_string(); +$strsearch = get_string('search', 'search'); +$strquery = get_string('enteryoursearchquery', 'search'); - print_header("$site->shortname: $strsearch: $strquery", "$site->fullname", +print_header("$site->shortname: $strsearch: 
$strquery", "$site->fullname", "$strsearch -> $strquery"); - //keep things pretty, even if php5 isn't available - if (!$check) { +//keep things pretty, even if php5 isn't available +if (!$check) { print_heading(search_check_php5(true)); print_footer(); exit(0); - } //if +} - print_simple_box_start('center', '100%', '', 20); - print_heading($strquery); +print_box_start(); +print_heading($strquery); - print_simple_box_start('center', '', '', 20); +print_box_start(); - $vars = get_object_vars($adv); +$vars = get_object_vars($adv); - if (isset($vars)) { +if (isset($vars)) { foreach ($vars as $key => $value) { - $adv->$key = stripslashes(htmlentities($value)); - } //foreach - } + $adv->$key = stripslashes(htmlentities($value)); + } +} ?>
- - -     - Advanced search | - Statistics - + +     + | + + - + - + - + - + - + - + - +
These words must appear::
These words must not appear::
These words help improve rank::
Which modules to search?::
Words in title::
Author name::


- - + +
Normal search | Statistics | 
- +
-
+
'; - print 'Searching: '; - - if ($sq->is_valid_index()) { +if ($sq->is_valid_index()) { //use cached variable to show up-to-date index size (takes deletions into account) print $CFG->search_index_size; - } else { +} +else { print "0"; - } //else - - print ' documents.'; +} - if (!$sq->is_valid_index() and isadmin()) { - print "

Admin: There appears to be no search index. Please create an index.

\n"; - } //if +print ' '; +print_string('documents', 'search'); +print '.'; - print '
'; +if (!$sq->is_valid_index() and isadmin()) { + print '

' . get_string('noindexmessage', 'search') . '' . get_string('createanindex', 'search')."

\n"; +} - print_simple_box_end(); - - if ($sq->is_valid()) { - print_simple_box_start('center', '50%', 'white', 10); +?> + +is_valid()) { + print_box_start(); + search_stopwatch(); $hit_count = $sq->count(); - + print "
"; - - print $hit_count." results returned for '".stripslashes($query_string)."'."; + + print $hit_count.' '.get_string('resultsreturnedfor', 'search') . " '".stripslashes($query_string)."'."; print "
"; - + if ($hit_count > 0) { - $page_links = $sq->page_numbers(); - $hits = $sq->results(); - - if ($advanced) { - //if in advanced mode, search options are saved in the session, so - //we can remove the query string var from the page links, and replace - //it with a=1 (Advanced = on) instead - $page_links = preg_replace("/query_string=[^&]+/", 'a=1', $page_links); - } //if - - print "
    "; - - foreach ($hits as $listing) { - print "
  1. $listing->title
    \n" - ."".search_shorten_url($listing->url, 70)."
    \n" - ."Type: ".$listing->doctype.", score: ".round($listing->score, 3).", author: ".$listing->author."\n" - ."
  2. \n"; - } //for - - print "
"; - print $page_links; - } //if - - print_simple_box_end(); + $page_links = $sq->page_numbers(); + $hits = $sq->results(); + + if ($advanced) { + // if in advanced mode, search options are saved in the session, so + // we can remove the query string var from the page links, and replace + // it with a=1 (Advanced = on) instead + $page_links = preg_replace("/query_string=[^&]+/", 'a=1', $page_links); + } + + print "
    "; + + $typestr = get_string('type', 'search'); + $scorestr = get_string('score', 'search'); + $authorstr = get_string('author', 'search'); + foreach ($hits as $listing) { + if ($CFG->unicodedb) $listing->title = mb_convert_encoding($listing->title, 'auto', 'UTF8'); + $title_post_processing_function = $listing->doctype.'_link_post_processing'; + require_once "{$CFG->dirroot}/search/documents/{$listing->doctype}_document.php"; + if (function_exists($title_post_processing_function)) + $listing->title = $title_post_processing_function($listing->title); + print "
  1. url)."'>$listing->title
    \n" + ."".search_shorten_url($listing->url, 70)."
    \n" + ."{$typestr}: ".$listing->doctype.", {$scorestr}: ".round($listing->score, 3).", {$authorstr}: ".$listing->author."\n" + ."
  2. \n"; + } + + print "
"; + print $page_links; + } + + print_box_end(); ?> -
- It took to fetch these results. +.
\ No newline at end of file diff --git a/search/querylib.php b/search/querylib.php index 65d4b066545f7..2db68c1fcf349 100644 --- a/search/querylib.php +++ b/search/querylib.php @@ -1,91 +1,130 @@ dirroot/search/Zend/Search/Lucene.php"); - - class SearchResult { - public $url, - $title, - $doctype, - $author, - $score, - $number; - } //SearchResult - - - //split this into Cache class and extend to SearchCache? - class SearchCache { - private $mode, - $valid; - - public function __construct($mode='session') { - $accepted_modes = array('session'); - - if (in_array($mode, $accepted_modes)) { - $this->mode = $mode; - } else { - $this->mode = 'session'; - } //else +require_once("{$CFG->dirroot}/search/Zend/Search/Lucene.php"); + +define('DEFAULT_POPUP_SETTINGS', "\"menubar=0,location=0,scrollbars,resizable,width=600,height=450\""); + +/** +* a class that represents a single result record of the search engine +*/ +class SearchResult { +public $url, + $title, + $doctype, + $author, + $score, + $number; +} //SearchResult + + +//split this into Cache class and extend to SearchCache? 
+class SearchCache { +private $mode, + $valid; + + // foresees other caching locations + public function __construct($mode = 'session') { + $accepted_modes = array('session'); + + if (in_array($mode, $accepted_modes)) { + $this->mode = $mode; + } else { + $this->mode = 'session'; + } //else - $this->valid = true; + $this->valid = true; } //constructor + /** + * returns the search cache status + * @return boolean + */ public function can_cache() { - return $this->valid; + return $this->valid; } //can_cache - public function cache($id=false, $object=false) { - //see if there was a previous query - $last_term = $this->fetch('search_last_term'); - - //if this query is different from the last, clear out the last one - if ($id != false and $last_term != $id) { - $this->clear($last_term); - } //if + /** + * + * + */ + public function cache($id = false, $object = false) { + //see if there was a previous query + $last_term = $this->fetch('search_last_term'); + + //if this query is different from the last, clear out the last one + if ($id != false and $last_term != $id) { + $this->clear($last_term); + } //if - //store the new query if id and object are passed in - if ($object and $id) { - $this->store('search_last_term', $id); - $this->store($id, $object); - return true; - //otherwise return the stored results - } else if ($id and $this->exists($id)) { - return $this->fetch($id); - } //else + //store the new query if id and object are passed in + if ($object and $id) { + $this->store('search_last_term', $id); + $this->store($id, $object); + return true; + //otherwise return the stored results + } + else if ($id and $this->exists($id)) { + return $this->fetch($id); + } //else } //cache + /** + * do key exist in cache ? 
+ * @param id the object key + * @return boolean + */ private function exists($id) { - switch ($this->mode) { - case 'session' : - return isset($_SESSION[$id]); - } //switch + switch ($this->mode) { + case 'session' : + return isset($_SESSION[$id]); + } //switch } //exists + /** + * clears a cached object in cache + * @param the object key to clear + * @return void + */ private function clear($id) { - switch ($this->mode) { - case 'session' : - unset($_SESSION[$id]); - session_unregister($id); - return; - } //switch + switch ($this->mode) { + case 'session' : + unset($_SESSION[$id]); + session_unregister($id); + return; + } //switch } //clear + /** + * fetches a cached object + * @param id the object identifier + * @return the object cached + */ private function fetch($id) { - switch ($this->mode) { - case 'session' : - return ($this->exists($id)) ? unserialize($_SESSION[$id]) : false; - } //switch + switch ($this->mode) { + case 'session' : + return ($this->exists($id)) ? unserialize($_SESSION[$id]) : false; + } //switch } //fetch + /** + * put an object in cache + * @param id the key for that object + * @param object the object to cache as a serialized value + * @return void + */ private function store($id, $object) { - switch ($this->mode) { - case 'session' : - $_SESSION[$id] = serialize($object); - return; - } //switch + switch ($this->mode) { + case 'session' : + $_SESSION[$id] = serialize($object); + return; + } //switch } //store - } //SearchCache +} //SearchCache - - class SearchQuery { +/** +* Represents a single query with results +* +*/ +class SearchQuery { private $index, $term, $pagenumber, @@ -96,166 +135,201 @@ class SearchQuery { $results_per_page, $total_results; - public function __construct($term='', $page=1, $results_per_page=10, $cache=false) { - global $CFG; + /** + * constructor records query parameters + * + */ + public function __construct($term = '', $page = 1, $results_per_page = 10, $cache = false) { + global $CFG; - $this->term = 
$term; - $this->pagenumber = $page; - $this->cache = $cache; - $this->validquery = true; - $this->validindex = true; - $this->results_per_page = $results_per_page; + $this->term = $term; + $this->pagenumber = $page; + $this->cache = $cache; + $this->validquery = true; + $this->validindex = true; + $this->results_per_page = $results_per_page; - $index_path = SEARCH_INDEX_PATH; + $index_path = SEARCH_INDEX_PATH; - try { - $this->index = new Zend_Search_Lucene($index_path, false); - } catch(Exception $e) { - $this->validindex = false; - return; - } //catch + try { + $this->index = new Zend_Search_Lucene($index_path, false); + } catch(Exception $e) { + $this->validindex = false; + return; + } //catch - if (empty($this->term)) { - $this->validquery = false; - } else { - $this->set_query($this->term); - } //else + if (empty($this->term)) { + $this->validquery = false; + } else { + $this->set_query($this->term); + } //else } //constructor + + /** + * determines state of query object depending on query entry and + * tries to lauch search if all is OK + * @return void (this is only a state changing trigger). + */ + public function set_query($term = '') { + if (!empty($term)) { + $this->term = $term; + } //if - public function set_query($term='') { - if (!empty($term)) { - $this->term = $term; - } //if - - if (empty($this->term)) { - $this->validquery = false; - } else { - $this->validquery = true; - } //else + if (empty($this->term)) { + $this->validquery = false; + } + else { + $this->validquery = true; + } //else - if ($this->validquery and $this->validindex) { - $this->results = $this->get_results(); - } else { - $this->results = array(); - } //else + if ($this->validquery and $this->validindex) { + $this->results = $this->get_results(); + } + else { + $this->results = array(); + } //else } //set_query + /** + * accessor to the result table. 
+ * @return an array of result records + */ public function results() { - return $this->results; + return $this->results; } //results + /** + * do the effective collection of results + * + */ private function process_results($all=false) { - global $USER; - - $term = strtolower($this->term); - - //experimental - return more results - $strip_arr = array('author:', 'title:', '+', '-', 'doctype:'); - $stripped_term = str_replace($strip_arr, '', $term); - - $hits = $this->index->find($term." title:".$stripped_term." author:".$stripped_term); - //-- + global $USER; - $hitcount = count($hits); - $this->total_results = $hitcount; + $term = strtolower($this->term); - if ($hitcount == 0) return array(); + //experimental - return more results + $strip_arr = array('author:', 'title:', '+', '-', 'doctype:'); + $stripped_term = str_replace($strip_arr, '', $term); - $totalpages = ceil($hitcount/$this->results_per_page); + $hits = $this->index->find($term." title:".$stripped_term." author:".$stripped_term); + //-- - if (!$all) { - if ($hitcount < $this->results_per_page) { - $this->pagenumber = 1; - } else if ($this->pagenumber > $totalpages) { - $this->pagenumber =$totalpages; - } //if - - $start = ($this->pagenumber - 1) * $this->results_per_page; - $end = $start + $this->results_per_page; + $hitcount = count($hits); + $this->total_results = $hitcount; - if ($end > $hitcount) { - $end = $hitcount; - } //if - } else { - $start = 0; - $end = $hitcount; - } //else + if ($hitcount == 0) return array(); - $resultdoc = new SearchResult(); - $resultdocs = array(); + $totalpages = ceil($hitcount/$this->results_per_page); - for ($i = $start; $i < $end; $i++) { - $hit = $hits[$i]; + if (!$all) { + if ($hitcount < $this->results_per_page) { + $this->pagenumber = 1; + } + else if ($this->pagenumber > $totalpages) { + $this->pagenumber = $totalpages; + } //if - //check permissions on each result - if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) { - 
$resultdoc->number = $i; - $resultdoc->url = $hit->url; - $resultdoc->title = $hit->title; - $resultdoc->score = $hit->score; - $resultdoc->doctype = $hit->doctype; - $resultdoc->author = $hit->author; + $start = ($this->pagenumber - 1) * $this->results_per_page; + $end = $start + $this->results_per_page; - //and store it - $resultdocs[] = clone($resultdoc); - } //if - } //foreach + if ($end > $hitcount) { + $end = $hitcount; + } //if + } + else { + $start = 0; + $end = $hitcount; + } //else - return $resultdocs; + $resultdoc = new SearchResult(); + $resultdocs = array(); + + for ($i = $start; $i < $end; $i++) { + $hit = $hits[$i]; + + //check permissions on each result + if ($this->can_display($USER, $hit->docid, $hit->doctype, $hit->course_id, $hit->group_id, $hit->path, $hit->itemtype, $hit->context_id )) { + $resultdoc->number = $i; + $resultdoc->url = $hit->url; + $resultdoc->title = $hit->title; + $resultdoc->score = $hit->score; + $resultdoc->doctype = $hit->doctype; + $resultdoc->author = $hit->author; + + //and store it + $resultdocs[] = clone($resultdoc); + } //if + else{ + // lowers total_results one unit + $this->total_results--; + } + } //foreach + + return $resultdocs; } //process_results + /** + * get results of a search query using a caching strategy if available + * @return the result documents as an array of search objects + */ private function get_results() { - $cache = new SearchCache(); - - if ($this->cache and $cache->can_cache()) { - if (!($resultdocs = $cache->cache($this->term))) { - $resultdocs = $this->process_results(); - //cache the results so we don't have to compute this on every page-load - $cache->cache($this->term, $resultdocs); - //print "Using new results."; - } else { - //There was something in the cache, so we're using that to save time - //print "Using cached results."; + $cache = new SearchCache(); + + if ($this->cache and $cache->can_cache()) { + if (!($resultdocs = $cache->cache($this->term))) { + $resultdocs = 
$this->process_results(); + //cache the results so we don't have to compute this on every page-load + $cache->cache($this->term, $resultdocs); + //print "Using new results."; + } + else { + //There was something in the cache, so we're using that to save time + //print "Using cached results."; + } //else + } + else { + //no caching :( + //print "Caching disabled!"; + $resultdocs = $this->process_results(); } //else - } else { - //no caching :( - //print "Caching disabled!"; - $resultdocs = $this->process_results(); - } //else - return $resultdocs; + return $resultdocs; } //get_results + /** + * constructs the results paging links on results. + * @return string the results paging links + */ public function page_numbers() { $pages = $this->total_pages(); $query = htmlentities($this->term); $page = $this->pagenumber; - $next = "Next"; - $back = "Back"; + $next = get_string('next', 'search'); + $back = get_string('back', 'search'); $ret = ""; @@ -274,13 +348,68 @@ public function page_numbers() { return $ret; } //page_numbers - //can the user see this result? - private function can_display(&$user, $this_id, $doctype, $course_id, $group_id) { - //this function should return true/false depending on - //whether or not a user can see this resource - //.. - //if one of you nice moodlers see this, feel free to - //implement it for me .. :-P + /** + * can the user see this result ? + * @param user a reference upon the user to be checked for access + * @param this_id the item identifier + * @param doctype the search document type. 
MAtches the module or block or + * extra search source definition + * @param course_id the course reference of the searched result + * @param group_id the group identity attached to the found resource + * @param path the path that routes to the local lib.php of the searched + * surrounding object fot that document + * @param item_type a subclassing information for complex module data models + * // TODO reorder parameters more consistently + */ + private function can_display(&$user, $this_id, $doctype, $course_id, $group_id, $path, $item_type, $context_id) { + global $CFG; + + /** + * course related checks + */ + // admins can see everything, anyway. + if (isadmin()){ + return true; + } + + // first check course compatibility against user : enrolled users to that course can see. + $myCourses = get_my_courses($user->id); + $unenroled = !in_array($course_id, array_keys($myCourses)); + + // if guests are allowed, logged guest can see + $isallowedguest = (isguest()) ? get_field('course', 'guest', 'id', $course_id) : false ; + + if ($unenroled && !$isallowedguest){ + return false; + } + + // if user is enrolled or is allowed user and course is hidden, can he see it ? 
+ $visibility = get_field('course', 'visible', 'id', $course_id); + if ($visibility <= 0){ + // hidden course : only users holding moodle/course:viewhiddencourses in that course context may see its documents (context resolved from $course_id, there is no $course object in this scope) + if (!has_capability('moodle/course:viewhiddencourses', get_context_instance(CONTEXT_COURSE, $course_id))){ + return false; + } + } + + /** + * prerecorded capabilities + */ + // get context caching information and tries to discard unwanted records here + + + /** + * final checks + */ + // then give back indexing data to the module for local check + include_once "{$CFG->dirroot}/search/documents/{$doctype}_document.php"; + $access_check_function = "{$doctype}_check_text_access"; + + if (function_exists($access_check_function)){ + $modulecheck = $access_check_function($path, $item_type, $this_id, $user, $group_id, $context_id); + // echo "module said $modulecheck for item $doctype/$item_type/$this_id"; + return($modulecheck); + } + return true; } //can_display diff --git a/search/stats.php b/search/stats.php index 5e391b164f671..0b71c9c29e1d1 100644 --- a/search/stats.php +++ b/search/stats.php @@ -1,51 +1,75 @@ dirroot/search/lib.php"); - - if ($CFG->forcelogin) { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* Prints some basic statistics about the current index. +* Does some diagnostics if you are logged in as an administrator. 
+* +*/ + +require_once('../config.php'); +require_once("{$CFG->dirroot}/search/lib.php"); + +if ($CFG->forcelogin) { require_login(); - } - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } +} - //check for php5, but don't die yet - if ($check = search_check_php5()) { - require_once("$CFG->dirroot/search/indexlib.php"); +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} +//check for php5, but don't die yet +if ($check = search_check_php5()) { + require_once("{$CFG->dirroot}/search/indexlib.php"); + $indexinfo = new IndexInfo(); - } //if +} - if (!$site = get_site()) { +if (!$site = get_site()) { redirect("index.php"); - } //if +} - $strsearch = "Search"; //get_string(); - $strquery = "Search statistics"; //get_string(); +$strsearch = get_string('search', 'search'); +$strquery = get_string('statistics', 'search'); - print_header("$site->shortname: $strsearch: $strquery", "$site->fullname", - "$strsearch -> $strquery"); +print_header("$site->shortname: $strsearch: $strquery", "$site->fullname", + "$strsearch -> $strquery"); - //keep things pretty, even if php5 isn't available - if (!$check) { +//keep things pretty, even if php5 isn't available +if (!$check) { print_heading(search_check_php5(true)); print_footer(); exit(0); - } //if - - print_simple_box_start('center', '100%', '', 20); - print_heading($strquery); - - print_simple_box_start('center', '', '', 20); - - //this table is only for admins, shows index directory size and location - if (isadmin()) { +} + +print_box_start(); +print_heading($strquery); + +print_box_start(); + +$databasestr = get_string('database', 'search'); +$documentsinindexstr = get_string('documentsinindex', 'search'); +$deletionsinindexstr = get_string('deletionsinindex', 'search'); +$documentsindatabasestr = get_string('documentsindatabase', 'search'); +$databasestatestr = get_string('databasestate', 'search'); + +//this table is only for admins, shows 
index directory size and location +if (isadmin()) { + $datadirectorystr = get_string('datadirectory', 'search'); + $inindexdirectorystr = get_string('filesinindexdirectory', 'search'); + $totalsizestr = get_string('totalsize', 'search'); + $errorsstr = get_string('errors', 'search'); + $solutionsstr = get_string('solutions', 'search'); + $checkdirstr = get_string('checkdir', 'search'); + $checkdbstr = get_string('checkdb', 'search'); + $checkdiradvicestr = get_string('checkdiradvice', 'search'); + $checkdbadvicestr = get_string('checkdbadvice', 'search'); + $runindexerteststr = get_string('runindexertest', 'search'); + $runindexerstr = get_string('runindexer', 'search'); + $admin_table->tablealign = "center"; $admin_table->align = array ("right", "left"); $admin_table->wrap = array ("nowrap", "nowrap"); @@ -53,72 +77,73 @@ $admin_table->cellspacing = 0; $admin_table->width = '500'; - $admin_table->data[] = array('Data directory', ''.$indexinfo->path.''); - $admin_table->data[] = array('Files in index directory', $indexinfo->filecount); - $admin_table->data[] = array('Total size', $indexinfo->size); + $admin_table->data[] = array("{$datadirectorystr}", ''.$indexinfo->path.''); + $admin_table->data[] = array($inindexdirectorystr, $indexinfo->filecount); + $admin_table->data[] = array($totalsizestr, $indexinfo->size); if ($indexinfo->time > 0) { - $admin_table->data[] = array('Created on', date('r', $indexinfo->time)); - } else { - $admin_table->data[] = array('Created on', '-'); - } //else + $admin_table->data[] = array(get_string('createdon', 'search'), date('r', $indexinfo->time)); + } + else { + $admin_table->data[] = array(get_string('createdon', 'search'), '-'); + } if (!$indexinfo->valid($errors)) { - $admin_table->data[] = array('Errors', ' '); - - foreach ($errors as $key=>$value) { - $admin_table->data[] = array($key.' ... 
', $value); - } //foreach - - $admin_table->data[] = array('Solutions', ' '); - - if (isset($errors['dir'])) { - $admin_table->data[] = array('Check dir', 'Ensure the data directory exists and is writable.'); - } //if + $admin_table->data[] = array("{$errorsstr}", ' '); + foreach ($errors as $key => $value) { + $admin_table->data[] = array($key.' ... ', $value); + } + } - if (isset($errors['db'])) { - $admin_table->data[] = array('Check DB', 'Check your database for any problems.'); - } //if - - $admin_table->data[] = array('Run indexer test', 'tests/index.php'); - $admin_table->data[] = array('Run indexer', 'indexersplash.php'); - } //if - } //if + print_table($admin_table); + print_spacer(20); + print_heading($solutionsstr); + + unset($admin_table->data); + if (isset($errors['dir'])) { + $admin_table->data[] = array($checkdirstr, $checkdiradvicestr); + } + if (isset($errors['db'])) { + $admin_table->data[] = array($checkdbstr, $checkdbadvicestr); + } + + $admin_table->data[] = array($runindexerteststr, 'tests/index.php'); + $admin_table->data[] = array($runindexerstr, 'indexersplash.php'); + + print_table($admin_table); + print_spacer(20); +} - //this is the standard summary table for normal users, shows document counts - $table->tablealign = "center"; - $table->align = array ("right", "left"); - $table->wrap = array ("nowrap", "nowrap"); - $table->cellpadding = 5; - $table->cellspacing = 0; - $table->width = '500'; +//this is the standard summary table for normal users, shows document counts +$table->tablealign = "center"; +$table->align = array ("right", "left"); +$table->wrap = array ("nowrap", "nowrap"); +$table->cellpadding = 5; +$table->cellspacing = 0; +$table->width = '500'; - $table->data[] = array('Database', 'search_documents'); +$table->data[] = array("{$databasestr}", "{$CFG->prefix}search_documents"); - //add extra fields if we're admin - if (isadmin()) { +//add extra fields if we're admin +if (isadmin()) { //don't want to confuse users if the two 
totals don't match (hint: they should) - $table->data[] = array('Documents in index', $indexinfo->indexcount); - + $table->data[] = array($documentsinindexstr, $indexinfo->indexcount); + //*cough* they should match if deletions were actually removed from the index, //as it turns out, they're only marked as deleted and not returned in search results - $table->data[] = array('Deletions in index', (int)$indexinfo->indexcount - (int)$indexinfo->dbcount); - } //if - - $table->data[] = array('Documents in database', $indexinfo->dbcount); + $table->data[] = array($deletionsinindexstr, (int)$indexinfo->indexcount - (int)$indexinfo->dbcount); +} - foreach($indexinfo->types as $key => $value) { - $table->data[] = array("'$key' documents", $value); - } //foreach +$table->data[] = array($documentsindatabasestr, $indexinfo->dbcount); - if (isadmin()) { - print_table($admin_table); - print_spacer(20); - } //if +foreach($indexinfo->types as $key => $value) { + $table->data[] = array(get_string('documentsfor', 'search') . 
" '".get_string('modulenameplural', $key)."'", $value); +} - print_table($table); +print_heading($databasestatestr); +print_table($table); - print_simple_box_end(); - print_simple_box_end(); - print_footer(); +print_box_end(); +print_box_end(); +print_footer(); ?> \ No newline at end of file diff --git a/search/update.php b/search/update.php index 8282a62bcdf0e..b7ebe4abc5be1 100644 --- a/search/update.php +++ b/search/update.php @@ -1,105 +1,143 @@ dirroot/search/lib.php"); - - require_login(); - - if (empty($CFG->enableglobalsearch)) { - error('Global searching is not enabled.'); - } - - if (!isadmin()) { - error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); - } //if - - //check for php5 (lib.php) - if (!search_check_php5()) { +/** +* Global Search Engine for Moodle +* Michael Champanis (mchampan) [cynnical@gmail.com] +* review 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr] +* 2007/08/02 +* +* Index asynchronous updator +* +* Major chages in this review is passing the xxxx_db_names return to +* multiple arity to handle multiple document types modules +*/ + +require_once('../config.php'); +require_once("$CFG->dirroot/search/lib.php"); + +require_login(); + +if (empty($CFG->enableglobalsearch)) { + error(get_string('globalsearchdisabled', 'search')); +} + +if (!isadmin()) { + error(get_string('beadmin', 'search'), "$CFG->wwwroot/login/index.php"); +} + +//check for php5 (lib.php) +if (!search_check_php5()) { $phpversion = phpversion(); mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); exit(0); - } //if - - require_once("$CFG->dirroot/search/indexlib.php"); - - $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); - $dbcontrol = new IndexDBControl(); - $update_count = 0; - - $indexdate = $CFG->search_indexer_run_date; - - mtrace("
Starting index update (updates)...\n");
-
-  if ($mods = get_records_select('modules')) {
-  $mods = array_merge($mods, search_get_additional_modules());
-
-  foreach ($mods as $mod) {
-    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
-    $get_document_function = $mod->name.'_single_document';
-    $delete_function = $mod->name.'_delete';
-    $db_names_function = $mod->name.'_db_names';
-    $updates = array();
-
-    if (file_exists($class_file)) {
-      require_once($class_file);
-
-      if (function_exists($delete_function) and function_exists($db_names_function) and function_exists($get_document_function)) {
-        mtrace("Checking $mod->name module for updates.");
-        $values = $db_names_function();
-
-        //TODO: check 'in' syntax with other RDBMS' (add and update.php as well)
-        $sql = "select id, ".$values[0]." as docid from ".$values[1].
-               " where ".$values[3]." > $indexdate".
-               " and id in (select docid from ".SEARCH_DATABASE_TABLE.")";
-
-        $records = get_records_sql($sql);
-
-        if (is_array($records)) {
-          foreach($records as $record) {
-            $updates[] = $delete_function($record->docid);
-          } //foreach
-        } //if
-
-        foreach ($updates as $update) {
-          ++$update_count;
-
-          //delete old document
-          $doc = $index->find("+docid:$update +doctype:$mod->name");
-
-          //get the record, should only be one
-          foreach ($doc as $thisdoc) {
-            mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
-
-            $dbcontrol->delDocument($thisdoc);
-            $index->delete($thisdoc->id);
-          } //foreach
-
-          //add new modified document back into index
-          $add = $get_document_function($update);
-
-          //object to insert into db
-          $dbid = $dbcontrol->addDocument($add);
-
-          //synchronise db with index
-          $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
-
-          mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
-
-          $index->addDocument($add);
-        } //foreach
-
-        mtrace("Finished $mod->name.\n");
-      } //if
-    } //if
-  } //foreach
-  } //if
-
-  //commit changes
-  $index->commit();
-
-  //update index date
-  set_config("search_indexer_run_date", time());
-
-  mtrace("Finished $update_count updates.
"); +} + +require_once("$CFG->dirroot/search/indexlib.php"); + +$index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); +$dbcontrol = new IndexDBControl(); +$update_count = 0; +$indexdate = $CFG->search_indexer_update_date; +$startupdatedate = time(); + +mtrace("
Starting index update (updates)...\n");
+
+// Refresh every indexed document whose source record changed since the last update run.
+// A module (or additional search source) is processed only when its
+// search/documents/<name>_document.php file exists and defines the three hooks
+// <name>_db_names(), <name>_delete() and <name>_single_document().
+if ($mods = get_records_select('modules')) {
+    $mods = array_merge($mods, search_get_additional_modules());
+
+    // an unset or non numeric last-update date would produce invalid SQL below
+    // ("column >  AND ..."); fall back to 0 so that every indexed document is
+    // then considered modified
+    $since = (int)$indexdate;
+
+    foreach ($mods as $mod) {
+        $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+        $get_document_function = $mod->name.'_single_document';
+        $delete_function = $mod->name.'_delete';
+        $db_names_function = $mod->name.'_db_names';
+        $updates = array();
+
+        if (!file_exists($class_file)) {
+            continue;
+        }
+        require_once($class_file);
+
+        //all three hooks are required
+        if (!function_exists($delete_function) or !function_exists($db_names_function) or !function_exists($get_document_function)) {
+            continue;
+        }
+
+        mtrace("Checking $mod->name module for updates.");
+        $valuesArray = $db_names_function();
+        if ($valuesArray){
+            // one entry per document type handled by the module. Fields read here :
+            // [0] column exposed as docid, [1] table name (without prefix),
+            // [3] column compared with the last update date (presumably the
+            // modification timestamp - confirm in the *_document.php files),
+            // [4] item type ('*' for any), [5] optional extra SQL condition
+            foreach($valuesArray as $values){
+
+                $where = (isset($values[5])) ? 'AND ('.$values[5].')' : '';
+                $itemtypes = ($values[4] != '*') ? " AND itemtype = '{$values[4]}' " : '' ;
+
+                //TODO: check 'in' syntax with other RDBMS' (add and update.php as well)
+                // first pass : docid => itemtype map of what is already indexed for this type
+                $table = SEARCH_DATABASE_TABLE;
+                $query = "
+                    SELECT 
+                        docid,
+                        itemtype
+                    FROM 
+                        {$CFG->prefix}{$table}
+                    WHERE
+                        doctype = '{$mod->name}'
+                        $itemtypes
+                ";
+                $docIds = get_records_sql_menu($query);
+                if (!$docIds) {
+                    // nothing of that type in the index : skip instead of running the
+                    // next query with "id IN ('')", which can never match and compares
+                    // the id column with an empty string (strict databases may reject it)
+                    continue;
+                }
+                $docIdList = implode("','", array_keys($docIds));
+
+                // second pass : indexed records modified after the last update run
+                $query = "
+                    SELECT 
+                        id, 
+                        {$values[0]} as docid
+                    FROM 
+                        {$CFG->prefix}{$values[1]} 
+                    WHERE 
+                        {$values[3]} > {$since} AND 
+                        id IN ('{$docIdList}')
+                        $where
+                ";
+                $records = get_records_sql($query);
+                if (is_array($records)) {
+                    foreach($records as $record) {
+                        // the value returned by the delete hook is read below as ->id and ->itemtype
+                        $updates[] = $delete_function($record->docid, $docIds[$record->docid]);
+                    }
+                }
+            }
+
+            foreach ($updates as $update) {
+                ++$update_count;
+
+                //delete old document
+                $doc = $index->find("+docid:{$update->id} +doctype:{$mod->name} +itemtype:{$update->itemtype}");
+
+                //get the record, should only be one
+                foreach ($doc as $thisdoc) {
+                    mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+                    $dbcontrol->delDocument($thisdoc);
+                    $index->delete($thisdoc->id);
+                }
+
+                //add new modified document back into index
+                $add = $get_document_function($update->id, $update->itemtype);
+
+                //object to insert into db
+                $dbid = $dbcontrol->addDocument($add);
+
+                //synchronise db with index
+                $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
+                mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
+                $index->addDocument($add);
+            }
+        }
+        else{
+            mtrace("No types to update.\n");
+        }
+        mtrace("Finished $mod->name.\n");
+    }
+}
+
+//commit changes made to the index by the update loop above
+$index->commit();
+
+//update index date. The stored value is the time captured before the scan
+//started (not the time at which it finished), so records modified while this
+//run was in progress are still newer than the stored date on the next run
+set_config("search_indexer_update_date", $startupdatedate);
+
+mtrace("Finished $update_count updates.
"); ?> \ No newline at end of file