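/**
  * Filters search results to pick out only the ones that match the query.
  * Each accepted result object is given title, summary, activityname,
  * activityurl and url fields (plus data, if the document supplies it).
  * @param array $results Array of results from internal_query
  * @param int $desired Number of desired results; filtering stops as soon as
  *   this many results have been accepted
  * @return object ->results containing actual results and ->dbnext containing
  *   database position of next set of results (counted from the start of
  *   $results, i.e. the number of entries that were examined).
  */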
 function internal_filter($results, $desired)
 {
     global $CFG;
     // Modules whose lib.php has already been loaded, keyed by module name
     $required = array();
     $accepted = array();
     $count = 0;
     $return = new StdClass();
     // Counts how many entries of $results have been examined so far
     $return->dbnext = 0;
     $tl = textlib_get_instance();
     // Separator allowed between phrase words in the quick regex check
     $gap = '[^A-Za-z0-9]+';
     foreach ($results as $result) {
         $return->dbnext++;
         if (substr($result->plugin, 0, 4) === 'mod/') {
             // Module plugins
             $module = substr($result->plugin, 4);
             $function = $module . '_ousearch_get_document';
             if (!array_key_exists($module, $required)) {
                 require_once $CFG->dirroot . '/mod/' . $module . '/lib.php';
                 $required[$module] = true;
                 if (!function_exists($function)) {
                     error('Missing module search support ' . $function);
                 }
             }
         } else {
             if (substr($result->plugin, 0, 5) === 'test/') {
                 // Testing code, assumed to already be included
                 $function = substr($result->plugin, 5) . '_ousearch_get_document';
             } else {
                 // Nothing else supported yet
                 error('Unsupported search plugin type ' . $result->plugin);
             }
         }
         // Let's request the document. Note that the 'document' fields of
         // $result are those used by this function to find the right one.
         $page = $function($result);
         // Ignore if we can't find the document (and drop it from the index)
         if (!$page) {
             debugging('Module ' . $result->plugin . ' can\'t find search document');
             ousearch_document::wipe_document($result->id);
             continue;
         }
         // Page option can request that this result is not included
         if (!empty($page->hide)) {
             continue;
         }
         // Strip XHTML from content (need this before phrase scan)
         $textcontent = ousearch_document::strip_xhtml($page->content);
         // Add extra strings to the content after a special don't-show-this
         // marker and with another special marker between each (to prevent
         // phrases). The marker is reset for every result so that a value
         // left over from an earlier document is never reused.
         $evilmarker = null;
         if (isset($page->extrastrings) && count($page->extrastrings) > 0) {
             $evilmarker = rand();
             // This means people can't do it on purpose
             $textcontent .= ' xxrealcontentends' . $evilmarker;
             foreach ($page->extrastrings as $string) {
                 $textcontent .= ' ' . $string . ' xxsplit' . $evilmarker;
             }
         }
         // Do quick phrase scan that doesn't deal with Unicode,
         // or word-splitting but just discards results that
         // don't have the phrase words next to each other without
         // ASCII letters in between. This is intended to discard
         // results that (fairly) definitely don't have the phrase.
         // The further check below will make sure they really do
         // have it according to our standard (slow) word-splitting.
         $quickcheckcontent = $page->title . ' ' . $textcontent;
         $ok = true;
         foreach ($this->terms as $term) {
             if (count($term->words) < 2) {
                 continue;
             }
             // Quote each word so that characters which are special in a
             // regular expression (or the '/' delimiter) match literally
             // instead of corrupting the pattern.
             $quoted = array();
             foreach ($term->words as $word) {
                 $quoted[] = preg_quote($word, '/');
             }
             $pattern = '/(^|' . $gap . ')' . implode($gap, $quoted) . '($|' . $gap . ')/i';
             if (!preg_match($pattern, $quickcheckcontent)) {
                 $ok = false;
                 break;
             }
         }
         if (!$ok) {
             continue;
         }
         // OK, obtain document as linear text
         list($contentwords, $contentpositions) = ousearch_document::split_words($textcontent, false, true);
         list($titlewords, $titlepositions) = ousearch_document::split_words($page->title, false, true);
         $allwords = array_merge($titlewords, $contentwords);
         // Check it for phrases: every positive phrase must be present...
         $positivewords = array();
         $ok = true;
         foreach ($this->terms as $term) {
             foreach ($term->words as $word) {
                 $positivewords[$word] = true;
             }
             if (count($term->words) < 2) {
                 continue;
             }
             if (!$this->internal_contains_phrase($allwords, $term->words)) {
                 $ok = false;
                 break;
             }
         }
         // ...and no negative phrase may be present
         if ($ok) {
             foreach ($this->negativeterms as $term) {
                 if (count($term->words) < 2) {
                     continue;
                 }
                 if ($this->internal_contains_phrase($allwords, $term->words)) {
                     $ok = false;
                     break;
                 }
             }
         }
         if (!$ok) {
             continue;
         }
         // Result passes! Make structure holding it...
         // We now have list of all positive words, let's mark these
         // in title and summary
         $result->title = self::internal_highlight_words($page->title, $titlewords, $titlepositions, $positivewords);
         // Strip searchable-but-not-displayable content for summary
         if ($evilmarker !== null) {
             $endmarker = 'xxrealcontentends' . $evilmarker;
             $strippedwords = array();
             foreach ($contentwords as $word) {
                 // Do not include extra strings in summary
                 if ($word == $endmarker) {
                     break;
                 }
                 $strippedwords[] = $word;
             }
             $contentwords = $strippedwords;
         }
         // Pick a section to include in the summary. This algorithm works as follows:
         // * Compute the 'score' (number of highlight words in the previous
         //   OUSEARCH_SUMMARYLENGTH words up to and including this one) at
         //   each position in the text
         // * Observe where the maximum score is reached and where it is lost.
         // * A nice range that contains the most highlight words in the middle of the
         //   range will end at ($maxstart + $maxlength/2).
         $highlights = array();
         $pos = 0;
         $currentscore = 0;
         $maxscore = -1;
         $maxstart = 0;
         $maxlength = 0;
         $run = true;
         foreach ($contentwords as $word) {
             // Highlight that has just left the sliding window
             if (array_key_exists($pos - OUSEARCH_SUMMARYLENGTH, $highlights)) {
                 unset($highlights[$pos - OUSEARCH_SUMMARYLENGTH]);
                 $currentscore--;
             }
             if (array_key_exists($word, $positivewords)) {
                 $highlights[$pos] = true;
                 $currentscore++;
             }
             if ($currentscore > $maxscore) {
                 $maxscore = $currentscore;
                 $maxstart = $pos;
                 $maxlength = 1;
                 $run = true;
             } else {
                 if ($currentscore === $maxscore && $run) {
                     $maxlength++;
                 } else {
                     $run = false;
                 }
             }
             $pos++;
         }
         // Keep the word index an integer: an odd $maxlength would otherwise
         // give a fractional index, which is not a valid array key and made
         // the '$start !== 0' test below true for a summary that really
         // begins at the first word.
         $start = $maxstart + (int)floor($maxlength / 2) - OUSEARCH_SUMMARYLENGTH;
         if ($start < 0) {
             $start = 0;
         }
         $end = $start + OUSEARCH_SUMMARYLENGTH;
         if ($end > count($contentwords)) {
             $end = count($contentwords);
         }
         // $contentpositions is in characters.
         // NOTE(review): $contentpositions[$end] is read even when $end equals
         // the number of words, so split_words presumably returns a final
         // end-of-text position as well - confirm.
         $result->summary = $tl->substr($textcontent, $contentpositions[$start], $contentpositions[$end] - $contentpositions[$start]) . ($end < count($contentwords) ? '...' : '');
         $offset = -$contentpositions[$start];
         $result->summary = self::internal_highlight_words($result->summary, $contentwords, $contentpositions, $positivewords, $offset, $start, $end);
         if ($start !== 0) {
             $result->summary = '...' . $result->summary;
         }
         $result->summary = trim($result->summary);
         $result->activityname = $page->activityname;
         $result->activityurl = $page->activityurl;
         $result->url = $page->url;
         if (isset($page->data)) {
             $result->data = $page->data;
         }
         // Do user-specified filter if set
         if ($this->filter) {
             $filter = $this->filter;
             if (!$filter($result)) {
                 continue;
             }
         }
         $accepted[] = $result;
         $count++;
         if ($count == $desired) {
             break;
         }
     }
     $return->results = $accepted;
     return $return;
 }

 /**
  * Checks whether a phrase occurs as a run of consecutive words.
  *
  * Every possible start position is tried, so the phrase is still found
  * when it begins inside a failed partial match (e.g. 'a b' in 'a a b').
  * Words are compared with ===. Intended for internal use only.
  * @param array $allwords Document words in order (0-based list)
  * @param array $phrasewords Words of the phrase in order (0-based list)
  * @return bool True if $phrasewords appears consecutively in $allwords
  */
 function internal_contains_phrase($allwords, $phrasewords)
 {
     $phraselength = count($phrasewords);
     // Last index at which the whole phrase still fits into the document
     $laststart = count($allwords) - $phraselength;
     for ($start = 0; $start <= $laststart; $start++) {
         $matched = 0;
         while ($matched < $phraselength && $allwords[$start + $matched] === $phrasewords[$matched]) {
             $matched++;
         }
         if ($matched === $phraselength) {
             return true;
         }
     }
     return false;
 }