示例#1
0
/**
 * Returns a list of matching documents for the given query
 *
 * The parsed query ($q['parsed_ary']) is evaluated as a postfix expression
 * on a stack of page sets: word, phrase and namespace tokens push a set of
 * pages (page id => hit count), AND/OR pop two sets and push their
 * combination, NOT pops one set and pushes its complement against all pages
 * known to the indexer.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Kazutaka Miyasaka <*****@*****.**>
 *
 * @param array $data  'query' is read as the search string; 'highlight' is
 *                     overwritten with the parser's highlight list
 * @return array page id => hit count, highest count first; empty when the
 *               query parses to nothing or no readable page matches
 */
function _ft_pageSearch(&$data)
{
    $Indexer = idx_get_indexer();

    // parse the given query
    $q = ft_queryParser($Indexer, $data['query']);
    $data['highlight'] = $q['highlight'];
    if (empty($q['parsed_ary'])) {
        return array();
    }

    // lookup all words found in the query
    $lookup = $Indexer->lookup($q['words']);

    // get all pages in this dokuwiki site (!: includes nonexistent pages)
    $pages_all = array();
    foreach ($Indexer->getPages() as $id) {
        $pages_all[$id] = 0; // base: 0 hit
    }

    // process the query
    $stack = array();
    foreach ($q['parsed_ary'] as $token) {
        switch (substr($token, 0, 3)) {
            case 'W+:':
            case 'W-:':
            case 'W_:':
                // word: a word missing from the lookup result simply
                // matches no page (avoids an undefined-key warning)
                $word = substr($token, 3);
                $stack[] = isset($lookup[$word]) ? (array) $lookup[$word] : array();
                break;
            case 'P+:':
            case 'P-:':
                // phrase
                $phrase = substr($token, 3);
                // since phrases are always parsed as ((W1)(W2)...(P)),
                // the end($stack) always points the pages that contain
                // all words in this phrase
                $pages = end($stack);
                if (!is_array($pages)) {
                    // end() yields false on an empty stack; treat a
                    // malformed token sequence as "no candidate pages"
                    $pages = array();
                }
                $pages_matched = array();
                foreach (array_keys($pages) as $id) {
                    $text = utf8_strtolower(rawWiki($id));
                    if (strpos($text, $phrase) !== false) {
                        $pages_matched[$id] = 0; // phrase: always 0 hit
                    }
                }
                $stack[] = $pages_matched;
                break;
            case 'N+:':
            case 'N-:':
                // namespace: keep every known page whose id starts with it
                $ns = substr($token, 3);
                $pages_matched = array();
                foreach (array_keys($pages_all) as $id) {
                    if (strpos($id, $ns) === 0) {
                        $pages_matched[$id] = 0; // namespace: always 0 hit
                    }
                }
                $stack[] = $pages_matched;
                break;
            case 'AND':
                // and operation
                list($pages1, $pages2) = array_splice($stack, -2);
                $stack[] = ft_resultCombine(array($pages1, $pages2));
                break;
            case 'OR':
                // or operation
                list($pages1, $pages2) = array_splice($stack, -2);
                $stack[] = ft_resultUnite(array($pages1, $pages2));
                break;
            case 'NOT':
                // not operation (unary)
                $pages = array_pop($stack);
                $stack[] = ft_resultComplement(array($pages_all, $pages));
                break;
        }
    }

    // whatever is left on top of the stack is the result of the whole query
    $docs = array_pop($stack);
    if (empty($docs)) {
        return array();
    }

    // check: settings, acls, existence
    foreach (array_keys($docs) as $id) {
        if (isHiddenPage($id) || auth_quickaclcheck($id) < AUTH_READ || !page_exists($id, '', false)) {
            unset($docs[$id]);
        }
    }

    // sort docs by count
    arsort($docs);
    return $docs;
}
 /**
  * Derives highlight candidates from the HTTP referer
  * (A compensation for "remove_url_params" option)
  *
  * When the current request shows a page, no highlight terms are set yet,
  * and the referer is a search URL of this wiki, the search string found in
  * the referer is parsed and its highlight list is stored in the global
  * $HIGH. Does nothing when $this->disable_highlight is 'all' or 'auto'.
  */
 function getCandidatesFromReferer()
 {
     global $ACT;
     global $HIGH;

     // preconditions, checked in this order so later ones are only
     // evaluated when the earlier ones hold
     if ($ACT !== 'show') {
         return;
     }
     if (!empty($HIGH)) {
         return;
     }
     if (!isset($_SERVER['HTTP_REFERER'])) {
         return;
     }
     if (in_array($this->disable_highlight, array('all', 'auto'))) {
         return;
     }

     $referer = (string) $_SERVER['HTTP_REFERER'];

     // the referer must start with this wiki's base URL and carry a search request
     $cameFromThisWiki = strpos($referer, DOKU_URL) === 0;
     if (!$cameFromThisWiki || !preg_match('/[?&]do=search&id=([^&]+)/', $referer, $matches)) {
         return;
     }

     // users seem to have jumped from search result link in this wiki
     require_once DOKU_INC . 'inc/fulltext.php';
     $query = urldecode($matches[1]);

     // set highlight candidates
     // (ft_queryParser has been modified since DokuWiki Rincewind:
     //  when idx_get_indexer() exists the parser takes the indexer first)
     $parsed_query = function_exists('idx_get_indexer')
         ? ft_queryParser(idx_get_indexer(), $query)
         : ft_queryParser($query);

     $HIGH = $parsed_query['highlight'];
 }
/**
 * The fulltext search
 *
 * Returns a list of matching documents for the given query
 *
 * Steps: look up every required and excluded word, intersect the page sets
 * of the required words, then drop pages that are hidden, outside the
 * requested namespaces, matched by an excluded word, missing one of the
 * exact phrases, or not readable by the current user.
 *
 * @param string $query      the search query
 * @param array  $highlight  overwritten with the words (wildcard '*' removed)
 *                           and lowercased phrases to highlight
 * @return array page id => hit count, highest count first
 */
function ft_pageSearch($query, &$highlight)
{
    $q = ft_queryParser($query);

    // remember for highlighting later
    $highlight = array();
    foreach ($q['words'] as $wrd) {
        $highlight[] = str_replace('*', '', $wrd);
    }

    // lookup all words found in the query
    $words = array_merge($q['and'], $q['not']);
    if (!count($words)) {
        return array();
    }
    $result = idx_lookup($words);
    if (!count($result)) {
        return array();
    }

    // merge search results with query; a word without a lookup entry
    // contributes an empty page set
    foreach ($q['and'] as $pos => $w) {
        $q['and'][$pos] = isset($result[$w]) ? $result[$w] : array();
    }

    // create a list of unwanted docs
    $not = array();
    foreach ($q['not'] as $w) {
        if (isset($result[$w])) {
            $not = array_merge($not, array_keys($result[$w]));
        }
    }

    // combine and-words; a query consisting only of excluded words has no
    // positive term to match against and therefore yields nothing
    $andCount = count($q['and']);
    if ($andCount > 1) {
        $docs = ft_resultCombine($q['and']);
    } elseif ($andCount === 1) {
        $docs = reset($q['and']);
    } else {
        $docs = array();
    }
    if (empty($docs)) {
        return array();
    }

    // create a list of hidden pages in the result
    $hidden = array_filter(array_keys($docs), 'isHiddenPage');
    $not = array_merge($not, $hidden);

    // filter unmatched namespaces: a page is kept when its id starts with
    // any requested namespace. The namespaces are compared literally, so
    // characters such as '.' or '(' in the query cannot alter or break
    // the match the way they would inside a regular expression.
    if (!empty($q['ns'])) {
        foreach (array_keys($docs) as $key) {
            $inNamespace = false;
            foreach ($q['ns'] as $ns) {
                if (strncmp((string) $key, $ns, strlen($ns)) === 0) {
                    $inNamespace = true;
                    break;
                }
            }
            if (!$inNamespace) {
                unset($docs[$key]);
            }
        }
    }

    // remove negative matches
    foreach ($not as $n) {
        unset($docs[$n]);
    }
    if (!count($docs)) {
        return array();
    }

    // handle phrases
    if (count($q['phrases'])) {
        $q['phrases'] = array_map('utf8_strtolower', $q['phrases']);
        // use this for highlighting later:
        $highlight = array_merge($highlight, $q['phrases']);
        $q['phrases'] = array_map('preg_quote_cb', $q['phrases']);
        // check the source of all documents for the exact phrases
        foreach (array_keys($docs) as $id) {
            $text = utf8_strtolower(rawWiki($id));
            foreach ($q['phrases'] as $phrase) {
                if (!preg_match('/' . $phrase . '/usi', $text)) {
                    unset($docs[$id]); // no hit - remove
                    break;
                }
            }
        }
    }
    if (!count($docs)) {
        return array();
    }

    // check ACL permissions
    foreach (array_keys($docs) as $doc) {
        if (auth_quickaclcheck($doc) < AUTH_READ) {
            unset($docs[$doc]);
        }
    }
    if (!count($docs)) {
        return array();
    }

    // if there are any hits left, sort them by count
    arsort($docs);
    return $docs;
}
示例#4
0
 /**
  * Records the parameters of the current request's query string.
  *
  * Only runs for the 'show' and 'search' actions, for requests that carry a
  * query string, and when neither $this->is_edit_user is set nor
  * $this->is_excluded() rejects $this->ipaddr.
  *
  * For a search request (do=search) the search string in 'id' is parsed with
  * ft_queryParser() and its words and namespaces are handed to
  * set_queries(). For every other request the parameters that remain after
  * dropping the names listed in $this->dw_tokens and names matching
  * rev<n>[<n>] are recorded under $queries['extern']. The collection is
  * written back to $this->qs_file when anything was recorded.
  *
  * @param mixed $event  not used by this handler
  * @param mixed $param  not used by this handler
  */
 function search_queries(&$event, $param)
 {
     global $ACT;
     if (is_array($ACT) || $this->is_edit_user) {
         return;
     }
     if ($ACT != 'show' && $ACT != 'search') {
         return;
     }
     //login,admin,profile, revisions,logout
     if (empty($_SERVER['QUERY_STRING']) || $this->is_excluded($this->ipaddr)) {
         return;
     }

     $queries = unserialize(io_readFile($this->qs_file, false));
     if (!$queries) {
         $queries = array('words' => array(), 'ns' => array(), 'extern' => array());
     }

     // Split the query string into name => value pairs. No element-count
     // guard is needed: the query string is non-empty at this point, so
     // explode() always yields at least one element.
     $temp = array();
     foreach (explode('&', html_entity_decode($_SERVER['QUERY_STRING'])) as $el) {
         if (!$el) {
             continue;
         }
         // limit 2 keeps '=' characters that belong to the value; a bare
         // parameter without '=' gets a null value
         list($name, $value) = array_pad(explode('=', $el, 2), 2, null);
         $temp[$name] = $value;
     }

     $data_found = false;
     if (isset($temp['do']) && $temp['do'] == 'search') {
         $data_found = true;
         // a search request may arrive without an 'id' parameter
         $search = isset($temp['id']) ? urldecode($temp['id']) : '';
         // newer parser versions expect the indexer as first argument
         if (function_exists('idx_get_indexer')) {
             $ar = ft_queryParser(idx_get_indexer(), $search);
         } else {
             $ar = ft_queryParser($search);
         }
         if (!empty($ar['phrases']) && !empty($ar['not'])) {
             $words = array_diff($ar['words'], $ar['not']);
         } else {
             $words = $ar['words'];
         }
         if (!empty($words)) {
             foreach ($words as $word) {
                 $this->set_queries($queries, $word, 'words');
             }
         }
         if (!empty($ar['ns'])) {
             foreach ($ar['ns'] as $ns) {
                 $this->set_queries($queries, $ns, 'ns');
             }
         }
     } else {
         // drop the parameters listed in $this->dw_tokens
         foreach ($this->dw_tokens as $t) {
             if (isset($temp[$t])) {
                 unset($temp[$t]);
             }
         }
         // drop revision selectors such as rev2[0]
         foreach (array_keys($temp) as $k) {
             if (preg_match('/rev\\d*\\[\\d*\\]/', $k)) {
                 unset($temp[$k]);
             }
         }
         if (count($temp)) {
             $data_found = true;
         }
         foreach ($temp as $name => $val) {
             $this->set_queries($queries['extern'], urldecode($name), 'name');
             if (!$val) {
                 $val = '_empty_';
             }
             $this->set_queries($queries['extern'], urldecode($val), 'val');
             $this->set_named_values($queries['extern']['name'][urldecode($name)], urldecode($val));
         }
     }

     if ($data_found) {
         io_saveFile($this->qs_file, serialize($queries));
     }
 }