/**
 * Evaluates a parsed fulltext query and returns the pages that match it
 *
 * The query string in $data['query'] is handed to ft_queryParser(); the
 * resulting token list ($q['parsed_ary']) is evaluated with an operand
 * stack. As a side effect $data['highlight'] is set to the parser's
 * 'highlight' entry.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Kazutaka Miyasaka <*****@*****.**>
 *
 * @param array $data passed by reference; 'query' is read, 'highlight' is written
 * @return array matching documents keyed by page id, reverse-sorted by value
 *               (arsort); an empty array when nothing matches
 */
function _ft_pageSearch(&$data)
{
    $indexer = idx_get_indexer();

    // let the parser turn the raw query into tokens
    $parsed = ft_queryParser($indexer, $data['query']);
    $data['highlight'] = $parsed['highlight'];

    if (empty($parsed['parsed_ary'])) {
        return array();
    }

    // index lookup for every word that occurs in the query
    $hitsByWord = $indexer->lookup($parsed['words']);

    // every page the indexer knows about (may include nonexistent pages),
    // each starting out with zero hits
    $everyPage = array();
    foreach ($indexer->getPages() as $pageId) {
        $everyPage[$pageId] = 0;
    }

    // evaluate the token list; each token either pushes a result set onto
    // the operand stack or combines the topmost sets
    $operands = array();
    foreach ($parsed['parsed_ary'] as $token) {
        $kind = substr($token, 0, 3);

        if ($kind === 'W+:' || $kind === 'W-:' || $kind === 'W_:') {
            // word: push the pages found by the index lookup
            $term = substr($token, 3);
            $operands[] = (array) $hitsByWord[$term];
        } elseif ($kind === 'P+:' || $kind === 'P-:') {
            // phrase: phrases are always parsed as ((W1)(W2)...(P)), so the
            // top of the stack holds the pages containing all of its words
            $needle = substr($token, 3);
            $candidates = end($operands);
            $hits = array();
            foreach (array_keys($candidates) as $pageId) {
                $haystack = utf8_strtolower(rawWiki($pageId));
                if (strpos($haystack, $needle) !== false) {
                    $hits[$pageId] = 0; // a phrase always counts as 0 hits
                }
            }
            $operands[] = $hits;
        } elseif ($kind === 'N+:' || $kind === 'N-:') {
            // namespace: all known pages whose id starts with the given string
            $prefix = substr($token, 3);
            $hits = array();
            foreach (array_keys($everyPage) as $pageId) {
                if (strpos($pageId, $prefix) === 0) {
                    $hits[$pageId] = 0; // a namespace always counts as 0 hits
                }
            }
            $operands[] = $hits;
        } elseif ($kind === 'AND') {
            // binary AND over the two topmost result sets
            list($left, $right) = array_splice($operands, -2);
            $operands[] = ft_resultCombine(array($left, $right));
        } elseif ($kind === 'OR') {
            // binary OR over the two topmost result sets
            list($left, $right) = array_splice($operands, -2);
            $operands[] = ft_resultUnite(array($left, $right));
        } elseif ($kind === 'NOT') {
            // unary NOT: complement of the topmost set against all pages
            $excluded = array_pop($operands);
            $operands[] = ft_resultComplement(array($everyPage, $excluded));
        }
    }

    $docs = array_pop($operands);
    if (empty($docs)) {
        return array();
    }

    // drop hidden pages, pages the user may not read and nonexistent pages
    foreach (array_keys($docs) as $pageId) {
        $readable = !isHiddenPage($pageId)
            && auth_quickaclcheck($pageId) >= AUTH_READ
            && page_exists($pageId, '', false);
        if (!$readable) {
            unset($docs[$pageId]);
        }
    }

    // highest count first
    arsort($docs);

    return $docs;
}
/**
 * Derives highlight candidates from the HTTP referer
 * (a compensation for the "remove_url_params" option)
 *
 * When the current action is 'show', no highlight terms are set yet and
 * the referer is a search URL of this wiki (it starts with DOKU_URL and
 * contains "do=search&id=..."), the search query is taken from the
 * referer, parsed with ft_queryParser() and its 'highlight' entry is
 * stored in the global $HIGH. In every other case nothing is changed.
 *
 * @return void
 */
function getCandidatesFromReferer()
{
    global $HIGH;
    global $ACT;

    // guard clauses: bail out unless every precondition holds
    if ($ACT !== 'show') {
        return;
    }
    if (!empty($HIGH)) {
        return;
    }
    if (!isset($_SERVER['HTTP_REFERER'])) {
        return;
    }
    if (in_array($this->disable_highlight, array('all', 'auto'))) {
        return;
    }

    $referer = (string) $_SERVER['HTTP_REFERER'];

    // the visitor must have come from a search result link in this wiki
    if (strpos($referer, DOKU_URL) !== 0) {
        return;
    }
    if (!preg_match('/[?&]do=search&id=([^&]+)/', $referer, $matches)) {
        return;
    }

    require_once DOKU_INC . 'inc/fulltext.php';
    $query = urldecode($matches[1]);

    // ft_queryParser has been modified since DokuWiki Rincewind: where
    // idx_get_indexer() exists, the indexer is passed as first argument
    $parsed_query = function_exists('idx_get_indexer')
        ? ft_queryParser(idx_get_indexer(), $query)
        : ft_queryParser($query);

    // set highlight candidates
    $HIGH = $parsed_query['highlight'];
}
/**
 * The fulltext search
 *
 * Returns a list of matching documents for the given query. The query is
 * parsed with ft_queryParser(); the words are looked up with idx_lookup();
 * the result is then narrowed by negated words, hidden pages, namespaces,
 * exact phrases and ACL read permission.
 *
 * @param string $query     the search query
 * @param array  $highlight passed by reference; overwritten with the query
 *                          words ('*' removed) plus any lowercased phrases
 * @return array matching documents keyed by page id, reverse-sorted by
 *               value (arsort); an empty array when nothing matches
 */
function ft_pageSearch($query, &$highlight)
{
    $q = ft_queryParser($query);

    // remember for highlighting later
    $highlight = array();
    foreach ($q['words'] as $wrd) {
        $highlight[] = str_replace('*', '', $wrd);
    }

    // lookup all words found in the query
    $words = array_merge($q['and'], $q['not']);
    if (!count($words)) {
        return array();
    }
    $result = idx_lookup($words);
    if (!count($result)) {
        return array();
    }

    // merge search results with query
    foreach ($q['and'] as $pos => $w) {
        $q['and'][$pos] = $result[$w];
    }

    // create a list of unwanted docs
    $not = array();
    foreach ($q['not'] as $w) {
        $not = array_merge($not, array_keys($result[$w]));
    }

    // combine and-words; a query made only of negated words has no
    // and-words at all, which must yield an empty result rather than
    // reading an undefined offset and calling count() on null
    if (count($q['and']) > 1) {
        $docs = ft_resultCombine($q['and']);
    } elseif (isset($q['and'][0])) {
        $docs = $q['and'][0];
    } else {
        $docs = array();
    }
    if (empty($docs)) {
        return array();
    }

    // hidden pages in the result are treated like negative matches
    $hidden = array_filter(array_keys($docs), 'isHiddenPage');
    $not = array_merge($not, $hidden);

    // filter unmatched namespaces: keep a page when its id starts with at
    // least one of the requested namespaces. The comparison is a literal
    // prefix test, so characters such as '.' or '/' in a namespace are not
    // interpreted as regular expression syntax.
    if (!empty($q['ns'])) {
        foreach (array_keys($docs) as $key) {
            $inNamespace = false;
            foreach ($q['ns'] as $ns) {
                $ns = (string) $ns;
                if (strncmp((string) $key, $ns, strlen($ns)) === 0) {
                    $inNamespace = true;
                    break;
                }
            }
            if (!$inNamespace) {
                unset($docs[$key]);
            }
        }
    }

    // remove negative matches
    foreach ($not as $n) {
        unset($docs[$n]);
    }
    if (!count($docs)) {
        return array();
    }

    // handle phrases
    if (count($q['phrases'])) {
        $q['phrases'] = array_map('utf8_strtolower', $q['phrases']);
        // use this for highlighting later:
        $highlight = array_merge($highlight, $q['phrases']);
        $q['phrases'] = array_map('preg_quote_cb', $q['phrases']);

        // check the source of all documents for the exact phrases
        foreach (array_keys($docs) as $id) {
            $text = utf8_strtolower(rawWiki($id));
            foreach ($q['phrases'] as $phrase) {
                if (!preg_match('/' . $phrase . '/usi', $text)) {
                    unset($docs[$id]); // no hit - remove
                    break;
                }
            }
        }
    }
    if (!count($docs)) {
        return array();
    }

    // check ACL permissions
    foreach (array_keys($docs) as $doc) {
        if (auth_quickaclcheck($doc) < AUTH_READ) {
            unset($docs[$doc]);
        }
    }
    if (!count($docs)) {
        return array();
    }

    // if there are any hits left, sort them by count
    arsort($docs);

    return $docs;
}
/**
 * Records search terms and other query-string parameters of the request
 *
 * Runs only for the 'show' and 'search' actions, for non-empty query
 * strings, and when neither $this->is_edit_user nor
 * $this->is_excluded($this->ipaddr) applies. The stored data is read from
 * and written back to $this->qs_file in serialized form.
 *
 * - For "do=search" requests the parsed search words and namespaces are
 *   passed to $this->set_queries().
 * - Otherwise the parameters remaining after removing the names listed in
 *   $this->dw_tokens and names matching rev<digits>[<digits>] are recorded
 *   under $queries['extern'].
 *
 * @param mixed $event event object (unused here)
 * @param mixed $param handler parameter (unused here)
 * @return void
 */
function search_queries(&$event, $param)
{
    global $ACT;

    if (is_array($ACT) || $this->is_edit_user) {
        return;
    }
    // other actions (login, admin, profile, revisions, logout) are skipped
    if ($ACT != 'show' && $ACT != 'search') {
        return;
    }
    if (empty($_SERVER['QUERY_STRING']) || $this->is_excluded($this->ipaddr)) {
        return;
    }

    $queries = unserialize(io_readFile($this->qs_file, false));
    if (!$queries) {
        $queries = array('words' => array(), 'ns' => array(), 'extern' => array());
    }

    // The original guarded everything below with count($elems > 1), which
    // counts a boolean: always truthy on old PHP, a warning on PHP 7.2+ and
    // a TypeError on PHP 8. explode() always returns at least one element,
    // so the guard is dropped and the effective behaviour is kept.
    $elems = explode('&', html_entity_decode($_SERVER['QUERY_STRING']));
    $data_found = false;

    // split every non-empty element into name and value
    $temp = array();
    foreach ($elems as $el) {
        if (!$el) {
            continue;
        }
        // limit 2 keeps '=' characters inside the value; a parameter
        // without '=' gets a null value instead of an undefined offset
        $pair = explode('=', $el, 2);
        $temp[$pair[0]] = isset($pair[1]) ? $pair[1] : null;
    }

    if (isset($temp['do']) && $temp['do'] == 'search') {
        $data_found = true;

        // a search request without an id is treated as an empty query
        $search = isset($temp['id']) ? urldecode($temp['id']) : '';
        if (function_exists('idx_get_indexer')) {
            $ar = ft_queryParser(idx_get_indexer(), $search);
        } else {
            $ar = ft_queryParser($search);
        }

        if (!empty($ar['phrases']) && !empty($ar['not'])) {
            $words = array_diff($ar['words'], $ar['not']);
        } else {
            $words = $ar['words'];
        }

        if (!empty($words)) {
            foreach ($words as $word) {
                $this->set_queries($queries, $word, 'words');
            }
        }
        if (!empty($ar['ns'])) {
            foreach ($ar['ns'] as $ns) {
                $this->set_queries($queries, $ns, 'ns');
            }
        }
    } else {
        // drop the parameter names listed in $this->dw_tokens
        foreach ($this->dw_tokens as $t) {
            if (isset($temp[$t])) {
                unset($temp[$t]);
            }
        }

        if (count($temp)) {
            // drop parameters named like rev<digits>[<digits>]
            foreach (array_keys($temp) as $k) {
                if (preg_match('/rev\\d*\\[\\d*\\]/', $k)) {
                    unset($temp[$k]);
                }
            }
            if (count($temp)) {
                $data_found = true;
            }
        }

        foreach ($temp as $name => $val) {
            $this->set_queries($queries['extern'], urldecode($name), 'name');
            if (!$val) {
                $val = '_empty_';
            }
            $this->set_queries($queries['extern'], urldecode($val), 'val');
            $this->set_named_values($queries['extern']['name'][urldecode($name)], urldecode($val));
        }
    }

    if ($data_found) {
        io_saveFile($this->qs_file, serialize($queries));
    }
}