/**
 * Render the current page and highlight every occurrence of a search expression.
 *
 * The expression is taken from $params['value'], or from $params['q'] when
 * 'value' is absent. The page body is rendered into an output buffer, each
 * match outside of an HTML tag is passed to highlight_repl(), and the result
 * is echoed between the formatter's header/title and footer.
 *
 * @param object $formatter page formatter (send_header/send_title/send_page/send_footer are called on it)
 * @param array  $params    request parameters; 'value' or 'q' holds the expression
 */
function do_highlight($formatter, $params = array())
{
    // Default to an empty expression so the code below never reads an
    // undefined variable when neither 'value' nor 'q' was supplied.
    $expr = '';
    if (isset($params['value'])) {
        $expr = $params['value'];
    } elseif (isset($params['q'])) {
        $expr = $params['q'];
    }
    $expr = _stripslashes($expr);

    $formatter->send_header('', $params);
    $formatter->send_title('', '', $params);
    flush();

    // Capture the rendered page so it can be post-processed.
    ob_start();
    $formatter->send_page();
    flush();
    $out = ob_get_contents();
    ob_end_clean();

    if (is_string($expr) and isset($expr[0])) {
        // NOTE(review): presumably resets highlight_repl()'s internal state - confirm.
        highlight_repl(null, true);

        $highlight = _preg_search_escape($expr);
        if (validate_needle($highlight) === false) {
            // Invalid regex: quote all regexp specials, including the '/'
            // delimiter used by the pattern below.
            $highlight = preg_quote($expr, '/');
        }

        // The first alternative swallows whole tags so that text inside
        // markup is handed to the callback as a tag, not as a match.
        $marked = preg_replace_callback('/((<[^>]*>)|(' . $highlight . '))/i', 'highlight_repl', $out);
        if ($marked !== null) {
            // preg_replace_callback() returns null on a PCRE error; in that
            // case keep the unhighlighted page instead of printing nothing.
            $out = $marked;
        }
    }
    echo $out;

    $args = array('editable' => 1);
    $formatter->send_footer($args, $params);
}
/**
 * Search page titles (and aliases) for a needle and render the hits as a list.
 *
 * Options read from $opts: 'call', 'noexpr', 'noexact', 'limit', 'linkto'.
 * Options written to $opts: 'form', 'msg', 'all', 'hits', and when applicable
 * 'value' (the single hit), 'exact' and 'out'.
 *
 * @param object $formatter page formatter (link_url/link_to/link_tag are called on it)
 * @param string $needle    search text or regular expression
 * @param array  $opts      in/out options, see above
 * @return string|array the search form when the needle is empty, otherwise the
 *                      HTML list; $opts itself when $opts['call'] is set
 */
function macro_TitleSearch($formatter = "", $needle = "", &$opts)
{
    global $DBInfo;

    $type = 'o';

    $url = $formatter->link_url($formatter->page->urlname);
    $hneedle = _html_escape($needle);

    $msg = _("Go");
    $form = "<form method='get' action='{$url}'>\n <input type='hidden' name='action' value='titlesearch' />\n <input name='value' size='30' value=\"{$hneedle}\" />\n <span class='button'><input type='submit' class='button' value='{$msg}' /></span>\n </form>";

    if (!isset($needle[0])) {
        $opts['msg'] = _("Use more specific text");
        if (!empty($opts['call'])) {
            $opts['form'] = $form;
            return $opts;
        }
        return $form;
    }

    $opts['form'] = $form;
    $opts['msg'] = sprintf(_("Title search for \"%s\""), $hneedle);

    // Use the needle as a regex only when allowed and valid; otherwise match
    // it literally. 'noexpr' may be absent, hence the empty() guard.
    $cneedle = _preg_search_escape($needle);
    if (!empty($opts['noexpr']) or validate_needle($cneedle) === false) {
        $needle = preg_quote($needle);
    } else {
        // good expr
        $needle = $cneedle;
    }

    // return the exact page or all similar pages
    $noexact = true;
    if (isset($opts['noexact'])) {
        $noexact = $opts['noexact'];
    }

    $limit = !empty($DBInfo->titlesearch_page_limit) ? $DBInfo->titlesearch_page_limit : 100;
    if (isset($opts['limit'])) {
        $limit = $opts['limit'];
    }

    $indexer = $DBInfo->lazyLoad('titleindexer');
    $pages = $indexer->getLikePages($needle, $limit);
    if (!is_array($pages)) {
        // getLikePages() reports failure with false; treat it as "no pages"
        // so array_merge() below keeps working.
        $pages = array();
    }

    $opts['all'] = $DBInfo->getCounter();

    if (empty($DBInfo->alias)) {
        $DBInfo->initAlias();
    }
    $alias = $DBInfo->alias->getAllPages();
    if (!is_array($alias)) {
        $alias = array();
    }
    $pages = array_merge($pages, $alias);

    $hits = array();
    $exacts = array();

    if ($noexact) {
        // return all search results
        foreach ($pages as $page) {
            if (preg_match("/" . $needle . "/i", $page)) {
                $hits[] = $page;
            }
        }
    } else {
        // return exact pages
        // The needle is grouped so that an alternation such as "a|b" stays
        // inside the ^...$ anchors.
        foreach ($pages as $page) {
            if (preg_match("/^(?:" . $needle . ")\$/i", $page)) {
                $hits[] = $page;
                $exacts[] = $page;
                if (empty($DBInfo->titlesearch_exact_all)) {
                    // only the first exact match is wanted
                    $hits = $exacts;
                    break;
                }
            }
        }
    }

    if (empty($hits) and empty($exacts)) {
        // simple title search by ignore spaces
        $needle2 = str_replace(' ', "\\s*", $needle);
        // Also allow optional whitespace around every character in the
        // U+AC00..U+D7F7 range.
        $ws = preg_split("/([\\x{AC00}-\\x{D7F7}])/u", $needle2, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        if ($ws !== false) {
            // preg_split() fails (false) e.g. on malformed UTF-8; keep the
            // space-relaxed needle in that case.
            $needle2 = implode("\\s*", $ws);
        }

        $hits = $indexer->getLikePages($needle2);
        if (!is_array($hits)) {
            $hits = array();
        }
        foreach ($alias as $page) {
            if (preg_match("/" . $needle2 . "/i", $page)) {
                $hits[] = $page;
            }
        }
    }

    sort($hits);

    $idx = 1;
    if (!empty($opts['linkto'])) {
        $idx = 10;
    }

    $out = '';
    foreach ($hits as $pagename) {
        $pagetext = _html_escape(urldecode($pagename));
        if (!empty($opts['linkto'])) {
            $out .= '<li>' . $formatter->link_to("{$opts['linkto']}{$pagename}", $pagetext, "tabindex='{$idx}'") . "</li>\n";
        } else {
            $out .= '<li>' . $formatter->link_tag(_rawurlencode($pagename), "", $pagetext, "tabindex='{$idx}'") . "</li>\n";
        }
        $idx++;
    }

    if ($out) {
        $out = "<{$type}l>{$out}</{$type}l>\n";
    }

    $opts['hits'] = count($hits);
    if ($opts['hits'] == 1) {
        $opts['value'] = array_pop($hits);
    }
    if (!empty($exacts)) {
        $opts['exact'] = 1;
    }

    if (!empty($opts['call'])) {
        $opts['out'] = $out;
        return $opts;
    }
    return $out;
}
/**
 * Full-text, backlink or keyword search over all pages, with result caching.
 *
 * Options read from $opts: 'offset', 'form', 'noexpr', 'backlinks',
 * 'keywords', 'case', 'context', 'checkbox', 'call'.
 * Options written to $opts: 'msg', 'form', 'hits', 'hit', 'all', and when
 * more pages remain 'next' and 'searched'.
 *
 * In the default (full-text) arena the needle is split into terms; a leading
 * '-' marks a term as excluded, a leading '+' or double quotes keep short or
 * common words that would otherwise be dropped.
 *
 * @param object      $formatter page formatter
 * @param string|true $value     search text; true means "the current page name"
 * @param array       $opts      in/out options, see above
 * @return string|array the search form when there is nothing to search,
 *                      the hits array when $opts['call'] is set,
 *                      otherwise the HTML result list
 */
function macro_FullSearch($formatter, $value = "", &$opts)
{
    global $DBInfo;

    if ($value === true) {
        // NOTE(review): the original also set $options['noexpr'] = 1 here, a
        // variable nothing reads ($opts was probably intended). The inert
        // assignment is dropped; behaviour is unchanged. Confirm the intent
        // before wiring it to $opts.
        $value = $formatter->page->name;
    }
    // for MoinMoin compatibility with [[FullSearch("blah blah")]]
    $needle = $value;

    // for pagination
    $offset = '';
    if (!empty($opts['offset']) and is_numeric($opts['offset']) and $opts['offset'] > 0) {
        $offset = $opts['offset'];
    }

    $url = $formatter->link_url($formatter->page->urlname);
    $fneedle = _html_escape($needle);
    $tooshort = !empty($DBInfo->fullsearch_tooshort) ? $DBInfo->fullsearch_tooshort : 2;

    $m1 = _("Display context of search results");
    $m2 = _("Search BackLinks only");
    $m3 = _("Case-sensitive searching");
    $msg = _("Go");
    $bchecked = !empty($DBInfo->use_backlinks) ? 'checked="checked"' : '';
    $form = <<<EOF
<form method='get' action='{$url}'>
   <input type='hidden' name='action' value='fullsearch' />
   <input name='value' size='30' value="{$fneedle}" />
   <span class='button'><input type='submit' class='button' value='{$msg}' /></span><br />
   <input type='checkbox' name='backlinks' value='1' {$bchecked} />{$m2}<br />
   <input type='checkbox' name='context' value='20' />{$m1}<br />
   <input type='checkbox' name='case' value='1' />{$m3}<br />
   </form>
EOF;

    if (!isset($needle[0]) or !empty($opts['form'])) {
        $opts['msg'] = _("No search text");
        return $form;
    }
    $opts['form'] = $form;

    $excl = array();
    $incl = array();
    // Initialised here because only the term-parsing branch fills it, while
    // the context rendering at the end reads it in every branch.
    $excl_needle = '';

    if (!empty($opts['noexpr'])) {
        // Literal search: any of the whitespace separated words may match.
        $words = preg_split("/\\s+/", $needle, -1, PREG_SPLIT_NO_EMPTY);
        $value = join('|', $words);
        // Quote each word BEFORE joining; quoting the joined string would
        // turn the '|' separators into literal pipe characters.
        $quoted = array();
        foreach ($words as $w) {
            $quoted[] = preg_quote($w, '/');
        }
        $needle = implode('|', $quoted);
    } else {
        if (empty($opts['backlinks'])) {
            $terms = preg_split('/((?<!\\S)[-+]?"[^"]+?"(?!\\S)|\\S+)/s', $needle, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
            $common_words = array('the', 'that', 'where', 'what', 'who', 'how', 'too', 'are');

            foreach ($terms as $term) {
                if (trim($term) == '') {
                    continue;
                }
                // $match[1]: optional +/- prefix, $match[2]: optional quote, $match[3]: the word
                if (preg_match('/^([-+]?)("?)([^\\2]+?)\\2$/', $term, $match)) {
                    $word = str_replace(array('\\', '.', '*'), '', $match[3]);
                    $len = strlen($word);

                    if (!$match[1] and $match[2] != '"') {
                        // unprefixed, unquoted words that are too short or too common are ignored
                        if ($len < $tooshort or in_array($word, $common_words)) {
                            continue;
                        }
                    }

                    if ($match[1] == '-') {
                        $excl[] = $word;
                    } else {
                        $incl[] = $word;
                    }
                }
            }

            $needle = implode('|', $incl);
            $needle = _preg_search_escape($needle);

            $test = validate_needle($needle);
            if ($test === false) {
                // invalid regex: fall back to a literal match of every term
                $quoted = array();
                foreach ($incl as $w) {
                    $quoted[] = preg_quote($w, '/');
                }
                $needle = implode('|', $quoted);
            }

            $excl_needle = implode('|', $excl);
            $test = validate_needle($excl_needle);
            // The original tested an undefined $test2 here, so an invalid
            // exclusion expression was never quoted.
            if ($test === false) {
                $quoted = array();
                foreach ($excl as $w) {
                    $quoted[] = preg_quote($w, '/');
                }
                $excl_needle = implode('|', $quoted);
            }
        } else {
            $cneedle = _preg_search_escape($needle);
            $test = validate_needle($cneedle);
            if ($test === false) {
                $needle = preg_quote($needle);
            } else {
                $needle = $cneedle;
            }
        }
    }

    $test3 = trim($needle);
    if (!isset($test3[0])) {
        $opts['msg'] = _("Empty expression");
        return $form;
    }

    // set arena and sid
    if (!empty($opts['backlinks'])) {
        $arena = 'backlinks';
    } elseif (!empty($opts['keywords'])) {
        $arena = 'keywords';
    } else {
        $arena = 'fullsearch';
    }
    $index_arena = in_array($arena, array('backlinks', 'keywords'));

    if ($arena == 'fullsearch') {
        $sid = md5($value . 'v' . $offset);
    } else {
        $sid = $value;
    }

    $delay = !empty($DBInfo->default_delaytime) ? $DBInfo->default_delaytime : 0;

    // Defaults for values that only some paths below assign.
    $searched = 0;
    $redirects = null;

    // retrieve cache; $hits stays unset when there is no usable cache entry
    $fc = new Cache_text($arena);
    if (!$formatter->refresh and $fc->exists($sid)) {
        $data = $fc->fetch($sid);
        if (!empty($opts['backlinks'])) {
            // backlinks are not needed to check it.
            if (is_array($data)) {
                $hits = $data;
            }
            // also fetch redirects
            $r = new Cache_Text('redirects');
            $redirects = $r->fetch($sid);
        } elseif (is_array($data)) {
            // check cache mtime against the database mtime (crude method)
            $cmt = $fc->mtime($sid);
            $dmt = $DBInfo->mtime();
            $stale = $dmt > $cmt + $delay;

            if (!$stale) {
                // smart but incomplete method: a cached hit page that changed
                // after the cache was written invalidates the entry.
                // (The original kept a reference to the cached hits here, so
                // clearing $data did not actually discard them.)
                $cached = isset($data['hits']) ? $data['hits'] : $data;
                foreach ($cached as $p => $c) {
                    $mp = $DBInfo->getPage($p);
                    if ($mp->mtime() > $cmt + $delay) {
                        $stale = true;
                        break;
                    }
                }
            }

            if (!$stale) {
                if (isset($data['searched'])) {
                    // New cache layout: array('hits' =>, 'offset' =>, 'searched' =>).
                    // Assigned explicitly instead of extract() so cache content
                    // cannot overwrite unrelated local variables.
                    $hits = isset($data['hits']) ? $data['hits'] : $data;
                    if (isset($data['offset'])) {
                        $offset = $data['offset'];
                    }
                    $searched = $data['searched'];
                } elseif (!empty($data)) {
                    // old cache layout: the hits array itself
                    $hits = $data;
                }
            }
        }
    }

    $pattern = '/' . $needle . '/';
    // NOTE(review): the form labels 'case' as "Case-sensitive searching", yet
    // setting it appends the case-INSENSITIVE modifier. Left as found -
    // confirm the intended meaning before changing it.
    if (!empty($opts['case'])) {
        $pattern .= "i";
    }

    if (isset($hits)) {
        if ($index_arena) {
            $test = key($hits);
            if (is_int($test) and $hits[$test] != -1) {
                // fix compatible issue for keywords, backlinks:
                // a cached plain list of names becomes name => -1
                $hits = array_fill_keys($hits, -1);
            }

            // check invert redirect index
            if (!empty($redirects) and is_array($redirects)) {
                $redirects = array_flip($redirects);
                ksort($redirects);
                foreach ($redirects as $k => $v) {
                    $hits[$k] = -2; // -2 marks a redirect page in the output below
                }
            }
        }
    } else {
        $hits = array();

        set_time_limit(0);
        if (!empty($opts['backlinks']) and empty($DBInfo->use_backlink_search)) {
            $hits = array();
        } elseif (!empty($opts['keywords']) and empty($DBInfo->use_keyword_search)) {
            $hits = array();
        } elseif (!empty($opts['backlinks'])) {
            $pages = $DBInfo->getPageLists();
            $cache = new Cache_text("pagelinks");
            foreach ($pages as $page_name) {
                $links = $cache->fetch($page_name);
                if (is_array($links) and in_array($value, $links)) {
                    $hits[$page_name] = -1; // ignore count if < 0
                }
            }
        } elseif (!empty($opts['keywords'])) {
            $pages = $DBInfo->getPageLists();
            $opts['context'] = -1; // turn off context-matching
            $cache = new Cache_text("keyword");
            foreach ($pages as $page_name) {
                $links = $cache->fetch($page_name);
                if (is_array($links) and stristr(implode(' ', $links), $needle)) {
                    $hits[$page_name] = -1; // ignore count if < 0
                }
            }
        } else {
            $params = array();
            $ret = array();
            $params['ret'] =& $ret;
            $params['offset'] = $offset;
            $params['search'] = 1;
            $params['incl'] = $incl;
            $params['excl'] = $excl;
            $pages = $DBInfo->getPageLists($params);
            if (!is_array($pages)) {
                $pages = array();
            }

            // set time_limit
            $timestamp = microtime(true);
            $time_limit = isset($DBInfo->process_time_limit) ? $DBInfo->process_time_limit : 3; // default 3-seconds

            $j = 0;
            foreach ($pages as $page_name) {
                // check time_limit once every 30 pages
                if ($time_limit and $j % 30 == 0) {
                    if (microtime(true) - $timestamp > $time_limit) {
                        break;
                    }
                }
                $j++;

                $p = new WikiPage($page_name);
                if (!$p->exists()) {
                    continue;
                }
                $body = $p->_get_raw_body();
                $count = preg_match_all($pattern, $body, $matches);
                if (!$count) {
                    continue;
                }

                // Skip pages containing an excluded term. The original loop
                // used a bare `continue`, which only advanced the inner loop
                // and therefore never excluded anything.
                $excluded = false;
                foreach ($excl as $ex) {
                    if ($ex !== '' and stristr($body, $ex) !== false) {
                        $excluded = true;
                        break;
                    }
                }
                if ($excluded) {
                    continue;
                }

                // NOTE(review): the original had an equally ineffective loop
                // requiring every included term to occur literally. It is not
                // enforced here because included terms may be regular
                // expressions, which a literal substring test would reject.
                $hits[$page_name] = $count;
            }

            $searched = $j;
            $offset = !empty($offset) ? $offset + $j : $j;
        }

        // Sort by match count (descending), then by page name.
        $name = array_keys($hits);
        array_multisort($hits, SORT_DESC, $name, SORT_ASC);
        if (!empty($name)) {
            // array_multisort() re-indexes numeric keys, which would lose
            // numerically named pages; restore the names from the list that
            // was sorted in step with the counts.
            $hits = array_combine($name, $hits);
        }

        if ($index_arena) {
            $fc->update($sid, $name);
        } else {
            $fc->update($sid, array('hits' => $hits, 'offset' => $offset, 'searched' => $searched));
        }
    }

    $opts['hits'] = $hits;
    $opts['hit'] = count($hits);
    $opts['all'] = $DBInfo->getCounter();
    if ($opts['all'] > $searched) {
        $opts['next'] = $offset;
        $opts['searched'] = $searched;
    }

    if (!empty($opts['call'])) {
        return $hits;
    }

    $out = "<!-- RESULT LIST START -->"; // for search plugin
    $out .= "<ul class='fullsearchResult'>";

    $idx = 1;
    $checkbox = '';
    foreach ($hits as $page_name => $count) {
        $pgname = _html_escape($page_name);
        if (!empty($opts['checkbox'])) {
            $checkbox = "<input type='checkbox' name='pagenames[]' value=\"{$pgname}\" />";
        }
        $out .= '<!-- RESULT ITEM START -->'; // for search plugin
        $out .= '<li>' . $checkbox . $formatter->link_tag(_rawurlencode($page_name), '?action=highlight&value=' . _urlencode($value), $pgname, 'tabindex="' . $idx . '"');
        if ($count > 0) {
            $out .= ' . . . . ' . sprintf($count == 1 ? _("%d match") : _("%d matches"), $count);
        } elseif ($count == -2) {
            $out .= " <span class='redirectIcon'><span>" . _("Redirect page") . "</span></span>\n";
        }
        if (!empty($opts['context']) and $opts['context'] > 0) {
            // search matching contexts
            $p = new WikiPage($page_name);
            if ($p->exists()) {
                $body = $p->_get_raw_body();
                $out .= find_needle($body, $needle, $excl_needle, $opts['context']);
            }
        }
        $out .= "</li>\n";
        $out .= '<!-- RESULT ITEM END -->'; // for search plugin
        $idx++;
    }
    $out .= "</ul>\n";
    $out .= "<!-- RESULT LIST END -->"; // for search plugin

    return $out;
}
/**
 * Return the page names matching a needle, scanning the page list in chunks
 * located through the fixed-width index file.
 *
 * A leading '^' / trailing '$' in the needle anchors the match to the start /
 * end of the page name; otherwise the needle may occur anywhere in the name.
 *
 * @param string $needle regular expression, or literal text when it is not a valid expression
 * @param int    $limit  maximum number of names to return; 0 or less means no limit
 * @param array  $params 'nocut' => true keeps every name found in the chunk that crossed the limit
 * @return array|false matching page names, or false on an empty needle or unreadable files
 */
function getLikePages($needle, $limit = 100, $params = array())
{
    // protect \n char: names are matched line by line below, so a newline
    // in the needle could never match. Stripped before the emptiness check
    // so a needle made only of newlines is rejected as well.
    $needle = str_replace("\n", "", $needle);
    if (!isset($needle[0])) {
        return false; // null needle
    }
    if ($limit <= 0) {
        $limit = 0; // no limit
    }

    $total = file_get_contents($this->pagecnt);
    if ($total === false) {
        return false;
    }
    // The counter file holds text; compare it as a number from here on.
    $total = (int) trim($total);

    $flst = fopen($this->pagelst, 'r');
    if (!is_resource($flst)) {
        return false;
    }
    $fidx = fopen($this->pageidx, 'r');
    if (!is_resource($fidx)) {
        fclose($flst);
        return false;
    }

    $pages = array();

    // Translate explicit anchors into the absence of the ".*" padding.
    $pre = '.*';
    $suf = '.*';
    if ($needle[0] == '^') {
        $pre = '';
        $needle = substr($needle, 1);
    }
    if (substr($needle, -1) == '$') {
        $suf = '';
        $needle = substr($needle, 0, -1);
    }

    // check regex
    if (!validate_needle($needle)) {
        // Quote the '/' delimiter too, as the pattern below is '/'-delimited.
        $needle = preg_quote($needle, '/');
    } else {
        $needle = _preg_search_escape($needle);
    }

    $chunk = $this->chunksize - 1; // chunk size
    $is = $ie = 0; // index start/end
    $ss = $se = 0; // seek start/end

    fseek($flst, 0, SEEK_SET);
    // Loop on the start index so that a list with a single entry is scanned
    // too (the original condition, $ie < $total - 1, skipped it).
    while ($is < $total) {
        $ie = $is + $chunk;
        if ($ie >= $total) {
            $ie = $total - 1;
        }

        // Each index record is a 4-byte big-endian offset into the page list.
        fseek($fidx, $ie * 4, SEEK_SET);
        $raw = fread($fidx, 4);
        if ($raw === false or strlen($raw) < 4) {
            break; // truncated index: stop instead of unpacking garbage
        }
        $dum = unpack('N', $raw);
        $se = $dum[1];

        $tmp = '';
        if ($se > $ss) {
            $tmp = fread($flst, $se - $ss);
        }
        // include last chunk: read on to the end of the current line
        $addtmp = fgets($flst, 1024);
        if ($addtmp === false) {
            $addtmp = '';
        }
        $tmp .= $addtmp;
        $se += strlen($addtmp);

        if (preg_match_all('/^' . $pre . '(?:' . $needle . ')' . $suf . '$/' . $this->_match_flags, $tmp, $match)) {
            $pages = array_merge($pages, $match[0]);
            if (!empty($limit) and count($pages) > $limit) {
                if (empty($params['nocut'])) {
                    $pages = array_slice($pages, 0, $limit);
                }
                break;
            }
        }

        $ss = $se;
        $is = $ie + 1;
    }
    fclose($flst);
    fclose($fidx);

    return $pages;
}
/**
 * Index based search (full text, titles or page links) using Indexer_dba.
 *
 * Options read from $opts: 'arena', 'backlinks', 'form', 'context', 'limit',
 * 'call'. Options written to $opts: 'msg', 'form', 'hits', 'hit', 'all'.
 *
 * @param object      $formatter page formatter
 * @param string|true $value     search text; true means "the current page name"
 * @param array       $opts      in/out options, see above
 * @return string|array the search form when there is nothing valid to search,
 *                      '' when the index database cannot be opened,
 *                      the id => page name map when $_GET['q'] is set,
 *                      the hits array when $opts['call'] is set,
 *                      otherwise the HTML result list
 */
function macro_FastSearch($formatter, $value = "", &$opts)
{
    global $DBInfo;

    $default_limit = isset($DBInfo->fastsearch_limit) ? $DBInfo->fastsearch_limit : 30;

    if ($value === true) {
        $needle = $value = $formatter->page->name;
    } else {
        // for MoinMoin compatibility with [[FullSearch("blah blah")]]
        $needle = $value = preg_replace("/^('|\")([^\\1]*)\\1/", "\\2", $value);
    }

    $needle = _preg_search_escape($needle);
    $pattern = '/' . $needle . '/i';
    // The original replaced only '"' (and its replacement literal had
    // degenerated into a parse error) while the attribute below was
    // single-quoted. Use the same helper and attribute quoting as the other
    // search forms in this file.
    $fneedle = _html_escape($needle);

    $url = $formatter->link_url($formatter->page->urlname);

    // Pick the arena and the matching pre-selected radio button.
    $arena = 'fullsearch';
    $check1 = 'checked="checked"';
    $check2 = $check3 = '';
    if (isset($opts['arena']) and in_array($opts['arena'], array('titlesearch', 'fullsearch', 'pagelinks'))) {
        $check1 = '';
        $arena = $opts['arena'];
        if ($arena == 'fullsearch') {
            $check1 = 'checked="checked"';
        } elseif ($arena == 'titlesearch') {
            $check2 = 'checked="checked"';
        } else {
            $check3 = 'checked="checked"';
        }
    }
    if (!empty($opts['backlinks'])) {
        $arena = 'pagelinks';
        $check1 = '';
        $check3 = 'checked="checked"';
    }

    $msg = _("Fast search");
    $msg2 = _("Display context of search results");
    $msg3 = _("Full text search");
    $msg4 = _("Title search");
    $msg5 = _("Link search");
    $form = <<<EOF
<form method='get' action='{$url}'>
   <input type='hidden' name='action' value='fastsearch' />
   <input name='value' size='30' value="{$fneedle}" />
   <span class='button'><input type='submit' class='button' value='{$msg}' /></span><br />
   <input type='checkbox' name='context' value='20' />{$msg2}<br />
   <input type='radio' name='arena' value='fullsearch' {$check1} />{$msg3}
   <input type='radio' name='arena' value='titlesearch' {$check2} />{$msg4}
   <input type='radio' name='arena' value='pagelinks' {$check3} />{$msg5}<br />
   </form>
EOF;

    if (!isset($needle[0]) or !empty($opts['form'])) {
        $opts['msg'] = _("No search text");
        return $form;
    }
    if (validate_needle($needle) === false) {
        $opts['msg'] = sprintf(_("Invalid search expression \"%s\""), $needle);
        return $form;
    }

    $DB = new Indexer_dba($arena, "r", $DBInfo->dba_type);
    if ($DB->db == null) {
        $opts['msg'] = _("Couldn't open search database, sorry.");
        $opts['hits'] = array();
        $opts['hit'] = 0;
        $opts['all'] = 0;
        return '';
    }
    $opts['form'] = $form;

    $sc = new Cache_text("searchkey");

    if ($arena == "pagelinks") {
        $words = array($value);
    } else {
        $words = getTokens($value);
    }

    // Expand every word into the index keys found for it, caching the
    // lookup. (The original overwrote the accumulated list on each pass with
    // a second, uncached lookup, so only the last word was ever expanded.)
    $new_words = array();
    foreach ($words as $word) {
        if ($sc->exists($word)) {
            $searchkeys = $sc->fetch($word);
        } else {
            $searchkeys = $DB->_search($word);
            $sc->update($word, $searchkeys);
        }
        if (is_array($searchkeys)) {
            $new_words = array_merge($new_words, $searchkeys);
        }
    }
    $words = array_merge($words, $new_words);

    // Collect the page ids of every word; an id listed several times is a
    // page matched by several words.
    $idx = array();
    foreach ($words as $word) {
        $ids = $DB->_fetchValues($word);
        if (!is_array($ids)) {
            continue;
        }
        foreach ($ids as $id) {
            $idx[] = $id;
        }
    }
    $init_hits = array_count_values($idx); // initial hits
    $idx = array_keys($init_hits);

    $all_count = $DBInfo->getCounter();

    $pages = array();
    $hits = array();
    foreach ($idx as $id) {
        $key = $DB->_fetch($id);
        $pages[$id] = $key;
        $hits['_' . $key] = $init_hits[$id]; // HACK. prefix '_' to numerical named pages
    }
    $DB->close();

    if (!empty($_GET['q']) and isset($_GET['q'][0])) {
        return $pages;
    }

    $context = !empty($opts['context']) ? $opts['context'] : 0;
    $limit = isset($opts['limit'][0]) ? $opts['limit'] : $default_limit;
    $contexts = array();

    if ($arena == 'fullsearch' || $arena == 'pagelinks') {
        $idx = 1;
        foreach ($pages as $page_name) {
            if (!empty($limit) and $idx > $limit) {
                break;
            }

            $p = new WikiPage($page_name);
            if (!$p->exists()) {
                continue;
            }
            $body = $p->_get_raw_body();
            $count = preg_match_all($pattern, $body, $matches); // more precisely count matches

            if ($context) {
                // search matching contexts
                $contexts[$page_name] = find_needle($body, $needle, '', $context);
            }
            $hits['_' . $page_name] = $count; // XXX hack for numerical named pages
            $idx++;
        }
    }

    // Sort by match count (descending), then by name; the '_' prefix keeps
    // array_multisort() from re-indexing numerically named pages.
    $name = array_keys($hits);
    array_multisort($hits, SORT_DESC, $name, SORT_ASC);

    $opts['hits'] = $hits;
    $opts['hit'] = count($hits);
    $opts['all'] = $all_count;
    if (!empty($opts['call'])) {
        return $hits;
    }

    $out = "<!-- RESULT LIST START -->"; // for search plugin
    $out .= "<ul>";

    $idx = 1;
    foreach ($hits as $page_name => $count) {
        $page_name = substr($page_name, 1); // drop the '_' prefix again
        // escape the visible name, as the other search macros do
        $pgname = _html_escape($page_name);
        $out .= '<!-- RESULT ITEM START -->'; // for search plugin
        $out .= '<li>' . $formatter->link_tag(_rawurlencode($page_name), "?action=highlight&value=" . _urlencode($needle), $pgname, "tabindex='{$idx}'");
        // "> 0" like the full search list: with the original "> 1" the
        // singular "%d match" branch was unreachable and pages with exactly
        // one match never showed their context.
        if ($count > 0) {
            $out .= ' . . . . ' . sprintf($count == 1 ? _("%d match") : _("%d matches"), $count);
            if (!empty($contexts[$page_name])) {
                $out .= $contexts[$page_name];
            }
        }
        $out .= "</li>\n";
        $out .= '<!-- RESULT ITEM END -->'; // for search plugin
        $idx++;
        if (!empty($limit) and $idx > $limit) {
            break;
        }
    }
    $out .= "</ul>\n";
    $out .= "<!-- RESULT LIST END -->"; // for search plugin

    return $out;
}
/**
 * Return the page names matching a needle, scanning the plain page list
 * file chunk by chunk.
 *
 * A leading '^' / trailing '$' in the needle anchors the match to the start /
 * end of the page name; otherwise the needle may occur anywhere in the name.
 *
 * @param string $needle regular expression, or literal text when it is not a valid expression
 * @param int    $limit  maximum number of names to return; 0 or less means no limit
 * @param array  $params 'nocut' => true keeps every name found in the chunk that crossed the limit
 * @return array|false matching page names, or false on an empty needle or unreadable files
 */
function getLikePages($needle, $limit = 100, $params = array())
{
    // escape \n char: names are matched line by line below, so a newline in
    // the needle could never match. Stripped before the emptiness check so a
    // needle made only of newlines is rejected as well.
    $needle = str_replace("\n", "", $needle);
    if (!isset($needle[0])) {
        return false; // null needle
    }
    // A negative limit would make array_slice() below drop entries from the
    // end of the result; treat it as "no limit", like the indexed variant of
    // this method does.
    if ($limit <= 0) {
        $limit = 0;
    }

    // Only the readability of the counter file is checked; its value is not
    // needed for a sequential scan.
    $total = file_get_contents($this->pagecnt);
    if ($total === false) {
        return false;
    }

    $flst = fopen($this->pagelst, 'r');
    if (!is_resource($flst)) {
        return false;
    }

    $pages = array();

    // Translate explicit anchors into the absence of the ".*" padding.
    $pre = '.*';
    $suf = '.*';
    if ($needle[0] == '^') {
        $pre = '';
        $needle = substr($needle, 1);
    }
    if (substr($needle, -1) == '$') {
        $suf = '';
        $needle = substr($needle, 0, -1);
    }

    // check regex
    if (!validate_needle($needle)) {
        $needle = preg_quote($needle, '/');
    }

    // Read in chunks of a tenth of the file, clamped to 8192..10240 bytes
    // (so effectively 8192 for files below about 80 KiB).
    fseek($flst, 0, SEEK_END);
    $size = ftell($flst);
    fseek($flst, 0, SEEK_SET);
    $chunk = max(min(10240, intval($size / 10)), 8192);

    while (!feof($flst)) {
        $data = fread($flst, $chunk);
        if ($data === false) {
            break; // read error: stop instead of looping on a broken handle
        }
        // Complete the last, possibly cut, line of the chunk.
        $rest = fgets($flst, 2048);
        if ($rest !== false) {
            $data .= $rest;
        }

        if (preg_match_all('/^' . $pre . '(?:' . $needle . ')' . $suf . '$/' . $this->_match_flags, $data, $match)) {
            $pages = array_merge($pages, $match[0]);
            if (!empty($limit) and count($pages) > $limit) {
                if (empty($params['nocut'])) {
                    $pages = array_slice($pages, 0, $limit);
                }
                break;
            }
        }
    }
    fclose($flst);

    return $pages;
}