/**
 * Split a text into index keys and reduce them with the Korean stemmer.
 *
 * The text is un-wikified (CamelCase is split), broken on punctuation and
 * whitespace, de-duplicated, and every remaining key is passed through
 * KoreanStemmer::getStem(). Keys for which the stemmer reports type 1 are
 * replaced by their stem, keys with a stem of any other type are dropped,
 * and keys without a stem are kept unchanged.
 *
 * @param mixed  $formatter unused here
 * @param string $value     text to index; overridden by $options['value'] when that is non-empty
 * @param array  $options   reads 'value', 'debug' and (only when 'debug' is set) 'timer'
 * @return string newline separated keys, or the stemming log followed by the
 *                timer output when $options['debug'] is set
 */
function filter_indexer_ko($formatter, $value, &$options)
{
    // keys whose byte length is not greater than this are discarded
    $more_specific_len = 1;
    $indexer = new KoreanStemmer();

    // empty() instead of a bare read: the keys are optional and a bare read
    // raises an "undefined array key" warning when they are absent
    if (!empty($options['value'])) {
        $value = $options['value'];
    }
    $debug = !empty($options['debug']);

    $delims = ",.\\|\n\r\\s\\(\\)\\[\\]{}!@#\$%\\^&\\*\\-_\\+=~`';:'\"\\?<>\\/";

    # un-wikify CamelCase, change "WikiName" to "Wiki Name"
    $value = preg_replace("/((?<=[a-z0-9]|[B-Z]{2})([A-Z][a-z]))/", " \\1", $value);
    # separate alphanumeric and local characters
    $value = preg_replace("/((?<=[a-z0-9])([^a-z0-9]+))/i", " \\1", $value);

    $keys = preg_split("/[{$delims}]+/", $value);

    # must be longer than $more_specific_len.
    if ($more_specific_len > 0) {
        foreach ($keys as $i => $key) {
            if (strlen($key) <= $more_specific_len) {
                unset($keys[$i]);
            }
        }
    }

    sort($keys);
    $keys = array_unique($keys);

    $log = '';
    $tag = array('+', '-');
    $type = null;
    foreach ($keys as $i => $key) {
        $match = null;
        $stem = $indexer->getStem(trim($key), $match, $type);
        if ($stem) {
            if ($debug) {
                // both lookups may be missing depending on what the stemmer
                // reported; log an empty string instead of raising a warning
                $mark = isset($tag[$type - 1]) ? $tag[$type - 1] : '';
                $found = isset($match[1]) ? $match[1] : '';
                $log .= $key . '=>' . $stem . $mark . '/' . $found . "\n";
            }
            if ($type == 1) {
                $keys[$i] = $stem;
            } else {
                unset($keys[$i]);
            }
        } elseif ($debug) {
            // no stem: the key is kept as it is
            $log .= '=' . $key . "\n";
        }
    }

    if ($debug) {
        $options['timer']->Check("indexer");
        return $log . "\n" . $options['timer']->Write();
    }

    // stemming may have mapped different keys onto the same stem
    return implode("\n", array_unique($keys));
}
/**
 * Return the style attribute that matches a keyword weight.
 *
 * Private helper of macro_Keywords(): walks the thresholds from the largest
 * font to the smallest and returns the style of the first threshold the
 * weight exceeds.
 *
 * @param mixed $val  weight of the keyword
 * @param array $fact weight thresholds, in the order they were generated
 * @param array $sty  style attribute strings, largest font first
 * @return string the selected style attribute, '' when $sty is empty
 */
function _keywords_style_for($val, $fact, $sty)
{
    $fz = sizeof($sty);
    if ($fz == 0) {
        return '';
    }
    for ($i = 0; $i < $fz; $i++) {
        // there can be fewer thresholds than styles; a missing threshold is
        // treated as 0, which is how the undefined entry compared before
        $threshold = isset($fact[$i]) ? $fact[$i] : 0;
        if ($val > $threshold) {
            return $sty[$i];
        }
    }
    return $sty[$fz - 1];
}

/**
 * Keywords macro: render the keywords of a page, or of all pages, as a tag
 * list, a tag cloud, or a keyword editing form.
 *
 * $value is a comma separated option list:
 *  - delicious | del.icio.us | technorati | flickr : link tags to that service
 *  - all            : use the keywords of every page (removes the limit)
 *  - random[=N]     : pick N (default 1) random keywords out of all pages
 *  - suggest        : generate keyword suggestions from the page text
 *  - tour, cloud    : set the option of the same name
 *  - freq, sort=freq|alpha : ordering of the result
 *  - limit=N        : maximum number of keywords
 *  - type=full|title: search action used for the tag links
 *  - url=URL        : custom tag link; '$TAG' is appended when it is missing
 *  - anything else  : taken as the name of the page to read
 *
 * $options keys read here: limit, all, random, suggest, tour, cloud, sort,
 * merge, call, add.
 *
 * NOTE(review): keywords are written into the HTML output and into the link
 * URLs without escaping; confirm that the keyword cache only ever contains
 * sanitized values.
 *
 * @param object $formatter page formatter; page->name, pi, link_url() and
 *                          register_javascripts() are used
 * @param string $value     comma separated macro arguments
 * @param array  $options   see above
 * @return string|array HTML output; '' when suggestions are requested for a
 *                      page that does not exist; the raw keyword => weight
 *                      array when $options['call'] is set
 */
function macro_Keywords($formatter, $value, $options = array())
{
    global $DBInfo;

    $supported_lang = array('ko');

    $limit = isset($options['limit']) ? $options['limit'] : 40;
    $sort = '';

    // tagging services selectable by a bare option name
    $services = array(
        'delicious' => 'http://del.icio.us/tag/$TAG',
        'del.icio.us' => 'http://del.icio.us/tag/$TAG',
        'technorati' => 'http://www.technorati.com/tag/$TAG',
        'flickr' => 'http://www.flickr.com/photos/tags/$TAG',
    );

    foreach (explode(',', (string) $value) as $opt) {
        $opt = trim($opt);
        if (isset($services[$opt])) {
            $tag_link = $services[$opt];
        } elseif ($opt === 'all') {
            $options['all'] = 1;
            $limit = 0;
        } elseif ($opt === 'random') {
            $options['random'] = $options['all'] = 1;
        } elseif ($opt === 'suggest' or $opt === 'tour' or $opt === 'cloud') {
            $options[$opt] = 1;
        } elseif ($opt === 'freq') {
            $sort = 'freq';
        } elseif (($p = strpos($opt, '=')) !== false) {
            $k = substr($opt, 0, $p);
            $v = substr($opt, $p + 1);
            if ($k === 'limit') {
                // the limit is used as a count below, keep it numeric
                $limit = (int) $v;
            } elseif ($k === 'random') {
                $options['all'] = 1;
                $v = (int) $v;
                $options['random'] = $v > 0 ? $v : 1;
            } elseif ($k === 'sort' and in_array($v, array('freq', 'alpha'))) {
                $sort = $v;
            } elseif ($k === 'type' and in_array($v, array('full', 'title'))) {
                $search = $v . 'search';
            } elseif ($k === 'url') {
                $tag_link = $v;
                // preg_match() returns 0, not false, when nothing matches,
                // so the old "=== false" test never appended the placeholder
                if (strpos($tag_link, '$TAG') === false) {
                    $tag_link .= '$TAG';
                }
            }
            // else ignore
        } elseif ($opt !== '') {
            // an empty option (e.g. a trailing comma) must not reset a page
            // name given earlier
            $pagename = $opt;
        }
    }

    if (isset($options['random']) and empty($limit)) {
        $limit = 0;
    }
    if (isset($options['sort']) and $options['sort'] == 'freq') {
        $sort = 'freq';
    }
    if (empty($pagename)) {
        $pagename = $formatter->page->name;
    }

    # get cached keywords
    $cache = new Cache_text('keyword');
    $mc = new Cache_text('macro');
    $mkey = 'Keywords.' . md5($pagename . $value);
    $mykeys = array();

    # check cache mtime
    $cmt = $mc->mtime($mkey);
    $pmt = $cache->mtime($pagename);
    if ($cmt > $pmt) {
        # check update or not
        $dmt = $cache->mtime();
        if ($dmt <= $cmt) { # XXX crude method
            $mykeys = $mc->fetch($mkey);
        }
    } else {
        $mc->remove($mkey);
    }

    // a failed fetch may hand back a non-array; rebuild in that case instead
    // of passing it on to array_merge()
    if (empty($mykeys) or !is_array($mykeys)) {
        $mykeys = array();
        if (!empty($options['all'])) {
            $pages = $DBInfo->getPageLists();
        } else {
            $pages = array($pagename);
        }
        foreach ($pages as $pn) {
            if ($keys = $cache->fetch($pn)) {
                $mykeys = array_merge($mykeys, $keys);
            }
        }
        $mc->update($mkey, $mykeys);
    }

    if (!empty($options['all'])) {
        $use_sty = 1;
        $words = array_count_values($mykeys);
        unset($words['']);
        arsort($words);
        $max = current($words); // get max hit number

        if (!empty($options['random']) and count($words) > 0) {
            $num = min(max(1, (int) $options['random']), count($words));
            // array_rand() returns a single key instead of an array when
            // only one entry is requested
            $selected = (array) array_rand($words, $num);
            $rws = array();
            foreach ($selected as $k) {
                $rws[$k] = $words[$k];
            }
            $words = $rws;
        }
        if ($sort != 'freq') {
            ksort($words);
        }
    } else {
        $max = 3; // default weight
        $words = array();
        foreach ($mykeys as $key) {
            $words[$key] = $max; // give weight to all selected keywords
        }
    }

    # automatically generate list of keywords
    if (empty($options['all']) and (empty($words) or isset($options['suggest']))) {
        // built-in stop words; the list is only ever split on whitespace,
        // so its line layout carries no meaning
        $common = <<<EOF
am an a b c d e f g h i j k l m n o p q r s t u v w x y z
0 1 2 3 4 5 6 7 8 9
if on in by it at up as down over into for from to of he his him she her back
is are be being been or no not nor and all through under until
these there the top
with here only has had both did faw few little most almost much off on out
also each were was too any very more within then
across before behind beneath beyond after again against around among
so such since because but yet however ever during
it its the this that what where how when who whoever which their them
you your will shall may might we us our
get got would could have
can't won't didn't don't
aiff arj arts asp au avi bin biz css cgi com doc edu exe firm gif gz gzip
htm html info jpeg jpg js jsp mp3 mpeg mpg mov
nom pdf php pl qt ra ram rec shop sit tar tgz tiff txt wav web zip
one two three four five six seven eight nine ten eleven twelve
ftp http https www web net org or kr co us de
EOF;

        $page = $DBInfo->getPage($pagename);
        if (!$page->exists()) {
            return '';
        }
        $raw = rtrim($page->get_raw_body());

        // strip macros, entities
        $raw = preg_replace("/&[^;\\s]+;|\\[\\[[^\\[]+\\]\\]/", ' ', $raw);
        // strip '##' lines
        $raw = preg_replace("/^##.*\$/m", ' ', $raw);
        // strip tags and punctuation; the page name is scanned as well
        $raw = preg_replace("/([;\"',`\\\\\\/\\.:@#\\!\\?\$%\\^&\\*\\(\\)\\{\\}\\[\\]\\-_\\+=\\|<>])/", ' ', strip_tags($raw . ' ' . $pagename));
        // un-wikify CamelCase
        $raw = preg_replace("/((?<=[a-z0-9]|[B-Z]{2})([A-Z][a-z]))/", " \\1", $raw);
        $raw = strtolower($raw);
        $raw = preg_replace("/\\b/", ' ', $raw);
        $words = preg_split("/\\s+|\n/", $raw);

        // remove common words: the generic page plus the page of the current
        // language; lines starting with '#' are comments
        $lines = array();
        $lang = isset($formatter->pi['#language']) ? $formatter->pi['#language'] : $DBInfo->default_language;
        if ($lang and in_array($lang, $supported_lang)) {
            $common_word_page = LOCAL_KEYWORDS . '/CommonWords' . ucfirst($lang);
            if ($DBInfo->hasPage($common_word_page)) {
                $p = $DBInfo->getPage($common_word_page);
                $lines = explode("\n", $p->get_raw_body());
            }
        }
        // the generic list used to be loaded but then thrown away whenever
        // no language specific page existed; apply it in every case
        $common_word_page0 = LOCAL_KEYWORDS . '/CommonWords';
        if ($DBInfo->hasPage($common_word_page0)) {
            $p = $DBInfo->getPage($common_word_page0);
            $lines = array_merge($lines, explode("\n", $p->get_raw_body()));
        }
        if ($lines) {
            foreach ($lines as $line) {
                if (isset($line[0]) and $line[0] == '#') {
                    continue;
                }
                $common .= "\n" . $line;
            }
            $common = rtrim($common);
        }

        $words = array_diff($words, preg_split("/\\s+|\n/", $common));

        if (!empty($DBInfo->use_stemmer)) {
            include_once dirname(__FILE__) . '/../lib/stemmer.ko.php';
            include_once dirname(__FILE__) . '/../lib/stemmer.php';

            $indexer = new KoreanStemmer();
            // without a usable dictionary the words are left unstemmed
            if (is_resource($indexer->_dict)) {
                $founds = array();
                foreach ($words as $key) {
                    if (preg_match('/^[a-zA-Z0-9]+$/', $key)) {
                        // plain alphanumeric words go to the Porter stemmer
                        $founds[] = PorterStemmer::Stem($key);
                        continue;
                    }
                    $match = null;
                    $stem = $indexer->getStem(trim($key), $match, $type);
                    if (!empty($stem)) {
                        $founds[] = $stem;
                    }
                }
                $words = $founds;
                $indexer->close();
            }
        }

        // count pairs of adjacent words, both longer than two bytes; a pair
        // and its reverse are counted together
        $preword = '';
        $bigwords = array();
        foreach ($words as $word) {
            if (strlen($word) > 2 and strlen($preword) > 2) {
                if ($word == $preword) {
                    continue;
                }
                $key = $preword . ' ' . $word;
                $rkey = $word . ' ' . $preword;
                if (isset($bigwords[$key])) {
                    $bigwords[$key]++;
                } elseif (isset($bigwords[$rkey])) {
                    $bigwords[$rkey]++;
                } else {
                    $bigwords[$key] = 1;
                }
            }
            $preword = $word;
        }

        $words = array_count_values($words);
        unset($words['']);

        // only pairs seen more than once are kept; this is a plain loop
        // because create_function() no longer exists in PHP 8
        foreach ($bigwords as $k => $v) {
            if ($v != 1) {
                $words[$k] = $v;
            }
        }

        arsort($words);
        $max = current($words); // get max hit number

        if (isset($options['merge'])) {
            foreach ($mykeys as $key) {
                $words[$key] = $max; // give weight to all selected keywords
            }
        }
        $use_sty = 1;
    }

    // callers asking for the raw data get the keyword => weight array
    if (!empty($options['call'])) {
        return $words;
    }

    if ($limit and sizeof($words) > $limit) {
        arsort($words);
        $words = array_slice($words, 0, $limit, true);
    }

    // make criteria list
    $fz = 0;
    $min = 0;
    $sty = array();
    $fact = array();
    if (!empty($use_sty)) {
        $weight = $max;
        $test = array(0.8, 0.6, 0.4, 0.5, 0.5, 0.5); // six level
        for ($i = 0; $i < 6 and $weight > 0; $i++) {
            $weight = (int) ($weight * $test[$i]);
            if ($weight > 0) {
                $fact[] = $weight;
            }
        }
        $max = current($fact);
        $min = $limit ? max(1, end($fact)) - 1 : 0; // XXX

        // make font-size style
        $fz = max(sizeof($fact), 2);
        $fsh = (MAX_FONT_SZ - MIN_FONT_SZ) / ($fz - 1);
        $fs = MAX_FONT_SZ;
        for ($i = 0; $i < $fz; $i++) {
            $ifs = (int) ($fs + 0.5);
            $sty[] = " style='font-size:{$ifs}px;'";
            $fs -= $fsh;
            $fs = max($fs, 9); // min font-size:9px
        }
    }

    if ($sort != 'freq') {
        ksort($words);
    }

    $link = $formatter->link_url(_rawurlencode($pagename), '');
    if (!isset($tag_link)) {
        if (empty($search)) {
            $search = 'fullsearch&keywords=1';
        }
        if (!empty($options['tour'])) {
            $search = 'tour&arena=keylinks';
        }
        $tag_link = $formatter->link_url(_rawurlencode($pagename), '?action=' . $search . '&value=$TAG');
    }

    $out = '';
    if (!empty($options['add'])) {
        $out = "<form method='post' action='{$link}'>\n";
        $out .= "<input type='hidden' name='action' value='keywords' />\n";
    }

    if (isset($options['cloud'])) {
        $out = '';
        foreach ($words as $key => $val) {
            if ($val > $min) {
                $out .= "<a href='" . qualifiedUrl(str_replace('$TAG', $key, $tag_link)) . "'";
                // $use_sty is only set when weights were computed
                if (!empty($use_sty)) {
                    $out .= ' ' . _keywords_style_for($val, $fact, $sty);
                } else {
                    $out .= " style='12'";
                }
                $out .= ">" . $key . "</a>";
            }
        }
        // the markup is handed over inside a flashvars string
        $out = str_replace('&', urlencode('&'), $out);

        $formatter->register_javascripts(array('js/swfobject.js'));
        $_swf_prefix = qualifiedUrl("{$DBInfo->url_prefix}/local/wp-cumulus"); // FIXME

        return <<<SWF
<script type="text/javascript">
var flashvars = {
   mode : "tags",
   distr : "true",
   tcolor : "0xffffff",
   tcolor2 : "0x86B9F2",
   hicolor : "0xBAD8F8",
   tagcloud : "<tags>{$out}</tags>"
};
var params = {
   wmode: "opaque",
   bgcolor: "#333333"
};
var attrs = {
   id: "myCloudContent"
};
swfobject.embedSWF("{$_swf_prefix}/tagcloud.swf", "myCloud", "200", "200", "9.0.0","expressInstall.swf", flashvars, params, attrs);
</script>
<div id="myCloud">
</div>
SWF;
    }

    $out .= '<ul>';
    $checkbox = '';
    // becomes 1 once a keyword reaches the top weight; initialised here so
    // the button label below never reads an undefined variable
    $ok = 0;
    foreach ($words as $key => $val) {
        $style = _keywords_style_for($val, $fact, $sty);
        if ($val > $min) {
            $checked = '';
            if ($val >= $max) {
                $checked = 'checked="checked"';
                $ok = 1;
            }
            if (!empty($options['add'])) {
                $checkbox = "<input type='checkbox' {$checked} name='key[]' " . "value='{$key}' />";
            }
            $out .= " <li class=\"tag-item\"";
            if (!empty($use_sty)) {
                $out .= " {$style} title=\"{$val} " . _("hits") . '"';
            }
            $out .= ">{$checkbox}" . "<a href='" . str_replace('$TAG', $key, $tag_link) . "' rel='nofollow'>" . $key . "</a></li>\n";
        }
    }

    $inp = '';
    $form_close = '';
    if (!empty($options['add'])) {
        $msg = _("add keywords");
        $inp = "<li><input type='text' name='keywords' size='12' />: {$msg}</li>";
        if ($ok) {
            $btn = _("Update keywords");
        } else {
            $btn = _("Add keywords");
        }
        $btn1 = _("Add as common words");
        $btn2 = _("Unselect all");
        $btnc = _("Suggest new Keywords");
        $form_close = "<input type='submit' value='{$btn}'/>\n";
        $form_close .= "<input type='submit' name='suggest' value='{$btnc}' />\n";
        $form_close .= "<input type='submit' name='common' value='{$btn1}' />\n";
        $form_close .= "<input type='button' value='{$btn2}' onClick='UncheckAll(this)' />\n";
        $form_close .= "<select name='lang'><option>---</option>\n";
        foreach ($supported_lang as $l) {
            $form_close .= "<option value='{$l}'>{$l}</option>\n";
        }
        $langmsg = _("select language");
        $form_close .= "</select>: {$langmsg}\n</form>\n";
        $form_close .= <<<EOF
<script type='text/javascript' src='{$DBInfo->url_prefix}/local/checkbox.js'>
</script>
EOF;
    }

    return "<div class='cloudView'>" . $out . "{$inp}</ul></div>{$form_close}";
}
/**
 * Normalise a list of words before they are indexed.
 *
 * When $this->use_stemming is greater than 1, every word containing a
 * character outside [0-9A-Za-z] is replaced by the stem reported by the
 * Korean stemmer (and dropped when there is no stem); the other words are
 * passed through untouched.
 *
 * Otherwise words are handed to $this->_chunkWords() and the collected
 * chunks are merged into the de-duplicated result.
 *
 * @param array $words words to process
 * @return array the processed word list
 */
function _stemmingWords($words)
{
    // the stemmer instance is created once and reused by later calls
    static $stemmer = null;

    if (!($this->use_stemming > 1)) {
        $chunks = array();
        foreach ($words as $idx => $term) {
            if (!isset($term[0])) {
                continue;
            }
            // NOTE(review): a first character that is set can hardly equal
            // the empty string, so this looks unreachable as written;
            // confirm what it was meant to be compared against
            if ($term[0] == "" and preg_match('/[^0-9A-Za-z]/u', $term)) {
                $chunked = $this->_chunkWords($term, $chunks, true);
                if ($chunked) {
                    unset($words[$idx]);
                } // XXX
            }
        }

        return array_unique(array_merge($words, $chunks));
    }

    include_once dirname(__FILE__) . '/stemmer.ko.php';
    if (empty($stemmer)) {
        $stemmer = new KoreanStemmer();
    }

    $stems = array();
    foreach ($words as $term) {
        if (!preg_match('/[^0-9A-Za-z]/u', $term)) {
            // purely alphanumeric: keep as it is
            $stems[] = $term;
            continue;
        }

        $match = null;
        $stem = $stemmer->getStem(trim($term), $match, $type);
        if (!empty($stem)) {
            $stems[] = $stem;
        }
    }

    return $stems;
}