/**
 * Returns the search language object for $sLang, building it on first use.
 *
 * Built objects are kept in a function-static array, so each language is
 * resolved only once per process. Resolution order for the class/object:
 *  1. an already declared class named "csearchlanguage<lang>";
 *  2. a customized language.php under FX_PERSONAL_ROOT/php_interface/<lang>/search
 *     (if that file returns an object, the object itself is used);
 *  3. the module's own /freetrix/modules/search/tools/<lang>/language.php;
 *  4. the generic CSearchLanguage class.
 *
 * NOTE(review): $sLang is concatenated into filesystem paths that are passed
 * to include. Callers must only pass trusted language identifiers.
 *
 * @param string $sLang Language identifier.
 * @return object Language object with trigrams loaded and _abc / _has_bigramm_info set.
 */
static function GetLanguage($sLang)
{
	static $arLanguages = array();

	if(!isset($arLanguages[$sLang]))
	{
		$obLanguage = null;
		// Stays null when the language class is already declared and no file
		// lookup happens. Previously the variable was simply undefined on that
		// path, which raised an "undefined variable" warning at LoadTrigrams().
		$strDirName = null;
		$class_name = strtolower("CSearchLanguage".$sLang);

		if(!class_exists($class_name))
		{
			//First try to load customized class
			$strDirName = $_SERVER["DOCUMENT_ROOT"].FX_PERSONAL_ROOT."/php_interface/".$sLang."/search";
			$strFileName = $strDirName."/language.php";
			if(file_exists($strFileName))
				$obLanguage = @include($strFileName);

			if(!is_object($obLanguage))
			{
				if(!class_exists($class_name))
				{
					//Then module class
					$strDirName = $_SERVER["DOCUMENT_ROOT"]."/freetrix/modules/search/tools/".$sLang;
					$strFileName = $strDirName."/language.php";
					if(file_exists($strFileName))
						@include($strFileName);

					//Nothing declared the language specific class: use the generic one
					if(!class_exists($class_name))
					{
						$class_name = "CSearchLanguage";
					}
				}
			}
		}

		if(!is_object($obLanguage))
			$obLanguage = new $class_name($sLang);

		//Directory of the last language.php candidate (null if none was looked up)
		$obLanguage->LoadTrigrams($strDirName);

		$arStemInfo = stemming_init($sLang);
		if(is_array($arStemInfo))
			$obLanguage->_abc = array_flip($obLanguage->StrToArray($arStemInfo["abc"]));

		$obLanguage->_has_bigramm_info = is_callable(array($obLanguage, "getbigrammletterfreq"));

		$arLanguages[$sLang] = $obLanguage;
	}

	return $arLanguages[$sLang];
}
// Fragment of an admin-menu search script; the GetStrings() definition at the end is truncated in this view.
// Visible steps, in order:
//  - passes $query to CUtil::decodeURIComponent() (the return value is unused, so presumably it decodes in place - TODO confirm);
//  - initialises $adminPage and then $adminMenu with $adminPage->aModules;
//  - prepares $arResult["CATEGORIES"] with five empty "ITEMS" buckets, each titled through GetMessage();
//  - loads stemming info for LANGUAGE_ID and splits $query with stemming_split();
//  - builds $preg_template: any key of $arPhrase (preg_quote'd, "/" escaped) preceded by start-of-string
//    or a character outside $arStemFunc["pcre_letters"], case-insensitive;
//  - GetStrings() reads $arStemFunc, $arPhrase, $preg_template, $arResult and $bFound through `global`.
// NOTE(review): this snippet is collapsed onto a single physical line, so as written the `//` in front of
// $adminMenu->AddOpenedSections(...) comments out everything that follows it; the original line breaks need restoring.
CUtil::decodeURIComponent($query); $adminPage->Init(); //$adminMenu->AddOpenedSections("global_menu_content, global_menu_services, global_menu_store, global_menu_statistics, global_menu_settings"); $adminMenu->Init($adminPage->aModules); $arResult = array("CATEGORIES"=>array( "global_menu_content"=>array("ITEMS"=>array(), "TITLE"=>GetMessage('admin_lib_menu_content')), "global_menu_services"=>array("ITEMS"=>array(), "TITLE"=>GetMessage('admin_lib_menu_services')), "global_menu_store"=>array("ITEMS"=>array(), "TITLE"=>GetMessage('admin_lib_menu_store')), "global_menu_statistics"=>array("ITEMS"=>array(), "TITLE"=>GetMessage('admin_lib_menu_stat')), "global_menu_settings"=>array("ITEMS"=>array(), "TITLE"=>GetMessage('admin_lib_menu_settings')), ) ); $arStemFunc = stemming_init(LANGUAGE_ID); $arPhrase = stemming_split($query, LANGUAGE_ID); $preg_template = "/(^|[^".$arStemFunc["pcre_letters"]."])(".str_replace("/", "\\/", implode("|", array_map('preg_quote', array_keys($arPhrase)))).")/i".FX_UTF_PCRE_MODIFIER; $bFound = false; function GetStrings(&$item, $key, $p) { $category = $p[0]; $icon = $p[1]; global $arStemFunc, $arPhrase, $preg_template, $arResult, $bFound; $searchstring = ''; if($item["text"]) { if(preg_match_all($preg_template, ToUpper($item["text"]), $arMatches, PREG_OFFSET_CAPTURE))
/**
 * Splits a text into words and returns the stems found in it.
 *
 * The text is upper-cased with ToUpper(), every run of characters outside the
 * language's "pcre_letters" class becomes a single space, and each resulting
 * word (cut to 50 characters) is passed to the language's "stem" function.
 * If the stem equals the word and the word contains characters outside
 * "pcre_abc", stemming_detect() is asked to pick a better language.
 *
 * @param string $sText Text to process.
 * @param string $sLang Language whose stemmer is tried first.
 * @param bool $bIgnoreStopWords When true every stem is kept; otherwise a stem is kept
 *        only if the language's "stop" function returns a truthy value for it.
 * @param bool $bReturnPositions When true the stored values are sums of word positions
 *        (position grows by 1 per word and by 5 extra per sentence break);
 *        otherwise each occurrence adds 1.
 * @return array Map of stem => accumulated value.
 */
function stemming($sText, $sLang = "ru", $bIgnoreStopWords = false, $bReturnPositions = false)
{
	//Results of the "stop" function are remembered between calls, per requested language
	static $STOP_CACHE = array();
	if (!isset($STOP_CACHE[$sLang]))
	{
		$STOP_CACHE[$sLang] = array();
	}
	$stopMemo =& $STOP_CACHE[$sLang];

	//Info about all known languages, completed with the requested one if missing
	$langInfo = stemming_init(false);
	if (!isset($langInfo[$sLang]))
	{
		$langInfo[$sLang] = stemming_init($sLang);
	}

	$stemmer = $langInfo[$sLang]["stem"];
	$foreignRe = "/[^" . $langInfo[$sLang]["pcre_abc"] . "]+/" . BX_UTF_PCRE_MODIFIER;

	//Words are separated by a single space after normalisation
	$space = " ";
	$upper = ToUpper($sText);
	if ($bReturnPositions)
	{
		//Keep sentence terminators, then unify them to "."
		$normalized = preg_replace("/[^" . $langInfo[$sLang]["pcre_letters"] . ".!?]+/" . BX_UTF_PCRE_MODIFIER, $space, $upper);
		$normalized = preg_replace("/[!?]+/" . BX_UTF_PCRE_MODIFIER, ".", $normalized);
	}
	else
	{
		$normalized = preg_replace("/[^" . $langInfo[$sLang]["pcre_letters"] . "]+/" . BX_UTF_PCRE_MODIFIER, $space, $upper);
	}

	$result = array();
	$pos = 1;

	for ($chunk = strtok($normalized, $space); $chunk !== false; $chunk = strtok($space))
	{
		//With positions a chunk may hold several sentence pieces glued by "."
		$parts = $bReturnPositions ? explode(".", $chunk) : array($chunk);

		foreach ($parts as $index => $word)
		{
			$word = substr($word, 0, 50);

			if ($bReturnPositions)
			{
				//Sentence distance
				if ($index > 0)
				{
					$pos += 5;
				}
				if (!strlen($word))
				{
					continue;
				}
			}

			//Second argument 1: the stemmer may return more than one word
			$stem = $stemmer($word, 1);
			$stopLang = $sLang;

			//Unchanged word with letters outside the alphabet: try to detect the right language
			if (!is_array($stem) && $stem === $word && preg_match($foreignRe, $word))
			{
				$guess = stemming_detect($word, $langInfo, $sLang);
				if (strlen($guess[0]))
				{
					$stem = $guess[0];
					$stopLang = $guess[1];
				}
			}

			//Treat a single stem and a list of stems uniformly
			$candidates = is_array($stem) ? $stem : array($stem);

			if (!$bIgnoreStopWords)
			{
				$stopCheck = $langInfo[$stopLang]["stop"];
			}

			foreach ($candidates as $st)
			{
				if (!$bIgnoreStopWords)
				{
					if (!isset($stopMemo[$st]))
					{
						$stopMemo[$st] = $stopCheck($st);
					}
					//Falsy result: the stem is filtered out
					if (!$stopMemo[$st])
					{
						continue;
					}
				}

				if (isset($result[$st]))
				{
					$result[$st] = $result[$st] + $pos;
				}
				else
				{
					$result[$st] = $pos;
				}
			}

			if ($bReturnPositions)
			{
				$pos++;
			}
		}
	}

	return $result;
}
/**
 * Rewrites a query by passing every run of letters through $this->StemWord().
 *
 * "Letters" are the characters of the "pcre_letters" class reported by
 * stemming_init() for $lang. StemWord() is used as a preg_replace_callback()
 * callback, so it receives the match array and its return value replaces the match.
 *
 * @param string $q Query text.
 * @param string $lang Language identifier passed to stemming_init().
 * @return string|null Result of preg_replace_callback().
 */
function StemQuery($q, $lang="en")
{
	$arStemInfo = stemming_init($lang);

	//One or more consecutive letters of the language, captured as group 1
	$wordPattern = "/([".$arStemInfo["pcre_letters"]."]+)/".BX_UTF_PCRE_MODIFIER;
	$wordHandler = array($this, "StemWord");

	return preg_replace_callback($wordPattern, $wordHandler, $q);
}
/**
 * Stores the result of stemming_init() for LANGUAGE_ID in $this->_arStemFunc.
 */
public function __construct()
{
	$stemInfo = stemming_init(LANGUAGE_ID);
	$this->_arStemFunc = $stemInfo;
}
/**
 * Rewrites a query by replacing every run of letters with its stem.
 *
 * "Letters" are the characters of the "pcre_letters" class reported by
 * stemming_init() for $lang. Each matched word is passed as a string to
 * CAllSearchQuery::StemWord() and replaced with the returned value.
 *
 * The previous implementation relied on the /e (PREG_REPLACE_EVAL) pattern
 * modifier, which was deprecated in PHP 5.5 and removed in PHP 7.0 and which
 * evaluated code assembled from the query text. preg_replace_callback() does
 * the same replacement without evaluating any generated code.
 *
 * @param string $q Query text.
 * @param string $lang Language identifier passed to stemming_init().
 * @return string|null Result of preg_replace_callback().
 */
function StemQuery($q, $lang="en")
{
	$arStemInfo = stemming_init($lang);

	return preg_replace_callback(
		"/([".$arStemInfo["pcre_letters"]."]+)/".BX_UTF_PCRE_MODIFIER,
		function ($arMatch)
		{
			//Group 1 is the matched word; StemWord() expects the plain string
			return CAllSearchQuery::StemWord($arMatch[1]);
		},
		$q
	);
}