/**
 * Guesses which keyboard layout a phrase was typed with and which language
 * it was most likely meant for.
 *
 * Steps:
 *  1. Every candidate language may short-circuit the decision through
 *     PreGuessLanguage().
 *  2. The "from" candidates are the languages whose ConvertToScancode()
 *     yields the most non-empty scan codes for the phrase.
 *  3. Every "from => to" pair is scored and ordered with
 *     CSearchLanguage::cmp; the first pair after sorting wins.
 *
 * @param string      $text Phrase to analyse.
 * @param array|false $lang Language ids to consider. When this is not an
 *                          array, the cached list of languages returned by
 *                          CLanguage::GetList() is used, with "en" first.
 *
 * @return array|false The array returned by a PreGuessLanguage() hook, or
 *                     array("from" => ..., "to" => ...) for the winning pair.
 *                     False when the text is empty, fewer than two languages
 *                     are available, a hook returned true, fewer than two
 *                     characters could be converted to scan codes, or
 *                     converting the layout for the winning pair leaves the
 *                     text unchanged.
 */
function GuessLanguage($text, $lang = false)
{
    if (strlen($text) <= 0) {
        return false;
    }

    // The language list is kept in a static local so it is built only once.
    static $cache = array();
    if (empty($cache)) {
        // English is always considered and always comes first.
        $cache[] = "en";

        // NOTE(review): GetList() may take these by reference depending on the
        // platform version (signature not visible here); passing real
        // variables works in both cases.
        $by = "";
        $order = "";
        $rsLanguages = CLanguage::GetList($by, $order);
        while ($arLanguage = $rsLanguages->Fetch()) {
            if ($arLanguage["LID"] != "en") {
                $cache[] = $arLanguage["LID"];
            }
        }
    }

    $arLanguages = is_array($lang) ? $lang : $cache;

    // Guessing needs at least two languages to choose between.
    if (count($arLanguages) < 2) {
        return false;
    }

    // Give customized languages a chance to guess.
    foreach ($arLanguages as $languageId) {
        $ob = CSearchLanguage::GetLanguage($languageId);
        $res = $ob->PreGuessLanguage($text, $languageId);
        if (is_array($res)) {
            return $res;
        }
        if ($res === true) {
            return false;
        }
    }

    // First try to detect the language which was used to type the phrase:
    // it will be the one (or several, on a tie) with most converted chars.
    $languages_from = array();
    $max_len = 0;
    foreach ($arLanguages as $languageId) {
        $ob = CSearchLanguage::GetLanguage($languageId);
        $arScanCodes = $ob->ConvertToScancode($text, true);
        $convertedCount = count(array_filter($arScanCodes));

        if ($convertedCount > $max_len) {
            $max_len = $convertedCount;
            $languages_from = array($languageId => $arScanCodes);
        } elseif ($convertedCount == $max_len) {
            $languages_from[$languageId] = $arScanCodes;
        }
    }

    // $languages_from is never empty here (the first iteration always stores
    // an entry), so only the amount of converted characters has to be checked.
    if ($max_len < 2) {
        return false;
    }

    // If more than one language is detected as input, order them by trigram
    // info. The positional layout of each tuple is consumed by
    // CSearchLanguage::cmp and must not change; the running index is the
    // last element (presumably a tie-breaker - confirm against cmp).
    $arDetectionFrom = array();
    $i = 0;
    foreach ($languages_from as $languageId => $arScanCodes) {
        $ob = CSearchLanguage::GetLanguage($languageId);
        $arDetectionFrom[$languageId] = array(
            $ob->HasTrigrams(),
            $ob->CheckTrigrams($arScanCodes),
            $ob,
            $arScanCodes,
            $i,
        );
        $i++;
    }
    uasort($arDetectionFrom, array("CSearchLanguage", "cmp"));

    // Now score every "from => to" pair, walking the "from" languages in the
    // order established above. Tuple layout is again dictated by cmp.
    $arDetection = array();
    $i = 0;
    foreach (array_keys($arDetectionFrom) as $lang_from) {
        $arScanCodes = $languages_from[$lang_from];
        foreach ($arLanguages as $lang_to) {
            $lang_from_to = $lang_from . "=>" . $lang_to;
            $ob = CSearchLanguage::GetLanguage($lang_to);
            $arDetection[$lang_from_to] = array(
                $ob->HasBigrammInfo(),
                $ob->CheckTrigrams($arScanCodes),
                $ob,
                $arScanCodes,
                // Does switching the layout actually change the text?
                CSearchLanguage::ConvertKeyboardLayout($text, $lang_from, $lang_to) !== $text,
                $i,
                $lang_from_to,
            );
            $i++;
        }
    }
    uasort($arDetection, array("CSearchLanguage", "cmp"));

    // The first pair after sorting is the winner.
    reset($arDetection);
    $language_from_to = key($arDetection);
    list($language_from, $language_to) = explode("=>", $language_from_to);

    // A conversion that leaves the text untouched is not a useful guess.
    $alt_text = CSearchLanguage::ConvertKeyboardLayout($text, $language_from, $language_to);
    if ($alt_text === $text) {
        return false;
    }

    return array("from" => $language_from, "to" => $language_to);
}