/**
 * Builds the list of most frequently used words for every contact and stores
 * the result in the global $contacts array.
 *
 * For each key of $contacts (an e-mail address) whose text dump exists at
 * "{$up}/content/<sanitized e-mail>.txt" the function:
 *   1. runs a shell pipeline that lower-cases the text, splits it into words,
 *      counts them, drops English stop words and lines without a run of three
 *      letters, and writes the last ($thresholdWords * 4) lines to
 *      "<sanitized e-mail>_words.txt";
 *   2. runs LangDetect on that word list and stores the first language key it
 *      returns in $contacts[$email]['language'];
 *   3. when the language chosen for filtering is not English, removes the
 *      matching stop words and trims the list to $thresholdWords entries;
 *   4. loads the word list (reversed) into $contacts[$email]['words'];
 *   5. for English or Swedish, runs the bundled cstlemma binary and stores its
 *      output (reversed, last entry dropped) in $contacts[$email]['wordsStem'].
 *
 * Uses the globals $contacts, $thresholdWords and $up, and depends on the
 * external commands cat, tr, sort, uniq, grep and tail being available.
 * Progress dots are echoed and flushed while the loop runs.
 *
 * @return void
 */
function topWords()
{
    global $contacts, $thresholdWords, $up;

    logMsg('USER', "Finding top {$thresholdWords} most often used words for each contact..");

    foreach (array_keys($contacts) as $email) {
        # Progress indicator for the page being rendered.
        echo ' <span>.</span>';
        flush();

        $f = sanitizeFilename($email);
        $filename = $up . '/content/' . $f . '.txt';
        $filenameWords = $up . '/content/' . $f . '_words.txt';
        $filenameWordsStem = $up . '/content/' . $f . '_words_stem.txt';

        chdir($up);
        if (!file_exists($filename)) {
            continue;
        }

        # Pass 1: word frequency list with English stop words removed.
        # Every path is shell-escaped and the regex is quoted so that spaces or
        # shell metacharacters cannot alter the command.
        $cmd = 'cat ' . escapeshellarg($filename)
            . ' | tr "A-Z" "a-z" | tr -c "[:alpha:]" " " | tr " " "\\n" | sort | uniq -c | sort'
            . ' | grep -v -w -f ' . escapeshellarg($up . '/stopwords/english.txt')
            . ' | grep -E "[a-z]{3,}" | tr -d " *[:digit:]*\\t"'
            . ' | tail -n ' . (int) ($thresholdWords * 4)
            . ' > ' . escapeshellarg($filenameWords);
        logMsg('DEBUG', "Running CMD: {$cmd}");
        shell_exec($cmd);

        # Detect language. Analyze() is used as a map of language => score
        # whose first entry is the best match.
        $detector = new LangDetect($filenameWords, -1);
        $lang = $detector->Analyze();
        $languages = array_keys($lang);
        $detectedLanguage = isset($languages[0]) ? $languages[0] : null;
        $contacts[$email]['language'] = $detectedLanguage;
        $language = $detectedLanguage;

        # Take the best score and remove that entry, leaving only runners-up.
        $score = array_shift($lang);

        # Use the first non-English runner-up as the filtering language, but
        # stop as soon as a candidate's score is more than 7000 above the best.
        foreach ($lang as $l => $lscore) {
            if ($lscore - $score > 7000) {
                break;
            }
            if ($l != 'english') {
                $language = $l;
                break;
            }
        }

        if ($language != 'english') {
            logMsg('DEBUG', "Language for {$email} is " . $contacts[$email]['language'] . " (but removing also {$language} stopwords)");
        } else {
            logMsg('DEBUG', "Language for {$email} is " . $contacts[$email]['language']);
        }

        if ($language != 'english') {
            # Pass 2: drop stop words of the detected language and of the
            # alternate language named in the log message above. English was
            # already filtered in pass 1, and languages without a stop word
            # list are skipped so grep does not fail on a missing file.
            $filters = '';
            foreach (array_unique(array($detectedLanguage, $language)) as $stopLanguage) {
                if ($stopLanguage === null || $stopLanguage == 'english') {
                    continue;
                }
                $stopFile = $up . '/stopwords/' . $stopLanguage . '.txt';
                if (file_exists($stopFile)) {
                    $filters .= ' | grep -v -w -f ' . escapeshellarg($stopFile);
                }
            }

            # The word list is already unique and ordered by pass 1, so it is
            # only filtered and trimmed here; re-sorting it would order the
            # words alphabetically. Output goes to a temporary file because a
            # pipeline that redirects into its own input file can truncate it
            # before it has been read.
            $filenameTmp = $filenameWords . '.tmp';
            $cmd = 'cat ' . escapeshellarg($filenameWords)
                . $filters
                . ' | tail -n ' . (int) $thresholdWords
                . ' > ' . escapeshellarg($filenameTmp);
            logMsg('DEBUG', "Running CMD: {$cmd}");
            shell_exec($cmd);
            if (file_exists($filenameTmp)) {
                rename($filenameTmp, $filenameWords);
            }
        }

        # The file ends with the last lines kept by tail; reversing puts them first.
        $contacts[$email]['words'] = array_reverse(array_trim(file($filenameWords)));

        if ($language == 'english' || $language == 'swedish') {
            # Flex rule files are named after the first two letters of the language.
            $languageShort = substr($language, 0, 2);
            $cmd = escapeshellarg($up . '/cstlemma/bin/vc2008/cstlemma.exe')
                . ' -e1 -L -f ' . escapeshellarg($up . '/cstlemma/flexrules_' . $languageShort)
                . ' -t- -c"$B" -B"$w\\n" < ' . escapeshellarg($filenameWords)
                . ' > ' . escapeshellarg($filenameWordsStem);
            logMsg('DEBUG', "Running CMD: {$cmd}");
            shell_exec($cmd);

            $contacts[$email]['wordsStem'] = array_reverse(array_trim(file($filenameWordsStem)));
            # NOTE(review): the last entry of the reversed list is discarded,
            # presumably an artefact line emitted by cstlemma; confirm.
            array_pop($contacts[$email]['wordsStem']);
        }
    }

    logMsg('USER', "Done!");
}
/**
 * Selects the locale and gettext text domain for the current request.
 *
 * Resolution order (later steps override earlier ones):
 *   1. built-in fallback: locale "ru_RU", text domain "original";
 *   2. $config['config']['locales_options']['default'], when that locale is
 *      configured and enabled;
 *   3. the locale returned by LangDetect::getBestMatch(), when
 *      $config['config']['locales_options']['autoDetect'] == 1 and the
 *      returned locale is configured and enabled;
 *   4. the locale named by $_GET['lang'] (which is also stored in the "lang"
 *      cookie) or, failing that, by the "lang" cookie, when it is configured
 *      and enabled.
 *
 * The chosen locale is passed to T_setlocale() and its text domain file is
 * bound under "./data/locale" with the UTF-8 codeset.
 *
 * @param array $config Application configuration. Locales are read from
 *                      $config['config']['locales'] as "name_<id>",
 *                      "file_<id>" and "enable_<id>" entries.
 * @return void
 */
function setLanguage($config)
{
    $locales = array();
    $lang_cookie = '';

    # Locale remembered in the cookie. Non-string values (e.g. "lang[]=x") are
    # ignored because they cannot be used as an array key below.
    if (isset($_COOKIE['lang']) and is_string($_COOKIE['lang'])) {
        $lang_cookie = $_COOKIE['lang'];
    }

    # A language chosen through the query string is stored in the cookie
    # (lifetime: 1000 days).
    if (isset($_GET['lang']) and is_string($_GET['lang'])) {
        $lang_cookie = $_GET['lang'];
        SetCookie("lang", $lang_cookie, time() + 1000 * 24 * 60 * 60);
    }

    # Build the locale table: locale name => its text domain file and enabled flag.
    if (isset($config['config']['locales'])) {
        $localeConfig = $config['config']['locales'];
        foreach ($localeConfig as $k => $v) {
            if (stristr($k, 'name_') !== false) {
                $key_name = str_replace('name_', '', $k);
                $val_en = 'enable_' . $key_name;
                $val_file = 'file_' . $key_name;
                $locales[$v] = array(
                    'file' => isset($localeConfig[$val_file]) ? $localeConfig[$val_file] : null,
                    'enable' => isset($localeConfig[$val_en]) ? $localeConfig[$val_en] : null,
                );
            }
        }
    }

    # Base language ru_RU, used when the configured default is unavailable or
    # not enabled.
    $defaultNameLocale = 'ru_RU';
    $defaultFileLocale = 'original';

    # Configured default locale.
    if (isset($config['config']['locales_options']['default']) and $config['config']['locales_options']['default'] != '') {
        $temp_default = $config['config']['locales_options']['default'];
        # Only accept the default when it is configured and enabled.
        if (isset($locales[$temp_default]) and $locales[$temp_default]['enable'] == 1) {
            $defaultNameLocale = $temp_default;
            $defaultFileLocale = $locales[$temp_default]['file'];
        }
    }

    $nameLocale = $defaultNameLocale;
    $fileLocale = $defaultFileLocale;

    # Auto-detection, when switched on.
    if (isset($config['config']['locales_options']['autoDetect']) and $config['config']['locales_options']['autoDetect'] == 1) {
        $langDetect = new LangDetect();
        $langs = array('ru_RU' => array('ru'), 'uk_UA' => array('uk'), 'en_GB' => array('en'));
        # NOTE(review): getBestMatch() presumably negotiates against the
        # client's preferred languages; confirm in LangDetect.
        $detectNameLocale = $langDetect->getBestMatch($defaultNameLocale, $langs);
        # Use the detected locale only when it is configured and enabled, and
        # take the text domain file that belongs to that same locale.
        if (isset($locales[$detectNameLocale]) and $locales[$detectNameLocale]['enable'] == 1) {
            $nameLocale = $detectNameLocale;
            $fileLocale = $locales[$detectNameLocale]['file'];
        }
    }

    # An enabled locale from the cookie/query string overrides everything, and
    # the text domain file switches with it so name and domain stay in sync.
    if (isset($locales[$lang_cookie]) and $locales[$lang_cookie]['enable'] == 1) {
        $nameLocale = $lang_cookie;
        $fileLocale = $locales[$lang_cookie]['file'];
    }

    T_setlocale(LC_MESSAGES, $nameLocale);
    T_bindtextdomain($fileLocale, './data/locale');
    T_bind_textdomain_codeset($fileLocale, 'UTF-8');
    T_textdomain($fileLocale);
}