/**
 * Load the Enchant dictionary for a language code.
 *
 * The code is lower-cased before use. When the broker has no dictionary for the
 * plain code (e.g. 'it'), the doubled form ('it_IT') is tried instead.
 *
 * @param string $langCode language code to load
 * @return bool result of isLoaded() after the attempt
 */
public function load($langCode)
{
    wfProfileIn(__METHOD__);

    $langCode = strtolower($langCode);
    self::log(__METHOD__, "trying to load dictionary for '{$langCode}'...");

    // providers are consulted in this order
    enchant_broker_set_ordering($this->broker, '*', 'myspell,aspell');

    // no dictionary for 'it'? fall back to 'it_IT'
    $plainCodeAvailable = enchant_broker_dict_exists($this->broker, $langCode);
    if (!$plainCodeAvailable) {
        $langCode = $langCode . '_' . strtoupper($langCode);
    }

    // only request the dictionary when the broker reports it as available
    $dictAvailable = enchant_broker_dict_exists($this->broker, $langCode);
    if ($dictAvailable) {
        $section = __METHOD__ . '::requestDict';
        wfProfileIn($section);
        $this->dict = enchant_broker_request_dict($this->broker, $langCode);
        wfProfileOut($section);
    }

    if (!$this->isLoaded()) {
        self::log(__METHOD__, "unable to load dictionary for '{$langCode}'!");
    } else {
        $info = $this->describe();
        self::log(__METHOD__, "loaded '{$info['lang']}' provided by {$info['desc']}");
    }

    wfProfileOut(__METHOD__);

    return $this->isLoaded();
}
/**
 * Benchmark Enchant spell checking of a word list against the ru_RU dictionary.
 *
 * Every word is checked; for each rejected word suggestions are requested as well,
 * so their cost is included in the measured time. Prints elapsed time, throughput
 * and the number of rejected words.
 *
 * @param array $words words to check
 * @return false|null false when the broker or dictionary is unavailable, nothing otherwise
 */
function bench_enchant($words)
{
    echo "Bench Enchant: ";
    $tag = 'ru_RU';

    $r = enchant_broker_init();
    if ($r === false) {
        echo "unable to initialise enchant broker\n";
        return false;
    }

    if (!enchant_broker_dict_exists($r, $tag)) {
        echo "{$tag} dict not supported by enchant\n";
        // release the broker on the early exit as well
        enchant_broker_free($r);
        return false;
    }

    $d = enchant_broker_request_dict($r, $tag);
    if ($d === false) {
        echo "{$tag} dict could not be loaded by enchant\n";
        enchant_broker_free($r);
        return false;
    }

    $not_found = 0;
    $b = microtime(true);
    foreach ($words as $word) {
        // enchant_dict_quick_check() was observed to segfault here, so check and
        // suggest are called separately
        if (false === enchant_dict_check($d, $word)) {
            enchant_dict_suggest($d, $word);
            $not_found++;
        }
    }
    $e = microtime(true);

    // an empty or very short run can yield a zero interval; avoid dividing by it
    $elapsed = $e - $b;
    $rate = $elapsed > 0 ? count($words) / $elapsed : 0.0;
    printf("time = %0.2f sec, words per second = %0.2f, not found = %d\n", $elapsed, $rate, $not_found);

    enchant_broker_free_dict($d);
    enchant_broker_free($r);
}
/**
 * Set up the Enchant broker and open the dictionary for the configured language.
 *
 * The script is terminated with a message when the Enchant extension is missing
 * or when the broker has no dictionary for $this->_config['lang'].
 *
 * @param array $config configuration handed to the parent constructor
 */
public function __construct($config = array())
{
    parent::__construct($config);

    if (!function_exists('enchant_broker_init')) {
        exit('Enchant library not found');
    }

    $this->broker = enchant_broker_init();

    // Check availability before requesting: asking the broker for a dictionary it
    // does not have fails, and the explicit message below would come too late.
    if (!enchant_broker_dict_exists($this->broker, $this->_config['lang'])) {
        exit('Enchant dictionary not found for lang: ' . $this->_config['lang']);
    }

    $this->dictionary = enchant_broker_request_dict($this->broker, $this->_config['lang']);
}
/**
 * Returns suggestions for a specific word.
 *
 * An empty array is returned when the language has no dictionary, when the
 * dictionary cannot be opened, or when Enchant yields no suggestion list.
 *
 * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
 * @param String $word Specific word to get suggestions for.
 * @return Array of suggestions for the specified word.
 */
function &getSuggestions($lang, $word)
{
    $r = enchant_broker_init();
    $suggs = array();

    if (enchant_broker_dict_exists($r, $lang)) {
        $d = enchant_broker_request_dict($r, $lang);
        if ($d) {
            $found = enchant_dict_suggest($d, $word);
            // enchant_dict_suggest() does not always return an array; keep the
            // documented array return type in that case
            if (is_array($found)) {
                $suggs = $found;
            }
            enchant_broker_free_dict($d);
        }
    }

    enchant_broker_free($r);

    return $suggs;
}
/**
 * Resolve a language code to one the Enchant broker has a dictionary for.
 *
 * For codes with known regional variants (currently only "en") the requested
 * code is tried first, then each variant in order. The first code with an
 * available dictionary is returned; the input is returned unchanged otherwise.
 *
 * @param mixed  $enchant Enchant broker handle
 * @param string $lang    requested language code
 * @return string language code to request from the broker
 */
private function normalizeLangCode($enchant, $lang)
{
    $variants = array("en" => array("en_US", "en_GB"));

    if (isset($variants[$lang])) {
        // Build the candidate list from the requested code plus its variants;
        // prepending to $variants itself would only alter the lookup table.
        $candidates = array_merge(array($lang), $variants[$lang]);

        foreach ($candidates as $candidate) {
            if (enchant_broker_dict_exists($enchant, $candidate)) {
                return $candidate;
            }
        }
    }

    return $lang;
}
/**
 * Returns suggestions for a specific word.
 *
 * Reports "Language not installed" through throwError() when the broker has no
 * dictionary for $lang. An empty array is returned when no suggestion list is
 * available.
 *
 * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
 * @param String $word Specific word to get suggestions for.
 * @return Array of suggestions for the specified word.
 */
function &getSuggestions($lang, $word)
{
    $r = enchant_broker_init();
    $suggs = array();

    if (enchant_broker_dict_exists($r, $lang)) {
        $d = enchant_broker_request_dict($r, $lang);
        if ($d) {
            $found = enchant_dict_suggest($d, $word);
            // enchant_dict_suggest() does not always return an array; keep the
            // documented array return type in that case
            if (is_array($found)) {
                $suggs = $found;
            }
            enchant_broker_free_dict($d);
        }
    } else {
        $this->throwError("Language not installed");
    }

    enchant_broker_free($r);

    return $suggs;
}
/**
 * Prepare the Enchant broker (once) and open the dictionary for $this->lang.
 *
 * On failure $this->error is set and the dictionary is left untouched.
 */
private function init()
{
    $brokerMissing = !$this->enchant_broker;

    if ($brokerMissing) {
        $hasExtension = extension_loaded('enchant');
        if ($hasExtension === false) {
            $this->error = "Enchant extension not available";
            return;
        }

        $this->enchant_broker = enchant_broker_init();
    }

    $dictAvailable = enchant_broker_dict_exists($this->enchant_broker, $this->lang);

    if ($dictAvailable) {
        $this->enchant_dictionary = enchant_broker_request_dict($this->enchant_broker, $this->lang);
    } else {
        $this->error = "Unable to load dictionary for selected language using Enchant";
    }
}
/**
 * Look up spelling suggestions for one word.
 *
 * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
 * @param String $word Specific word to get suggestions for.
 * @return Array of suggestions for the specified word; empty when the language
 *         has no dictionary or Enchant returns no list.
 */
function &getSuggestions($lang, $word)
{
    $broker = enchant_broker_init();
    $result = array();

    if (enchant_broker_dict_exists($broker, $lang)) {
        $dict = enchant_broker_request_dict($broker, $lang);
        $candidates = enchant_dict_suggest($dict, $word);

        // enchant_dict_suggest() sometimes returns NULL
        if (is_array($candidates)) {
            $result = $candidates;
        }

        enchant_broker_free_dict($dict);
    }

    enchant_broker_free($broker);

    return $result;
}
/**
 * Create the broker, point the chosen engine at its dictionary folder and open
 * the dictionary for the configured language.
 *
 * Settings are resolved in this order (later wins): built-in defaults, the
 * 'Spell' configuration entry, the $options argument.
 *
 * @param array $options overrides for 'path', 'lang' and 'engine'
 * @throws InternalErrorException when Enchant, the engine or the dictionary is unavailable
 */
public function __construct($options = array())
{
    if (!function_exists('enchant_broker_init')) {
        throw new InternalErrorException(__('Module %s not installed', 'Enchant'));
    }
    $this->_Broker = enchant_broker_init();

    $builtIn = array(
        'path' => VENDORS . 'dictionaries' . DS,
        'lang' => 'en_GB',
        'engine' => self::ENGINE_MYSPELL,
    );
    $configured = am($builtIn, (array) Configure::read('Spell'));
    $options = array_merge($configured, $options);

    $engine = $options['engine'];
    if (!isset($this->_engines[$engine])) {
        throw new InternalErrorException(__('Engine %s not found', (string) $options['engine']));
    }

    // each engine keeps its dictionaries in its own sub-folder of 'path'
    $dictPath = $options['path'] . $this->_engines[$engine] . DS;
    enchant_broker_set_dict_path($this->_Broker, $engine, $dictPath);

    $lang = $options['lang'];
    if (!enchant_broker_dict_exists($this->_Broker, $lang)) {
        throw new InternalErrorException(__('Dictionary %s not found', $options['lang']));
    }

    $this->_Dict = enchant_broker_request_dict($this->_Broker, $lang);
}
/**
 * Spell-check words against the dictionaries of the given locales.
 *
 * A word counts as checked as soon as one dictionary accepts it. Words accepted
 * by no dictionary receive the suggestions collected per locale and are
 * reported as misspelled.
 *
 * @param array|Word[]   $words
 * @param array|string[] $locales
 *
 * @return SpellResult
 */
public function check($words, array $locales)
{
    $broker = enchant_broker_init();

    // open one dictionary per locale the broker knows about
    $dictionaries = array();
    foreach ($locales as $locale) {
        if (enchant_broker_dict_exists($broker, $locale)) {
            $dictionaries[$locale] = enchant_broker_request_dict($broker, $locale);
        }
        // TODO handle and log error for locales without a dictionary
    }

    $misspelledWords = array();
    foreach ($words as $word) {
        $accepted = false;
        $suggests = array();

        foreach ($dictionaries as $locale => $dictionary) {
            $suggests[$locale] = array();
            // stop consulting dictionaries once one of them accepted the word
            if (!$accepted) {
                $accepted = (bool) enchant_dict_quick_check($dictionary, $word->getWord(), $suggests[$locale]);
            }
        }

        $word->setChecked($accepted);
        if ($word->isChecked()) {
            continue;
        }

        $word->setSuggests($suggests);
        $misspelledWords[] = $word;
    }

    foreach ($dictionaries as $dictionary) {
        enchant_broker_free_dict($dictionary);
    }
    enchant_broker_free($broker);

    $spellResult = new SpellResult();
    $spellResult->setCountOfWords(count($words));
    $spellResult->setMisspelledWords($misspelledWords);

    return $spellResult;
}
//mfc $msg = str_replace('{total_pages}', $total_pages, $msg); $search_result_info .= '<div class="search-page-info">' . $msg . '</div>'; } else { $page = 1; } // query time if (!isset($_SERVER['QUERY_STRING'])) { $_SERVER['QUERY_STRING'] = ''; } $search_result_info .= '<div class="search-query-time">' . __('Query took') . ' <b>' . $biblio_list->query_time . '</b> ' . __('second(s) to complete') . '</div>'; if ($biblio_list->num_rows < 1 && $keywords != '') { // word suggestion with enchant if (function_exists('enchant_broker_init') && $sysconf['spellchecker_enabled']) { $enc = enchant_broker_init(); if (enchant_broker_dict_exists($enc, $sysconf['default_lang'])) { $dict = enchant_broker_request_dict($enc, $sysconf['default_lang']); } else { $dict = enchant_broker_request_dict($enc, 'en_US'); } $search_result_info .= '<div class="search-suggestions">' . __('Did you mean:') . ' '; $word = strtok($keywords, " \t\n"); $keywords_suggest = array(); while ($word !== false) { // check if we are inside quote if (stripos($word, '"', 0) === true) { $search_result_info .= preg_replace('@[a-z]@i', '', $word); $word = str_replace('"', '', $word); } $wordcorrect = enchant_dict_check($dict, $word); if (!$wordcorrect) {
/**
 * Return a collection of suggestions corresponding to a query.
 *
 * The query is split into words, alternate versions are gathered for each word
 * (Enchant suggestions for misspelled words when a dictionary exists for the
 * options' locale, plus get_sugg_trigrams()), alternate queries are built from
 * them and each is sent to Sphinx. Alternatives with hits are kept, sorted with
 * suggestionsHitSorter, and those below 1% of the best hit count are dropped.
 *
 * @param  string              $query
 * @param  SearchEngineOptions $options
 * @return ArrayCollection An array collection of SearchEngineSuggestion
 */
private function getSuggestions($query, SearchEngineOptions $options)
{
    // First we split the query into simple words
    $words = explode(" ", $this->cleanupQuery(mb_strtolower($query)));

    $tmpWords = [];
    foreach ($words as $word) {
        if (trim($word) === '') {
            continue;
        }
        $tmpWords[] = $word;
    }
    $words = array_unique($tmpWords);

    $altVersions = [];
    foreach ($words as $word) {
        $altVersions[$word] = [$word];
    }

    // As we got words, we look for alternate word for each of them
    if (function_exists('enchant_broker_init') && $options->getLocale()) {
        $broker = enchant_broker_init();
        if (enchant_broker_dict_exists($broker, $options->getLocale())) {
            $dictionnary = enchant_broker_request_dict($broker, $options->getLocale());

            foreach ($words as $word) {
                // correctly spelled words keep their single original version
                if (enchant_dict_check($dictionnary, $word) != false) {
                    continue;
                }

                // suggestions are scoped to the current word, and only used
                // when Enchant actually returned a list
                $suggs = enchant_dict_suggest($dictionnary, $word);
                if (is_array($suggs)) {
                    $altVersions[$word] = array_unique(array_merge($altVersions[$word], $suggs));
                }
            }

            enchant_broker_free_dict($dictionnary);
        }
        enchant_broker_free($broker);
    }

    /**
     * @todo enhance the trigramm query, as it could be sent in one batch
     */
    foreach ($altVersions as $word => $versions) {
        $altVersions[$word] = array_unique(array_merge($versions, $this->get_sugg_trigrams($word, $options)));
    }

    // We now build an array of all possibilities based on the original query
    $queries = [$query];
    foreach ($altVersions as $word => $versions) {
        $tmp_queries = [];
        foreach ($versions as $version) {
            foreach ($queries as $alt_query) {
                $tmp_queries[] = $alt_query;
                $tmp_queries[] = str_replace($word, $version, $alt_query);
            }
            $tmp_queries[] = str_replace($word, $version, $query);
        }
        $queries = array_unique(array_merge($queries, $tmp_queries));
    }

    $suggestions = [];
    $max_results = 0;

    foreach ($queries as $alt_query) {
        $results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query, $options));
        if ($results !== false && isset($results['total_found'])) {
            if ($results['total_found'] > 0) {
                $max_results = max($max_results, (int) $results['total_found']);
                $suggestions[] = new SearchEngineSuggestion($query, $alt_query, (int) $results['total_found']);
            }
        }
    }

    usort($suggestions, ['self', 'suggestionsHitSorter']);

    // keep only suggestions with at least 1% of the best hit count
    $tmpSuggestions = new ArrayCollection();
    foreach ($suggestions as $suggestion) {
        if ($suggestion->getHits() < $max_results / 100) {
            continue;
        }
        $tmpSuggestions->add($suggestion);
    }

    return $tmpSuggestions;
}
/**
 * Wiki-specific search suggestions using enchant library.
 * Use SphinxSearch_setup.php to create the dictionary
 *
 * Each term in $this->mTerms that the 'sphinx' dictionary rejects is replaced by
 * the first suggestion that differs from it (case-insensitively). When at least
 * one term was replaced, the rebuilt phrase is stored in $this->mSuggestion.
 * Does nothing when the Enchant extension or the dictionary is unavailable.
 */
function suggestWithEnchant()
{
    if (!function_exists('enchant_broker_init')) {
        return;
    }

    $broker = enchant_broker_init();
    // the dictionary is looked up in this file's own directory
    enchant_broker_set_dict_path($broker, ENCHANT_MYSPELL, dirname(__FILE__));

    if (enchant_broker_dict_exists($broker, 'sphinx')) {
        $dict = enchant_broker_request_dict($broker, 'sphinx');
        $suggestion_found = false;
        $full_suggestion = '';

        foreach ($this->mTerms as $word) {
            if (!enchant_dict_check($dict, $word)) {
                $suggestions = enchant_dict_suggest($dict, $word);
                // enchant_dict_suggest() does not always return an array;
                // counting or iterating a non-array would fail
                if (is_array($suggestions)) {
                    foreach ($suggestions as $candidate) {
                        if (strtolower($candidate) != strtolower($word)) {
                            $word = $candidate;
                            $suggestion_found = true;
                            break;
                        }
                    }
                }
            }
            $full_suggestion .= $word . ' ';
        }

        enchant_broker_free_dict($dict);

        if ($suggestion_found) {
            $this->mSuggestion = trim($full_suggestion);
        }
    }

    enchant_broker_free($broker);
}
/**
 * @fn convert
 * @memberof Converter
 * @brief Public function to convert encoding of text contents.
 *
 * Every entry of $options is turned into a local variable named after its key,
 * so the option names below are the variable names this method actually reads.
 * NOTE(review): $input_font, $output and $en_zwsp are read without an isset()
 * guard, and no defaults are assigned inside this method - callers apparently
 * have to supply them; confirm against callers.
 * NOTE(review): because keys become variables, a caller-controlled key can
 * overwrite any local (including $text) - do not pass untrusted option arrays.
 *
 * @param $text Source text content to convert.
 * @param $options Options array
 * Available Options -
 * -# output (Output encoding; selects the rules directory 'rules/<output>')
 * -# input_font (Input encoding; '' or 'auto' delegates to enc_test())
 * -# encoding (encoding type - 'ascii' enables the word/tag protection branch; also part of the rules file name)
 * -# spelling_check (Enable spelling checking when converting ascii fonts.)
 * -# text_only (When not exactly true, HTML tags and style/script blocks are protected from conversion.)
 * -# en_zwsp (enable or disable adding Zero-Width-Space in converted result.)
 * -# exceptions (comma separated list of words or phrases to ignore from conversion process; only entries longer than 4 bytes are used.)
 * -# suggested (when exactly true, the user dictionary file is read and its words are protected.)
 *
 * @returns string $this->text Return converted content.
 */
public function convert($text, $options = array())
{
    foreach ($options as $option_name => $option_value) {
        // variable-variable: creates/overwrites a local named after the option key
        ${$option_name} = $option_value;
    }

    // Source encoding: explicit, or determined by enc_test() for '' / 'auto'.
    if ($input_font === '' || $input_font == 'auto') {
        $input = $this->enc_test($text);
    } else {
        $input = $input_font;
    }

    // Rules live under ROOT_DIR . 'ext/rules/<output>/'.
    $ext_dir = 'ext/';
    $rules_dir = 'rules/' . $output;

    // Rules file name: '<input>-rules-<encoding>.php' or '<input>-rules.php'.
    if (isset($encoding) && $encoding != '') {
        $ext_file = $input . '-rules-' . $encoding . '.php';
    } else {
        $ext_file = $input . '-rules.php';
    }

    if (file_exists($this->ROOT_DIR . $ext_dir . $rules_dir . '/' . $ext_file)) {
        // The included files run in this method's scope.
        // presumably they define $conv_rules, $order, $correction and $zwsp - verify against the rule files
        require $this->ROOT_DIR . $ext_dir . $rules_dir . '/' . $ext_file;
        include $this->ROOT_DIR . $ext_dir . $rules_dir . '/correction.php';
        include $this->ROOT_DIR . $ext_dir . $rules_dir . '/zwsp.php';
    } else {
        $correction = array();
        // The message is stored in $final_text but processing continues (the
        // early return below was left commented out), so later assignments to
        // $final_text replace it.
        // NOTE(review): $zwsp is not defined on this path but is read below when $en_zwsp is true.
        $final_text = $this->ROOT_DIR . $ext_dir . $rules_dir . '/' . $ext_file . " Fatal Error: Your converting rules file cannot be found!\nFile is missing or never existed. 
Try another choice or contact developer!";
        //return ;
    }

    if (!isset($order)) {
        // Character ordering rules; fall back to an empty array when not defined.
        $order = array();
    }

    // Combine the pattern => replacement arrays, with or without the zwsp rules.
    if ($en_zwsp == true) {
        $final_regex_array = array_merge_recursive($order, $correction, $zwsp);
    } else {
        $final_regex_array = array_merge_recursive($order, $correction);
    }

    if ($input === $output) {
        // Same encoding on both sides: only the regex rules are applied.
        // NOTE(review): every iteration starts again from $text, so only the
        // last rule's replacement survives in $final_text - confirm intent.
        foreach ($final_regex_array as $key => $value) {
            $final_text = preg_replace('/' . $key . '/u', $value, $text);
        }
    } else {
        if (isset($conv_rules)) {
            if ($encoding == 'ascii') {
                // NOTE(review): these named functions are declared when this
                // branch executes; entering the branch a second time in the same
                // request would redeclare them - confirm convert() is called once.

                /**
                 * @fn trim_value
                 * @brief array_walk callback: trims the element in place.
                 * @param $value element, passed by reference
                 */
                function trim_value(&$value)
                {
                    $value = trim($value);
                }

                /**
                 * @fn ucwords_value
                 * @brief array_walk callback: applies ucwords() in place (not used within this method).
                 * @param $value element, passed by reference
                 */
                function ucwords_value(&$value)
                {
                    $value = ucwords($value);
                }

                /**
                 * @fn space_on_short_words
                 * @brief array_walk callback: surrounds an element of 1-3 word characters with spaces.
                 * @param $value element, passed by reference
                 */
                function space_on_short_words(&$value)
                {
                    $value = preg_replace('/^[\\d\\w]{1,3}$/u', ' $0 ', $value);
                }

                if (isset($spelling_check) && false !== $spelling_check) {
                    // Collect the unique words of the tag-stripped text
                    // (split on whitespace and commas).
                    $stripped_text = strip_tags($text);
                    $paragraph = preg_split("/[\\s,]+/s", $stripped_text);
                    array_walk($paragraph, 'trim_value');
                    $words_array = array_unique($paragraph);
                    array_multisort($words_array);

                    if (function_exists('enchant_broker_init')) {
                        // Enchant available: words the en_US dictionary accepts are
                        // recorded in $english_words_array.
                        $tag = 'en_US';
                        $r = enchant_broker_init();
                        // $bprovides and $dicts are assigned but not read in this method
                        $bprovides = enchant_broker_describe($r);
                        $dicts = enchant_broker_list_dicts($r);
                        if (enchant_broker_dict_exists($r, $tag)) {
                            $d = enchant_broker_request_dict($r, $tag);
                            // $dprovides is assigned but not read in this method
                            $dprovides = enchant_dict_describe($d);
                            foreach ($words_array as $word) {
                                if (!empty($word)) {
                                    $wordcorrect = enchant_dict_check($d, $word);
                                    if ($wordcorrect) {
                                        // all-digit words are wrapped in double quotes,
                                        // words of 1-3 characters in spaces
                                        $word = preg_replace('/^\\d+$/u', '"$0"', $word);
                                        $word = preg_replace('/^[\\d\\w]{1,3}$/u', ' $0 ', $word);
                                        $english_words_array[$word] = $word;
                                    }
                                }
                            }
                            enchant_broker_free_dict($d);
                        }
                        enchant_broker_free($r);
                    } else {
                        // No Enchant: fall back to a word list from an included file.
                        // presumably dictionary_array.php defines $dictionary - verify
                        include $this->ROOT_DIR . $ext_dir . 'dic/dictionary_array.php';
                        array_walk($words_array, 'space_on_short_words');
                        foreach ($words_array as $word) {
                            if (!empty($word)) {
                                // Plural handling: 'ies' -> singular ending in 'y', 's' -> stem.
                                // $plural_ies (the match count) is not read afterwards.
                                $plural_ies = preg_match('/(\\w+)(ies)|(\\w+)(s)/', $word, $plural_match_ies);
                                if (!empty($plural_match_ies)) {
                                    array_walk($plural_match_ies, 'space_on_short_words');
                                    // the suffix groups are compared in their space-padded form
                                    // produced by the array_walk above
                                    // NOTE(review): $singular may be unset or stale when neither
                                    // comparison matches - confirm
                                    if ($plural_match_ies[2] == ' ies ') {
                                        $singular = $plural_match_ies[1] . 'y';
                                    } elseif ($plural_match_ies[4] == ' s ') {
                                        $singular = $plural_match_ies[3];
                                    }
                                    if (in_array($singular, $dictionary) || in_array(strtolower($singular), $dictionary)) {
                                        $plural_array[$plural_match_ies[0]] = $plural_match_ies[0];
                                    }
                                }
                                if (in_array($word, $dictionary) || in_array(strtolower($word), $dictionary)) {
                                    $english_words_array[$word] = $word;
                                }
                            }
                        }
                    }

                    // Merge the recognised words and plurals into one unique list.
                    $english_words = array();
                    if (isset($english_words_array) && !empty($english_words_array)) {
                        $english_words = $english_words_array;
                    }
                    if (isset($plural_array) && !empty($plural_array)) {
                        $english_words = array_merge($english_words, $plural_array);
                    }
                    $english_words = array_unique($english_words);
                }

                // $generated_array maps strings to themselves; merged into the
                // strtr() table below, such entries are replaced by themselves.
                $generated_array = array();
                if (true !== $text_only) {
                    // protect HTML tags ...
                    preg_match_all('/<(.*)>/uU', $text, $html_tags);
                    foreach ($html_tags[0] as $html_tag) {
                        if (!empty($html_tag)) {
                            $generated_array[$html_tag] = $html_tag;
                        }
                    }
                    // ... and whole style/script blocks
                    preg_match_all('/<(style|script)(.*)<\\/(style|script)>/uUs', $text, $script_tags);
                    foreach ($script_tags[0] as $script_tag) {
                        if (!empty($script_tag)) {
                            $generated_array[$script_tag] = $script_tag;
                        }
                    }
                }

                // Words to protect that are also written back to the user dictionary.
                $user_content_array = array();
                if (isset($exceptions)) {
                    $exceps_array = explode(',', $exceptions);
                    if (!empty($exceps_array)) {
                        foreach ($exceps_array as $ignore_list) {
                            // entries of 4 bytes or fewer are ignored
                            if (!empty($ignore_list) && strlen($ignore_list) > 4) {
                                $generated_array[$ignore_list] = $ignore_list;
                                $user_content_array[] = $ignore_list;
                            }
                        }
                    }
                }

                if (isset($suggested) && true === $suggested) {
                    // load previously stored user words, one per line
                    if (file_exists($this->ROOT_DIR . $ext_dir . 'dic/userdic.dic')) {
                        $user_dic = file($this->ROOT_DIR . $ext_dir . 'dic/userdic.dic', FILE_SKIP_EMPTY_LINES);
                        array_walk($user_dic, 'trim_value');
                        foreach ($user_dic as $user_word) {
                            if (!empty($user_word)) {
                                $generated_array[$user_word] = $user_word;
                                $user_content_array[] = $user_word;
                            }
                        }
                    }
                }

                if (!empty($user_content_array)) {
                    // Rewrite the user dictionary with the trimmed, sorted, unique
                    // list (the file is opened in 'w' mode, replacing its content).
                    $user_content = "";
                    array_walk($user_content_array, 'trim_value');
                    asort($user_content_array);
                    $user_content_array = array_unique($user_content_array);
                    foreach ($user_content_array as $phrase) {
                        $user_content .= "{$phrase}\n";
                    }
                    $userdic_file = $this->ROOT_DIR . $ext_dir . 'dic/userdic.dic';
                    // terminates the script when the file cannot be opened
                    $uaf = fopen($userdic_file, 'w') or die("File is not writable or directory does not exist.");
                    fwrite($uaf, $user_content);
                    fclose($uaf);
                }

                // Build the strtr() table from the conversion rules, the protected
                // strings and the recognised English words.
                $conv_array = $conv_rules;
                if (!empty($generated_array)) {
                    $conv_array = array_merge($generated_array, $conv_array);
                }
                if (isset($english_words) && !empty($english_words)) {
                    $conv_array = array_merge($english_words, $conv_array);
                }
                $final_text = strtr($text, $conv_array);
            } else {
                // non-ascii encodings: plain rule-table translation
                $final_text = strtr($text, $conv_rules);
            }

            // Apply the regex rules cumulatively to the translated text.
            foreach ($final_regex_array as $key => $value) {
                $reg_count = 0;
                $final_text = preg_replace('/' . $key . '/us', $value, $final_text, -1, $reg_count);
                // print($reg_count.'<br>');
            }
        } else {
            // No conversion rules: regex rules only.
            // NOTE(review): as in the same-encoding branch, each iteration restarts
            // from $text, so only the last rule takes effect - confirm intent.
            foreach ($final_regex_array as $key => $value) {
                $reg_count = 0;
                $final_text = preg_replace('/' . $key . '/us', $value, $text, -1, $reg_count);
                // print($reg_count.'<br>');
            }
        }
    }

    // Store the result on the instance and return it.
    $this->text = $final_text;
    return $this->text;
}
<?php
// Demo: describe the Enchant broker and the en_US dictionary, then spell-check
// the word "soong" and print suggestions when it is rejected.
$tag = 'en_US';
$r = enchant_broker_init();
$bprovides = enchant_broker_describe($r);
echo "Current broker provides the following backend(s):\n";
print_r($bprovides);

if (enchant_broker_dict_exists($r, $tag)) {
    $d = enchant_broker_request_dict($r, $tag);
    $dprovides = enchant_dict_describe($d);
    echo "dictionary {$tag} provides:\n";

    // enchant_dict_check() returns true when the word is spelled correctly, so
    // the error count is derived from the negated result
    $wordcorrect = enchant_dict_check($d, "soong");
    $spellerrors = $wordcorrect ? 0 : 1;

    print_r($dprovides);
    echo "found {$spellerrors} spell errors\n";

    if ($spellerrors) {
        $suggs = enchant_dict_suggest($d, "soong");
        echo "Suggestions for 'soong':";
        print_r($suggs);
    }

    enchant_broker_free_dict($d);
}

enchant_broker_free($r);