Ejemplo n.º 1
0
/**
 * Benchmarks enchant spell checking over a list of words using the ru_RU dictionary.
 *
 * Every word is checked; for each word the dictionary rejects, suggestions are
 * requested as well so that their cost is part of the measurement. A one-line
 * summary (elapsed time, words per second, number of rejected words) is printed.
 *
 * @param array $words Words to run through the spell checker.
 * @return bool|null False when the broker or the dictionary is unavailable;
 *                   nothing (null) after a completed run.
 */
function bench_enchant($words)
{
    echo "Bench Enchant: ";
    $tag = 'ru_RU';
    $r = enchant_broker_init();
    if (!$r) {
        echo "enchant broker could not be initialised\n";
        return false;
    }
    if (!enchant_broker_dict_exists($r, $tag)) {
        echo "{$tag} dict not supported by enchant\n";
        // Release the broker on the early exit too, otherwise it is leaked.
        enchant_broker_free($r);
        return false;
    }
    $d = enchant_broker_request_dict($r, $tag);
    if (!$d) {
        echo "{$tag} dict could not be loaded by enchant\n";
        enchant_broker_free($r);
        return false;
    }
    $not_found = 0;
    $b = microtime(true);
    foreach ($words as $word) {
        // enchant_dict_quick_check() was avoided by the original author because it segfaulted.
        if (false === enchant_dict_check($d, $word)) {
            enchant_dict_suggest($d, $word);
            $not_found++;
        }
    }
    $e = microtime(true);
    $elapsed = $e - $b;
    // An empty or very short word list can finish within the timer resolution;
    // avoid dividing by zero in that case.
    $rate = $elapsed > 0 ? count($words) / $elapsed : 0.0;
    printf("time = %0.2f sec, words per second = %0.2f, not found = %d\n", $elapsed, $rate, $not_found);
    enchant_broker_free_dict($d);
    enchant_broker_free($r);
}
Ejemplo n.º 2
0
 /**
  * Spellchecks an array of words.
  *
  * If the configuration returned by getConfig() has an "enchant_dicts_path"
  * entry, it is registered as the dictionary path for the MySpell and Ispell
  * backends before the dictionary is requested.
  *
  * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
  * @param Array $words Array of words to check.
  * @return Array Map of each misspelled word to its array of suggestions (possibly empty).
  * @throws Exception When no enchant backend is available or no dictionary can be obtained for the language.
  */
 public function getSuggestions($lang, $words)
 {
     $suggestions = array();
     $enchant = enchant_broker_init();
     $config = $this->getConfig();
     if (isset($config["enchant_dicts_path"])) {
         enchant_broker_set_dict_path($enchant, ENCHANT_MYSPELL, $config["enchant_dicts_path"]);
         enchant_broker_set_dict_path($enchant, ENCHANT_ISPELL, $config["enchant_dicts_path"]);
     }
     if (!enchant_broker_describe($enchant)) {
         // Free the broker before bailing out so it is not leaked.
         enchant_broker_free($enchant);
         throw new Exception("Enchant spellchecker not find any backends.");
     }
     $lang = $this->normalizeLangCode($enchant, $lang);
     $dict = false;
     if (enchant_broker_dict_exists($enchant, $lang)) {
         $dict = enchant_broker_request_dict($enchant, $lang);
     }
     if (!$dict) {
         // Covers both an unknown language and a dictionary that failed to load.
         enchant_broker_free($enchant);
         throw new Exception("Enchant spellchecker could not find dictionary for language: " . $lang);
     }
     foreach ($words as $word) {
         if (!enchant_dict_check($dict, $word)) {
             $suggs = enchant_dict_suggest($dict, $word);
             // The suggest call may yield a non-array when nothing is found.
             if (!is_array($suggs)) {
                 $suggs = array();
             }
             $suggestions[$word] = $suggs;
         }
     }
     enchant_broker_free_dict($dict);
     enchant_broker_free($enchant);
     return $suggestions;
 }
Ejemplo n.º 3
0
 /**
  * Returns suggestions for a specific word.
  *
  * An empty array is returned when the language has no dictionary, when the
  * dictionary cannot be loaded, or when enchant offers no suggestions.
  *
  * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
  * @param String $word Specific word to get suggestions for.
  * @return Array of suggestions for the specified word.
  */
 function &getSuggestions($lang, $word)
 {
     $r = enchant_broker_init();
     $suggs = array();
     if (enchant_broker_dict_exists($r, $lang)) {
         $d = enchant_broker_request_dict($r, $lang);
         if ($d) {
             $found = enchant_dict_suggest($d, $word);
             // enchant_dict_suggest() can return NULL instead of an empty array;
             // keep the documented array return type in that case.
             if (is_array($found)) {
                 $suggs = $found;
             }
             enchant_broker_free_dict($d);
         }
     }
     enchant_broker_free($r);
     return $suggs;
 }
Ejemplo n.º 4
0
 /**
  * Returns suggestions for a specific word.
  *
  * When the language has no dictionary, throwError() is called with
  * "Language not installed"; if that call returns, an empty array is returned.
  *
  * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
  * @param String $word Specific word to get suggestions for.
  * @return Array of suggestions for the specified word.
  */
 function &getSuggestions($lang, $word)
 {
     $r = enchant_broker_init();
     $suggs = array();
     if (!enchant_broker_dict_exists($r, $lang)) {
         // Free the broker first: throwError() is defined elsewhere and may not return.
         enchant_broker_free($r);
         $this->throwError("Language not installed");
         return $suggs;
     }
     $d = enchant_broker_request_dict($r, $lang);
     if ($d) {
         $found = enchant_dict_suggest($d, $word);
         // enchant_dict_suggest() can return NULL instead of an empty array;
         // keep the documented array return type in that case.
         if (is_array($found)) {
             $suggs = $found;
         }
         enchant_broker_free_dict($d);
     }
     enchant_broker_free($r);
     return $suggs;
 }
Ejemplo n.º 5
0
 /**
  * Looks up spelling suggestions for a single word.
  *
  * @param String $lang Selected language code (like en_US or de_DE). Shortcodes like "en" and "de" work with enchant >= 1.4.1
  * @param String $word Word to fetch suggestions for.
  * @return Array Suggestions for the word; empty when the language has no
  *               dictionary or enchant returns no suggestion list.
  */
 function &getSuggestions($lang, $word)
 {
     $result = array();
     $broker = enchant_broker_init();
     if (enchant_broker_dict_exists($broker, $lang)) {
         $dictionary = enchant_broker_request_dict($broker, $lang);
         $candidates = enchant_dict_suggest($dictionary, $word);
         // The suggest call sometimes yields NULL; only accept a real array.
         if (is_array($candidates)) {
             $result = $candidates;
         }
         enchant_broker_free_dict($dictionary);
     }
     enchant_broker_free($broker);
     return $result;
 }
Ejemplo n.º 6
0
 /**
  * Checks every word against the dictionaries of the given locales.
  *
  * A word counts as correctly spelled as soon as one dictionary accepts it.
  * Words accepted by no dictionary receive the per-locale suggestions and are
  * collected in the result. Locales without a dictionary are skipped.
  *
  * @param array|Word[]   $words
  * @param array|string[] $locales
  *
  * @return SpellResult Carries the total word count and the misspelled words.
  */
 public function check($words, array $locales)
 {
     $broker = enchant_broker_init();

     // One dictionary per locale the broker can serve.
     $loaded = array();
     foreach ($locales as $locale) {
         if (enchant_broker_dict_exists($broker, $locale)) {
             $loaded[$locale] = enchant_broker_request_dict($broker, $locale);
         }
         // TODO handle and log error for locales without a dictionary
     }

     $misspelled = array();
     foreach ($words as $word) {
         $perLocale = array();
         $accepted = false;
         foreach ($loaded as $locale => $dict) {
             $perLocale[$locale] = array();
             // Once a dictionary has accepted the word the remaining ones are not queried.
             if (!$accepted && enchant_dict_quick_check($dict, $word->getWord(), $perLocale[$locale])) {
                 $accepted = true;
             }
         }
         $word->setChecked($accepted);
         if ($word->isChecked()) {
             continue;
         }
         $word->setSuggests($perLocale);
         $misspelled[] = $word;
     }

     foreach ($loaded as $dict) {
         enchant_broker_free_dict($dict);
     }
     enchant_broker_free($broker);

     $result = new SpellResult();
     $result->setCountOfWords(count($words));
     $result->setMisspelledWords($misspelled);
     return $result;
 }
Ejemplo n.º 7
0
                            $shortest = $lev;
                        }
                    }
                    $keywords_suggest[] = '<b class="search-word-suggest">' . $closest . '</b>';
                    $keywords_suggest_plain[] = $closest;
                } else {
                    $keywords_suggest[] = '<b class="search-word-correct">' . $word . '</b>';
                    $keywords_suggest_plain[] = $word;
                }
                $word = strtok(" \t\n");
            }
            $keywords_suggest_plain_str = implode(' ', $keywords_suggest_plain);
            $search_result_info .= '<a class="search-suggestion-link" href="./index.php?keywords=' . urlencode($keywords_suggest_plain_str) . '&search=Search">';
            $search_result_info .= implode(' ', $keywords_suggest);
            $search_result_info .= '</a>?</div>';
            enchant_broker_free_dict($dict);
        }
    }
    if (isset($biblio_list) && isset($sysconf['enable_xml_result']) && $sysconf['enable_xml_result']) {
        $search_result_info .= '<div><a href="index.php?resultXML=true&' . $_SERVER['QUERY_STRING'] . '" class="xmlResultLink" target="_blank" title="View Result in XML Format" style="clear: both;">XML Result</a></div>';
    }
}
// check if we are on xml resultset mode
if ((isset($_GET['rss']) || isset($_GET['resultXML'])) && $sysconf['enable_xml_result']) {
    // get document list but don't output the result
    $biblio_list->getDocumentList(false);
    if ($biblio_list->num_rows > 0) {
        // send http header
        header('Content-Type: text/xml');
        echo '<?xml version="1.0" encoding="UTF-8" ?>' . "\n";
        if (isset($_GET['rss'])) {
 /**
  * Return a collection of suggestions corresponding to a query.
  *
  * The query is split into unique words. For each word, alternate spellings are
  * gathered from enchant (when the extension is loaded and the options carry a
  * locale) and from get_sugg_trigrams(). Every combination of replacements is
  * sent to Sphinx, and alternatives with at least 1% of the best hit count are kept.
  *
  * @param  string              $query
  * @param  SearchEngineOptions $options Supplies the locale and is passed on to the index and trigram lookups.
  * @return ArrayCollection     An array collection of SearchEngineSuggestion
  */
 private function getSuggestions($query, SearchEngineOptions $options)
 {
     // First we split the query into simple words
     $words = explode(" ", $this->cleanupQuery(mb_strtolower($query)));
     $tmpWords = [];
     foreach ($words as $word) {
         if (trim($word) === '') {
             continue;
         }
         $tmpWords[] = $word;
     }
     $words = array_unique($tmpWords);
     $altVersions = [];
     foreach ($words as $word) {
         $altVersions[$word] = [$word];
     }
     // As we got words, we look for alternate word for each of them
     $locale = $options->getLocale();
     if (function_exists('enchant_broker_init') && $locale) {
         $broker = enchant_broker_init();
         if (enchant_broker_dict_exists($broker, $locale)) {
             $dictionnary = enchant_broker_request_dict($broker, $locale);
             foreach ($words as $word) {
                 // Correctly spelled words keep their own entry. Previously they
                 // received an undefined value or the suggestions of an earlier word.
                 if (enchant_dict_check($dictionnary, $word)) {
                     continue;
                 }
                 $suggs = enchant_dict_suggest($dictionnary, $word);
                 // The suggest call may not return an array when nothing is found.
                 if (is_array($suggs) && count($suggs) > 0) {
                     $altVersions[$word] = array_unique($suggs);
                 }
             }
             enchant_broker_free_dict($dictionnary);
         }
         enchant_broker_free($broker);
     }
     /**
      * @todo enhance the trigramm query, as it could be sent in one batch
      */
     foreach ($altVersions as $word => $versions) {
         $altVersions[$word] = array_unique(array_merge($versions, $this->get_sugg_trigrams($word, $options)));
     }
     // We now build an array of all possibilities based on the original query
     $queries = [$query];
     foreach ($altVersions as $word => $versions) {
         $tmp_queries = [];
         foreach ($versions as $version) {
             foreach ($queries as $alt_query) {
                 $tmp_queries[] = $alt_query;
                 $tmp_queries[] = str_replace($word, $version, $alt_query);
             }
             $tmp_queries[] = str_replace($word, $version, $query);
         }
         $queries = array_unique(array_merge($queries, $tmp_queries));
     }
     $suggestions = [];
     $max_results = 0;
     foreach ($queries as $alt_query) {
         $results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query, $options));
         if ($results !== false && isset($results['total_found'])) {
             if ($results['total_found'] > 0) {
                 $max_results = max($max_results, (int) $results['total_found']);
                 $suggestions[] = new SearchEngineSuggestion($query, $alt_query, (int) $results['total_found']);
             }
         }
     }
     // __CLASS__ instead of the 'self' string, which is a deprecated callable form in PHP 8.2.
     usort($suggestions, [__CLASS__, 'suggestionsHitSorter']);
     $tmpSuggestions = new ArrayCollection();
     foreach ($suggestions as $suggestion) {
         // Drop alternatives with less than 1% of the best hit count.
         if ($suggestion->getHits() < $max_results / 100) {
             continue;
         }
         $tmpSuggestions->add($suggestion);
     }
     return $tmpSuggestions;
 }
	/**
	 * Wiki-specific search suggestions using enchant library.
	 * Use SphinxSearch_setup.php to create the dictionary
	 *
	 * Each term in $this->mTerms that the 'sphinx' dictionary rejects is replaced
	 * by the first suggestion that differs from it ignoring case. If at least one
	 * term was replaced, the rebuilt phrase is stored in $this->mSuggestion.
	 * Does nothing when the enchant extension is not loaded.
	 */
	function suggestWithEnchant() {
		if (!function_exists('enchant_broker_init')) {
			return;
		}
		$broker = enchant_broker_init();
		// The dictionary is looked up in the directory of this file.
		enchant_broker_set_dict_path($broker, ENCHANT_MYSPELL, dirname( __FILE__ ));
		if ( enchant_broker_dict_exists( $broker, 'sphinx' ) ) {
			$dict = enchant_broker_request_dict( $broker, 'sphinx' );
			$suggestion_found = false;
			$full_suggestion = '';
			foreach ( $this->mTerms as $word ) {
				if ( !enchant_dict_check($dict, $word) ) {
					$suggestions = enchant_dict_suggest($dict, $word);
					// The suggest call may not return an array when nothing is found;
					// count() on such a value fails on current PHP versions.
					if ( !is_array( $suggestions ) ) {
						$suggestions = array();
					}
					foreach ( $suggestions as $candidate ) {
						if ( strtolower($candidate) != strtolower($word) ) {
							$word = $candidate;
							$suggestion_found = true;
							break;
						}
					}
				}
				$full_suggestion .= $word . ' ';
			}
			enchant_broker_free_dict( $dict );
			if ($suggestion_found) {
				$this->mSuggestion = trim( $full_suggestion );
			}
		}
		enchant_broker_free( $broker );
	}
Ejemplo n.º 10
0
 /**
  * @fn	convert
  * @memberof Converter
  * @brief	Public function to convert encoding of text contents.
  * @param	$text	Source text content to convert.
  * @param	$options	Options array. Every entry becomes a local variable named after its key,
  * 						so only the exact keys below are read by this method.
  * 						Available Options -
  * 							-# output (Output encoding - default is myanmar3)
  * 							-# input_font (Input encoding; missing, '' or 'auto' means detect it with enc_test())
  * 							-# encoding (encoding type - ascii or utf8 - default is utf8 or none)
  * 							-# spelling_check (Enable spelling checking when converting ascii fonts.)
  * 							-# text_only (true when the source content is plain text; otherwise html tags and
  * 							   style/script blocks are protected from conversion - default is false)
  * 							-# en_zwsp (enable or disable adding Zero-Width-Space in converted result.)
  * 							-# exceptions (commas seperated list of words or phrase to ignore from conversion process.)
  * 							-# suggested (true to use the user suggested words list in dic/userdic.dic.)
  *
  * NOTE(review): `output`, `input_font` and `encoding` are concatenated into the path of a
  * file that is then required. They must never come from untrusted input.
  *
  * @returns	string $this->text	Return converted content. When the rules file is missing and no
  * 						regex rule applies, an error message string is returned instead.
  */
 public function convert($text, $options = array())
 {
     // Defaults for the options that are read unconditionally below. Null keeps
     // isset() false for anything the included rule files might test.
     $output = 'myanmar3';
     $input_font = null;
     $encoding = null;
     $en_zwsp = null;
     $text_only = null;
     foreach ($options as $option_name => $option_value) {
         ${$option_name} = $option_value;
         //extract $options and set $option_name as variable name and $option_value as variable value.
     }
     if ($input_font === null || $input_font === '' || $input_font == 'auto') {
         /**
          * @var $input	Detected input encoding.
          */
         $input = $this->enc_test($text);
     } else {
         /**
          * @var $input	Input encoding as given by the caller.
          */
         $input = $input_font;
     }
     /**
      * @var $ext_dir
      */
     $ext_dir = 'ext/';
     $rules_dir = 'rules/' . $output;
     /**
      * @var $ext_file	Rules file name: <input>-rules[-<encoding>].php
      */
     if (isset($encoding) && $encoding != '') {
         $ext_file = $input . '-rules-' . $encoding . '.php';
     } else {
         $ext_file = $input . '-rules.php';
     }
     if (file_exists($this->ROOT_DIR . $ext_dir . $rules_dir . '/' . $ext_file)) {
         // The included files run in this scope and define the rule arrays
         // ($conv_rules, $order, $correction, $zwsp) that are read below.
         require $this->ROOT_DIR . $ext_dir . $rules_dir . '/' . $ext_file;
         include $this->ROOT_DIR . $ext_dir . $rules_dir . '/correction.php';
         include $this->ROOT_DIR . $ext_dir . $rules_dir . '/zwsp.php';
     } else {
         $final_text = $this->ROOT_DIR . $ext_dir . $rules_dir . '/' . $ext_file . " Fatal Error: Your converting rules file cannot be found!\nFile is missing or never existed. Try another choice or contact developer!";
         //return ;
     }
     // Any rule array an include did not provide falls back to an empty array.
     if (!isset($order)) {
         /**
          * @var array $order 	Character ordering rules array.
          */
         $order = array();
     }
     if (!isset($correction)) {
         $correction = array();
     }
     if (!isset($zwsp)) {
         $zwsp = array();
     }
     /**
      * @var bool $en_zwsp	enable/disable adding zwsp.
      */
     if ($en_zwsp == true) {
         /**
          * @var $final_regex_array
          */
         $final_regex_array = array_merge_recursive($order, $correction, $zwsp);
         //merge all defined regular expression arrays.
     } else {
         /**
          * @var $final_regex_array
          */
         $final_regex_array = array_merge_recursive($order, $correction);
         //merge all defined regular expression arrays without zwsp array.
     }
     // Applies every regex rule in turn, each one working on the result of the
     // previous one. A rule whose pattern fails (preg_replace returns null) is skipped.
     $apply_rules = function ($subject, $modifiers) use ($final_regex_array) {
         foreach ($final_regex_array as $pattern => $replacement) {
             $replaced = preg_replace('/' . $pattern . '/' . $modifiers, $replacement, $subject);
             if ($replaced !== null) {
                 $subject = $replaced;
             }
         }
         return $subject;
     };
     if ($input === $output) {
         if (!empty($final_regex_array)) {
             // Rules are chained; previously each rule ran on the original text
             // and only the last rule's result was kept.
             $final_text = $apply_rules($text, 'u');
         } elseif (!isset($final_text)) {
             // No rule to apply and no error message set: text is unchanged.
             $final_text = $text;
         }
     } else {
         if (isset($conv_rules)) {
             if ($encoding == 'ascii') {
                 // The helpers below are global functions used as array_walk callbacks.
                 // They are declared only once so a second call to convert() does not
                 // fail with "Cannot redeclare".
                 if (!function_exists('trim_value')) {
                     /**
                      * @fn	trim_value
                      * @brief	Trims whitespace from a value in place.
                      * @param	$value
                      */
                     function trim_value(&$value)
                     {
                         $value = trim($value);
                     }
                 }
                 if (!function_exists('ucwords_value')) {
                     /**
                      * @fn	ucwords_value
                      * @brief	Upper-cases the first letter of each word of a value in place.
                      * @param	$value
                      */
                     function ucwords_value(&$value)
                     {
                         $value = ucwords($value);
                     }
                 }
                 if (!function_exists('space_on_short_words')) {
                     /**
                      * @fn	space_on_short_words
                      * @brief	Surrounds a value of one to three word characters with spaces, in place.
                      * @param	$value
                      */
                     function space_on_short_words(&$value)
                     {
                         $value = preg_replace('/^[\\d\\w]{1,3}$/u', ' $0 ', $value);
                     }
                 }
                 if (isset($spelling_check) && false !== $spelling_check) {
                     // Collect the English words of the text so they can be mapped to
                     // themselves and thereby protected from conversion.
                     /**
                      * @var $stripped_text
                      */
                     $stripped_text = strip_tags($text);
                     /**
                      * @var $paragraph
                      */
                     $paragraph = preg_split("/[\\s,]+/s", $stripped_text);
                     array_walk($paragraph, 'trim_value');
                     /**
                      * @var $words_array
                      */
                     $words_array = array_unique($paragraph);
                     array_multisort($words_array);
                     if (function_exists('enchant_broker_init')) {
                         /**
                          * @var $tag
                          */
                         $tag = 'en_US';
                         /**
                          * @var $r
                          */
                         $r = enchant_broker_init();
                         if (enchant_broker_dict_exists($r, $tag)) {
                             /**
                              * @var $d
                              */
                             $d = enchant_broker_request_dict($r, $tag);
                             foreach ($words_array as $word) {
                                 if (!empty($word)) {
                                     /**
                                      * @var $wordcorrect
                                      */
                                     $wordcorrect = enchant_dict_check($d, $word);
                                     if ($wordcorrect) {
                                         $word = preg_replace('/^\\d+$/u', '"$0"', $word);
                                         $word = preg_replace('/^[\\d\\w]{1,3}$/u', ' $0 ', $word);
                                         $english_words_array[$word] = $word;
                                     }
                                 }
                             }
                             enchant_broker_free_dict($d);
                         }
                         enchant_broker_free($r);
                     } else {
                         // Without enchant, fall back to the bundled word list
                         // (the include is expected to define $dictionary).
                         include $this->ROOT_DIR . $ext_dir . 'dic/dictionary_array.php';
                         array_walk($words_array, 'space_on_short_words');
                         foreach ($words_array as $word) {
                             if (!empty($word)) {
                                 preg_match('/(\\w+)(ies)|(\\w+)(s)/', $word, $plural_match_ies);
                                 if (!empty($plural_match_ies)) {
                                     array_walk($plural_match_ies, 'space_on_short_words');
                                     if ($plural_match_ies[2] == ' ies ') {
                                         /**
                                          * @var $singular
                                          */
                                         $singular = $plural_match_ies[1] . 'y';
                                     } elseif ($plural_match_ies[4] == ' s ') {
                                         /**
                                          * @var $singular
                                          */
                                         $singular = $plural_match_ies[3];
                                     }
                                     if (in_array($singular, $dictionary) || in_array(strtolower($singular), $dictionary)) {
                                         $plural_array[$plural_match_ies[0]] = $plural_match_ies[0];
                                     }
                                 }
                                 if (in_array($word, $dictionary) || in_array(strtolower($word), $dictionary)) {
                                     $english_words_array[$word] = $word;
                                 }
                             }
                         }
                     }
                     /**
                      * @var $english_words
                      */
                     $english_words = array();
                     if (isset($english_words_array) && !empty($english_words_array)) {
                         $english_words = $english_words_array;
                     }
                     if (isset($plural_array) && !empty($plural_array)) {
                         $english_words = array_merge($english_words, $plural_array);
                     }
                     $english_words = array_unique($english_words);
                 }
                 /**
                  * @var $generated_array	Strings mapped to themselves so strtr() leaves them untouched.
                  */
                 $generated_array = array();
                 if (true !== $text_only) {
                     preg_match_all('/<(.*)>/uU', $text, $html_tags);
                     foreach ($html_tags[0] as $html_tag) {
                         if (!empty($html_tag)) {
                             $generated_array[$html_tag] = $html_tag;
                         }
                     }
                     preg_match_all('/<(style|script)(.*)<\\/(style|script)>/uUs', $text, $script_tags);
                     foreach ($script_tags[0] as $script_tag) {
                         if (!empty($script_tag)) {
                             $generated_array[$script_tag] = $script_tag;
                         }
                     }
                 }
                 /**
                  * @var $user_content_array	Phrases that get written to the user dictionary file.
                  */
                 $user_content_array = array();
                 if (isset($exceptions)) {
                     /**
                      * @var $exceps_array
                      */
                     $exceps_array = explode(',', $exceptions);
                     if (!empty($exceps_array)) {
                         foreach ($exceps_array as $ignore_list) {
                             // Only exceptions longer than four bytes are honoured.
                             if (!empty($ignore_list) && strlen($ignore_list) > 4) {
                                 $generated_array[$ignore_list] = $ignore_list;
                                 $user_content_array[] = $ignore_list;
                             }
                         }
                     }
                 }
                 if (isset($suggested) && true === $suggested) {
                     if (file_exists($this->ROOT_DIR . $ext_dir . 'dic/userdic.dic')) {
                         /**
                          * @var $user_dic
                          */
                         $user_dic = file($this->ROOT_DIR . $ext_dir . 'dic/userdic.dic', FILE_SKIP_EMPTY_LINES);
                         array_walk($user_dic, 'trim_value');
                         foreach ($user_dic as $user_word) {
                             if (!empty($user_word)) {
                                 $generated_array[$user_word] = $user_word;
                                 $user_content_array[] = $user_word;
                             }
                         }
                     }
                 }
                 if (!empty($user_content_array)) {
                     /**
                      * @var $user_content
                      */
                     $user_content = "";
                     array_walk($user_content_array, 'trim_value');
                     asort($user_content_array);
                     $user_content_array = array_unique($user_content_array);
                     foreach ($user_content_array as $phrase) {
                         $user_content .= "{$phrase}\n";
                     }
                     /**
                      * @var $userdic_file
                      */
                     $userdic_file = $this->ROOT_DIR . $ext_dir . 'dic/userdic.dic';
                     // NOTE(review): this terminates the whole script when the file
                     // cannot be opened; kept because callers may rely on it.
                     /**
                      * @var $uaf
                      */
                     $uaf = fopen($userdic_file, 'w') or die("File is not writable or directory does not exist.");
                     fwrite($uaf, $user_content);
                     fclose($uaf);
                 }
                 /**
                  * @var $conv_array	Protected strings first, then the conversion rules.
                  */
                 $conv_array = $conv_rules;
                 if (!empty($generated_array)) {
                     $conv_array = array_merge($generated_array, $conv_array);
                 }
                 if (isset($english_words) && !empty($english_words)) {
                     $conv_array = array_merge($english_words, $conv_array);
                 }
                 /**
                  * @var $final_text
                  */
                 $final_text = strtr($text, $conv_array);
             } else {
                 /**
                  * @var $final_text
                  */
                 $final_text = strtr($text, $conv_rules);
             }
             $final_text = $apply_rules($final_text, 'us');
         } else {
             if (!empty($final_regex_array)) {
                 // Rules are chained; previously each rule ran on the original text
                 // and only the last rule's result was kept.
                 $final_text = $apply_rules($text, 'us');
             } elseif (!isset($final_text)) {
                 // No rule to apply and no error message set: text is unchanged.
                 $final_text = $text;
             }
         }
     }
     /**
      * @var $text
      */
     $this->text = $final_text;
     return $this->text;
 }