/**
 * Creates a new Pspell spell checker
 *
 * @param string $language the language used by this spell checker. This
 *                          should be a two-letter ISO 639 language code
 *                          followed by an optional two-letter ISO 3166
 *                          country code separated by a dash or underscore.
 *                          For example, 'en', 'en-CA' and 'en_CA' are
 *                          valid languages.
 * @param string $path_to_data optional. Directory passed to Pspell as
 *                              both its data directory and its dictionary
 *                              directory. If not specified, neither
 *                              setting is changed.
 * @param string $repl_pairs optional. The filename of the replacement
 *                            pairs file passed to pspell_config_repl().
 *                            If not specified, no replacement pairs file
 *                            is configured.
 * @param string $personal_wordlist optional. The filename of the personal
 *                                   wordlist for this spell checker. If not
 *                                   specified, no personal wordlist is
 *                                   used. The personal wordlist may contain
 *                                   spellings for words that are correct
 *                                   but are not in the regular dictionary.
 *
 * @throws NateGoSearchException if the Pspell extension is not available.
 * @throws NateGoSearchException if a dictionary in the specified language
 *                               could not be loaded.
 */
public function __construct($language, $path_to_data = '', $repl_pairs = '', $personal_wordlist = '')
{
    if (!extension_loaded('pspell')) {
        throw new NateGoSearchException('The Pspell PHP extension is ' . 'required for NateGoSearchPSpellSpellChecker.');
    }

    $config = pspell_config_create($language, '', '', 'utf-8');
    pspell_config_mode($config, PSPELL_FAST);

    if ($path_to_data != '') {
        // the same directory serves as both data and dictionary location
        pspell_config_data_dir($config, $path_to_data);
        pspell_config_dict_dir($config, $path_to_data);
    }

    if ($repl_pairs != '') {
        pspell_config_repl($config, $repl_pairs);
    }

    if ($personal_wordlist != '') {
        pspell_config_personal($config, $personal_wordlist);

        // Only the owner of a file may change its mode, so the chmod is
        // attempted only when this process owns the wordlist. The posix
        // extension is optional (and absent on Windows); without it the
        // ownership test cannot be made and the permissions are left alone.
        if (function_exists('posix_getuid')
            && file_exists($personal_wordlist)
            && fileowner($personal_wordlist) == posix_getuid()
        ) {
            // update permissions (-rw-rw-rw-)
            // NOTE(review): an earlier comment described the target mode as
            // -rw-rw---- (0660); the mode actually applied is 0666. Confirm
            // which one is intended before tightening it.
            chmod($personal_wordlist, 0666);
        }

        $this->personal_wordlist = $personal_wordlist;
    }

    $this->dictionary = pspell_new_config($config);

    if ($this->dictionary === false) {
        // Report the language that was requested; nothing in this
        // constructor stores it on the object.
        throw new NateGoSearchException(sprintf("Could not create Pspell dictionary with language '%s'.", $language));
    }

    $this->loadBlacklistedSuggestions();
}
/**
 * Builds a Pspell handle that uses the replacement pairs file of the
 * current language.
 *
 * The file is looked up as "<language_path><language>.repl". When it does
 * not exist nothing happens and the object's state is left as it was.
 * Otherwise a fresh Pspell configuration for the language is stored in
 * $this->pspell_config, the file is registered on it, and the resulting
 * dictionary handle is stored in $this->pspell.
 */
private function loadReplacements()
{
    $replacement_file = $this->language_path . $this->language . ".repl";

    if (!file_exists($replacement_file)) {
        return;
    }

    $this->pspell_config = pspell_config_create($this->language);
    pspell_config_repl($this->pspell_config, $replacement_file);
    $this->pspell = pspell_new_config($this->pspell_config);
}
/**
 * Initialise our aSpell calling environment.
 *
 * Two modes are supported. When the pspell PHP extension is not available,
 * the aspell command line program is located and a shell command is
 * prepared; otherwise a pspell dictionary handle is opened directly.
 *
 * The language comes from $_REQUEST['dictionary'] when that is non-blank;
 * otherwise from do_lang('dictionary') if do_lang() exists, else 'en_GB'.
 *
 * The script is terminated with exit() when the shell mode cannot work
 * (safe mode, shell_exec disabled, aspell missing or not executable) or
 * when no pspell dictionary could be opened.
 *
 * @return array A tuple of environmental details (dictionary list,aspell call command,temporary file name,language being used).
 *               In shell mode the first two entries are command strings and the third is the temporary file path;
 *               in pspell mode the first and third entries are NULL and the second is the pspell dictionary handle.
 */
function aspell_init()
{
    // Find the language
    if (!isset($_REQUEST['dictionary']) || strlen(trim($_REQUEST['dictionary'])) < 1) {
        $lang = function_exists('do_lang') ? do_lang('dictionary') : 'en_GB'; // Default to UK English (as per ocPortal)
    } else {
        $lang = $_REQUEST['dictionary'];
    }

    $aspellcommand = mixed();

    $force_shell = false;

    if (!function_exists('pspell_check') || $force_shell) {
        if (str_replace(array('on', 'true', 'yes'), array('1', '1', '1'), strtolower(ini_get('safe_mode'))) == '1') {
            exit('Spell Checker does not work with safe mode systems that do not have direct pspell support into PHP');
        }
        if (strpos(@ini_get('disable_functions'), 'shell_exec') !== false) {
            exit('Spell Checker does not work on systems with shell_exec disabled that do not have direct pspell support into PHP');
        }

        // Our temporary spell check file.
        // /tmp is used unless safe mode is on, or open_basedir is set and does not include /tmp.
        $use_custom_temp = str_replace(array('on', 'true', 'yes'), array('1', '1', '1'), strtolower(ini_get('safe_mode'))) == '1'
            || (@strval(ini_get('open_basedir')) != '' && preg_match('#(^|:|;)/tmp($|:|;|/)#', ini_get('open_basedir')) == 0);
        $temptext = tempnam($use_custom_temp ? get_custom_file_base() . '/safe_mode_temp/' : '/tmp/', 'spell_');
        if ($temptext === false) {
            $temptext = tempnam(get_custom_file_base() . '/safe_mode_temp/', 'spell_');
        }

        // Find aspell
        $aspell = 'aspell';
        $aspell_args = '-a --lang=' . _filter_naughty_harsh($lang);
        if (DIRECTORY_SEPARATOR == '\\') {
            // See if there is a local install of aspell here
            $local_bin = dirname(__FILE__) . '\\aspell\\bin\\';
            if (file_exists($local_bin . 'aspell.exe')) {
                $aspell = $local_bin . 'aspell.exe';
                if (file_exists($local_bin . 'aspell_wrap.exe')) {
                    // The wrapper is invoked with the bin directory appended, so from here on
                    // $aspell is a command prefix and must not be tested with file_exists().
                    $aspell = $local_bin . 'aspell_wrap.exe ' . $local_bin;
                }
                //$dic_dir=wrap_exec($aspell.' config dict-dir');
                //$dicfil=preg_replace('/^.*\/lib\/(aspell\S*)\n.*/s','$1',$dic_dir);
                //$aspell_args.=' --dict-dir='.$dicfil;
            } else {
                $aspell = 'C:\\Progra~1\\Aspell\\bin\\aspell.exe';
                // The local binaries were verified above; only the default location still needs checking.
                if (!file_exists($aspell)) {
                    exit('ASpell not installed in default locations.');
                }
            }

            $aspell_version = wrap_exec($aspell . ' version');
        } else {
            // See if there is a local install of aspell here
            if (file_exists(dirname(__FILE__) . '/aspell/bin/aspell')) {
                putenv('PATH=' . dirname(__FILE__) . '/aspell/bin:' . getenv('PATH'));
                putenv('LD_LIBRARY_PATH=' . dirname(__FILE__) . '/aspell/lib:' . getenv('LD_LIBRARY_PATH'));
                //$dic_dir=wrap_exec($aspell.' config dict-dir');
                //$dicfil=dirname(__FILE__).'/aspell/lib/'.preg_replace('/^.*\/lib\/(aspell\S*)\n.*/s','$1',$dic_dir);
                //$aspell_args.=' --dict-dir='.$dicfil.' --add-filter-path='.$dicfil;
            }

            $aspell_version = wrap_exec($aspell . ' version');
        }

        if ($aspell_version === false) {
            exit('ASpell would not execute. It is most likely not installed, or a security measure is in place, or file permissions are not correctly set. If on Windows, you may need to give windows\\system32\\cmd.exe execute permissions to the web user.');
        }

        // Old aspell doesn't know about encoding, which means that unicode will be broke, but we should at least let it try.
        $a_ver = array();
        preg_match('/really [aA]spell ([0-9]+)\\.([0-9]+)(?:\\.([0-9]+))?/i', $aspell_version, $a_ver);
        if (!array_key_exists(1, $a_ver)) {
            $a_ver[1] = '1';
        }
        if (!array_key_exists(2, $a_ver)) {
            $a_ver[2] = '0';
        }
        if (!array_key_exists(3, $a_ver)) {
            $a_ver[3] = '0';
        }
        $a_ver = array('major' => (int) $a_ver[1], 'minor' => (int) $a_ver[2], 'release' => (int) $a_ver[3]);
        // NOTE(review): any version whose minor number is below 60 (including the 1.0.0 default used when
        // the version string is not recognised) falls through to the "--encoding" probe below.
        if ($a_ver['major'] >= 0 && $a_ver['minor'] >= 60) {
            $aspell_args .= ' -H --encoding=utf-8';
        } elseif (preg_match('/--encoding/', wrap_exec($aspell . ' 2>&1')) != 0) {
            $aspell_args .= ' --mode=none --add-filter=sgml --encoding=utf-8';
        } else {
            $aspell_args .= ' --mode=none --add-filter=sgml';
        }

        $aspelldictionaries = $aspell . ' dump dicts';
        $aspellcommand = $aspell . ' ' . $aspell_args . ' < ' . $temptext;
    } else {
        //list($lang,$spelling)=explode('_',$lang);
        $spelling = '';
        $temptext = NULL;
        $aspelldictionaries = NULL;
    }

    // Personal dictionaries
    global $SITE_INFO;
    if (!isset($SITE_INFO)) {
        require_once '../../../../info.php';
    }
    $cookie_member_id = $SITE_INFO['user_cookie'];
    $p_dicts_name = array_key_exists($cookie_member_id, $_COOKIE) ? _filter_naughty_harsh($_COOKIE[$cookie_member_id]) : 'guest';
    $p_dict_path = get_custom_file_base() . '/data_custom/spelling/personal_dicts' . DIRECTORY_SEPARATOR . $p_dicts_name;
    if (!file_exists($p_dict_path)) {
        mkdir($p_dict_path, 02770);
    }

    if (is_null($temptext)) {
        list($lang_stub, ) = explode('_', $lang);
        // $lang may come straight from the request and the stub becomes part of a file name,
        // so it gets the same filtering the shell branch applies to the language.
        $lang_stub = _filter_naughty_harsh($lang_stub);
        $charset = str_replace('ISO-', 'iso', str_replace('iso-', 'iso', do_lang('charset')));
        if (DIRECTORY_SEPARATOR == '\\') {
            $aspellcommand = @pspell_new_personal($p_dict_path . '/' . $lang_stub . '.pws', $lang, $spelling, '', $charset);
            if ($aspellcommand === false) {
                // Retry without error suppression so the reason for the failure is reported
                $aspellcommand = pspell_new_personal($p_dict_path . '/' . $lang_stub . '.pws', $lang, $spelling, '', $charset);
            }
        } else {
            $aspellconfig = @pspell_config_create($lang, $spelling, '', $charset);
            if ($aspellconfig === false) {
                $aspellconfig = pspell_config_create('en', $spelling, '', $charset);
            }
            pspell_config_personal($aspellconfig, $p_dict_path . '/' . $lang_stub . '.pws');
            pspell_config_repl($aspellconfig, $p_dict_path . '/' . $lang_stub . '.prepl');
            $aspellcommand = @pspell_new_config($aspellconfig);
            if ($aspellcommand === false && $lang != 'en') {
                // Fall back to an English dictionary, keeping the personal files of the requested language
                $aspellconfig = pspell_config_create('en', $spelling, '', $charset);
                pspell_config_personal($aspellconfig, $p_dict_path . '/' . $lang_stub . '.pws');
                pspell_config_repl($aspellconfig, $p_dict_path . '/' . $lang_stub . '.prepl');
                $aspellcommand = pspell_new_config($aspellconfig);
            }
        }

        // The pspell constructors signal failure with false, not NULL, so both are treated as "no dictionary".
        if ($aspellcommand === false || is_null($aspellcommand)) {
            exit;
        }
    }

    return array($aspelldictionaries, $aspellcommand, $temptext, $lang);
}
/**
 * Spell-checks a text and collects suggestions for its misspelled words.
 *
 * The text is split on runs of non-word characters and every distinct word
 * is checked against a pspell dictionary. The dictionary uses the global
 * $charset as its encoding, normal suggestion mode, and accepts run-together
 * words. PSPELL_PWL and PSPELL_REPL, when non-empty, name the personal
 * wordlist and the replacement pairs file to use.
 *
 * NOTE(review): the calls to pspell_check() and pspell_suggest() in the body
 * are meant to reach the pspell extension; this presumes the function is
 * declared as a class method (its enclosing scope is not visible here) --
 * confirm.
 *
 * @param string       $text the text to check.
 * @param string|false $lang the dictionary language. When omitted or false,
 *                           $GLOBALS['LANG'] is used.
 *
 * @return array misspelled word => value returned by pspell_suggest() for
 *               it, in order of first appearance. Empty when every word is
 *               known or when the dictionary could not be opened.
 */
function pspell_check($text, $lang = false)
{
    global $charset;

    // Fall back to the global language only when the caller gave none
    if (!$lang) {
        $lang = $GLOBALS['LANG'];
    }

    // PREG_SPLIT_NO_EMPTY drops the empty strings produced by leading,
    // trailing or adjacent separators
    $words = preg_split('/\\W+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    $return = array();

    // pspell_config_create() takes no mode argument; the suggestion mode
    // and run-together handling are set through their own config calls
    $pspell_config = pspell_config_create($lang, "", "", $charset);
    pspell_config_mode($pspell_config, PSPELL_NORMAL);
    pspell_config_runtogether($pspell_config, true);
    if (PSPELL_PWL) {
        pspell_config_personal($pspell_config, PSPELL_PWL);
    }
    if (PSPELL_REPL) {
        pspell_config_repl($pspell_config, PSPELL_REPL);
    }

    $pspell = pspell_new_config($pspell_config);
    if ($pspell === false) {
        trigger_error('Could not open a pspell dictionary for the requested language.', E_USER_WARNING);
        return $return;
    }

    foreach ($words as $value) {
        // A word that is already reported needs no second lookup
        if (array_key_exists($value, $return)) {
            continue;
        }
        // SplitPagename $value
        if (!pspell_check($pspell, $value)) {
            $return[$value] = pspell_suggest($pspell, $value);
        }
    }

    return $return;
}