/**
 * Configures the spell checker from an options array and, when pspell is
 * enabled and available, opens the pspell dictionary.
 *
 * Keys read from $params:
 *  - "lang"        language code; 'en' is used when the key is missing or empty
 *  - "skip_length" handed to pspell_config_ignore()
 *  - "use_pspell"  "Y" turns pspell on, provided pspell_config_create() exists
 *  - "mode"        handed to pspell_config_mode()
 *
 * @param array $params Options as listed above.
 */
public function __construct($params)
{
    $this->lang = (isset($params["lang"]) && $params["lang"] != '') ? $params["lang"] : 'en';
    $this->skip_len = $params["skip_length"];
    $this->pspell = (function_exists('pspell_config_create') && ($params["use_pspell"] == "Y"));
    // The custom (non-pspell) checker is switched off; it used to be driven
    // by $params["use_custom_spell"] == "Y".
    $this->custom_spell = false;
    $this->pspell_mode = $params["mode"];

    $this->dics_path = $this->checkDicPath();
    $this->user_dics_path = $this->dics_path . "/custom.pws";
    $this->custom_dics_path = $this->dics_path . '/custom_dics/' . $this->lang . '_';

    if ($this->custom_spell) {
        $this->dic = array();
    }

    if ($this->pspell) {
        // Empty strings instead of null: "spelling" and "jargon" are string
        // parameters, and passing null to them is deprecated as of PHP 8.1.
        $pspell_config = pspell_config_create($this->lang, '', '', 'utf-8');
        pspell_config_ignore($pspell_config, $this->skip_len);
        pspell_config_mode($pspell_config, $this->pspell_mode);
        pspell_config_personal($pspell_config, $this->user_dics_path);
        // NOTE(review): pspell_new_config() is documented to return false on
        // failure; that value is stored unchecked here — confirm callers cope.
        $this->pspell_link = pspell_new_config($pspell_config);
    }
}
/**
 * Creates a new Pspell spell checker
 *
 * @param string $language the language used by this spell checker. This
 *                          should be a two-letter ISO 639 language code
 *                          followed by an optional two digit ISO 3166
 *                          country code separated by a dash or underscore.
 *                          For example, 'en', 'en-CA' and 'en_CA' are
 *                          valid languages.
 * @param string $path_to_data optional. Directory passed to both
 *                          pspell_config_data_dir() and
 *                          pspell_config_dict_dir(). If not specified,
 *                          neither is set.
 * @param string $repl_pairs optional. Value passed to
 *                          pspell_config_repl(). If not specified, no
 *                          replacement-pairs file is configured.
 * @param string $personal_wordlist optional. The filename of the personal
 *                          wordlist for this spell checker. If not
 *                          specified, no personal wordlist is
 *                          used. The personal wordlist may contain
 *                          spellings for words that are correct
 *                          but are not in the regular dictionary.
 *
 * @throws NateGoSearchException if the Pspell extension is not available.
 * @throws NateGoSearchException if a dictionary in the specified language
 *                               could not be loaded.
 */
public function __construct($language, $path_to_data = '', $repl_pairs = '', $personal_wordlist = '')
{
    if (!extension_loaded('pspell')) {
        throw new NateGoSearchException('The Pspell PHP extension is ' . 'required for NateGoSearchPSpellSpellChecker.');
    }

    $config = pspell_config_create($language, '', '', 'utf-8');
    pspell_config_mode($config, PSPELL_FAST);

    if ($path_to_data != '') {
        pspell_config_data_dir($config, $path_to_data);
        pspell_config_dict_dir($config, $path_to_data);
    }

    if ($repl_pairs != '') {
        pspell_config_repl($config, $repl_pairs);
    }

    if ($personal_wordlist != '') {
        pspell_config_personal($config, $personal_wordlist);

        // Only the owner may chmod a file, so the mode is touched only when
        // this process owns the wordlist. posix_getuid() lives in the
        // optional posix extension; without it the chmod step is skipped
        // instead of aborting with an undefined-function error.
        if (function_exists('posix_getuid')
            && file_exists($personal_wordlist)
            && fileowner($personal_wordlist) == posix_getuid()
        ) {
            // update permissions (-rw-rw-rw-)
            // NOTE(review): the previous comment said -rw-rw----, which would
            // be 0660; the code has always applied 0666 — confirm the intent.
            chmod($personal_wordlist, 0666);
        }

        $this->personal_wordlist = $personal_wordlist;
    }

    $this->dictionary = pspell_new_config($config);

    if ($this->dictionary === false) {
        // Report the requested language; $this->language is never assigned
        // in this constructor, so the message previously lost the value.
        throw new NateGoSearchException(sprintf("Could not create Pspell dictionary with language '%s'.", $language));
    }

    $this->loadBlacklistedSuggestions();
}
/**
 * Builds a pspell configuration from this object's settings and opens the
 * dictionary.
 *
 * Reads $this->lang, $this->skip_len, $this->mode and $this->personal_path;
 * the result of pspell_new_config() is stored in $this->pspell_link.
 */
function pspellConfig()
{
    $config = pspell_config_create($this->lang);

    pspell_config_ignore($config, $this->skip_len);
    pspell_config_mode($config, $this->mode);
    pspell_config_personal($config, $this->personal_path);

    $this->pspell_link = pspell_new_config($config);
}
/**
 * Opens the pspell dictionary for $this->langcode.
 *
 * The suggestion mode is derived from $this->modus: 0 selects PSPELL_FAST,
 * 2 selects PSPELL_BAD_SPELLERS, and any other value selects PSPELL_NORMAL.
 * The handle is stored in $this->resid; when it is falsy, $this->errormsg
 * is set to a message naming the language code.
 */
function init()
{
    $config = pspell_config_create($this->langcode);
    pspell_config_ignore($config, $this->ignore);

    // switch compares loosely, exactly like the == tests it replaces.
    switch ($this->modus) {
        case 0:
            $mode = PSPELL_FAST;
            break;
        case 2:
            $mode = PSPELL_BAD_SPELLERS;
            break;
        default:
            $mode = PSPELL_NORMAL;
            break;
    }
    pspell_config_mode($config, $mode);

    // Warnings are suppressed; failure is reported through $this->errormsg.
    $this->resid = @pspell_new_config($config);
    if ($this->resid) {
        return;
    }

    $this->errormsg = 'Could not open dictionary "' . $this->langcode . '"';
}
/**
 * Checks one word against the "ru" dictionary (UTF-8, fast mode) and echoes
 * the outcome.
 *
 * For a word pspell accepts, boolean true is echoed (PHP prints it as "1").
 * Otherwise the suggestions are echoed as a JSON array.
 *
 * @param string $word Word to check.
 */
function check($word)
{
    $config = pspell_config_create("ru", "", "", "UTF-8");
    pspell_config_mode($config, PSPELL_FAST);
    $dictionary = pspell_new_config($config);

    if (pspell_check($dictionary, $word)) {
        $json = true;
    } else {
        // Copy the suggestions into a fresh list before encoding.
        $list = array();
        $candidates = pspell_suggest($dictionary, $word);
        foreach ($candidates as $candidate) {
            $list[] = $candidate;
        }
        $json = json_encode($list);
    }

    echo $json;
}
/**
 * Registers a new entry in the global $__pspell table and returns its index.
 *
 * The new entry starts as a copy of $__pspell[0] with the "lang", "variety",
 * "jargon" and "encoding" keys overridden by the arguments. When $mode is
 * truthy it is applied to the new entry through pspell_config_mode().
 *
 * @param string $lang     Stored under "lang".
 * @param string $spelling Stored under "variety".
 * @param string $jargon   Stored under "jargon".
 * @param string $enc      Stored under "encoding".
 * @param int    $mode     Optional mode; 0 leaves the mode untouched.
 *
 * @return int Index of the new entry in $__pspell.
 */
function pspell_new($lang = "en", $spelling = "", $jargon = "", $enc = "utf-8", $mode = 0)
{
    global $__pspell;

    $overrides = array(
        "lang" => $lang,
        "variety" => $spelling,
        "jargon" => $jargon,
        "encoding" => $enc,
    );

    // The next handle is the current table size.
    $handle = count($__pspell);
    $__pspell[$handle] = array_merge($__pspell[0], $overrides);

    if ($mode) {
        pspell_config_mode($handle, $mode);
    }

    return $handle;
}
/**
 * Opens a pspell dictionary for language "en", spelling 'american', in
 * PSPELL_FAST mode.
 *
 * No personal dictionary is attached; the custom wikiHow dictionary is no
 * longer used.
 *
 * @return mixed Whatever pspell_new_config() returns for this configuration.
 */
static function getLibrary()
{
    $config = pspell_config_create("en", 'american');
    pspell_config_mode($config, PSPELL_FAST);

    return pspell_new_config($config);
}
/**
 * Opens a pspell dictionary and publishes it as the constant
 * __FUD_PSPELL_LINK__.
 *
 * The personal wordlist is "forum.pws" under
 * $GLOBALS['FORUM_SETTINGS_PATH'], and words of up to 2 characters are
 * handed to pspell_config_ignore().
 *
 * @param int    $type Mode passed to pspell_config_mode().
 * @param string $dict Language passed to pspell_config_create().
 *
 * @return bool Always true.
 */
function init_spell($type, $dict)
{
    $cfg = pspell_config_create($dict);

    pspell_config_mode($cfg, $type);
    $wordlist = $GLOBALS['FORUM_SETTINGS_PATH'] . "forum.pws";
    pspell_config_personal($cfg, $wordlist);
    pspell_config_ignore($cfg, 2);

    $link = pspell_new_config($cfg);
    define('__FUD_PSPELL_LINK__', $link);

    return true;
}
// Bootstrap for the AJAX spell checker: sets options, makes sure pspell
// functions exist, opens the dictionary, then hands control to cpaint.

// Set whether users are allowed to enter custom text.
// THIS OPTION IS CURRENTLY UNDER TESTING
$allowCustomInserts = false;

// If using a personal dictionary, set the path to it. Default is in the
// personal_dictionary subdirectory of the location of spell_checker.php.
$path_to_personal_dictionary = dirname(__FILE__) . "/personal_dictionary/personal_dictionary.txt";

// If pspell doesn't exist, then include the pspell wrapper for aspell.
if (!function_exists('pspell_suggest')) {
    // Set the path to aspell if you need to use it.
    define('ASPELL_BIN', '/usr/bin/aspell');
    require_once "pspell_comp.php";
}

// Create and configure a link to the pspell module. The dictionary language
// is hard-coded to "es" here.
$pspell_config = pspell_config_create("es");
//$pspell_config = pspell_new("en");
pspell_config_mode($pspell_config, PSPELL_FAST);

// NOTE(review): $usePersonalDict is not assigned in the visible part of the
// file — presumably set earlier; confirm. The call below is commented out,
// so this branch currently does nothing and no personal dictionary is used.
if ($usePersonalDict) {
    // Allows the use of a custom dictionary (Thanks to Dylan Thurston for this addition).
    //pspell_config_personal($pspell_config, $path_to_personal_dictionary);
}

$pspell_link = pspell_new_config($pspell_config);

require_once "cpaint/cpaint2.inc.php"; //AJAX library file

// Register the functions made callable through cpaint, then start cpaint
// and send its response.
$cp = new cpaint();
$cp->register('showSuggestions');
$cp->register('spellCheck');
$cp->register('switchText');
$cp->register('addWord');
$cp->start();
$cp->return_data();

/*************************************************************
/**
 * Spell-checks the plain-text parts of an HTML fragment with pspell.
 *
 * The input is split on HTML tags. Tags are passed through untouched; every
 * other part is split on spaces and each token is checked. Tokens containing
 * "http://" are skipped, as are words accepted by addedWord(). A misspelled
 * word is wrapped in <span badword=1>...</span> and described in a JavaScript
 * snippet that fills a `words` array (status, original token, suggestions).
 *
 * @param string $txt      HTML/text to check. Line breaks are replaced by spaces.
 * @param array  $dialects $dialects[0] is the language handed to pspell;
 *                         "en" is used when it is missing or empty.
 *                         $dialects[1] (the dialect) is currently not used.
 *
 * @return array array(0 => text with misspellings marked up,
 *                     1 => JavaScript statements describing the flagged words)
 */
function parseStringP($txt, $dialects)
{
    // Previously $lang was only assigned when a dialect was supplied, leaving
    // it undefined otherwise. "en" mirrors the language of the older,
    // commented-out pspell_config_create("en", $dialect) call.
    $lang = "en";
    if (isset($dialects[0]) && $dialects[0] != "") {
        $lang = $dialects[0];
    }

    // pspell initialisation
    $psp_conf = pspell_config_create($lang);
    pspell_config_runtogether($psp_conf, false);
    pspell_config_mode($psp_conf, PSPELL_NORMAL);
    $psp = pspell_new_config($psp_conf);

    // Join the text into one line; layout does not matter because it is HTML.
    $txt = str_replace(array("\r\n", "\r", "\n"), " ", $txt);

    $jstext = "";
    $jsarrindex = 0;

    // Split by HTML tags, keeping the tags themselves as separate parts.
    $parts = preg_split("#(<[^>]*>)#", $txt, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        // The split failed; return the text unmarked rather than losing it.
        return array($txt, $jstext);
    }

    foreach ($parts as $i => $portion) {
        // Leave tags and empty parts alone.
        if ($portion == "" || strpos($portion, "<") !== false) {
            continue;
        }

        // Treat non-breaking spaces as word separators.
        // NOTE(review): the original first argument rendered as a plain space,
        // which made the call a no-op; it looks like an "&nbsp;" entity that
        // was decoded somewhere along the way, so both the entity and the
        // UTF-8 NBSP bytes are handled here — confirm.
        $portion = str_replace(array("&nbsp;", "\xC2\xA0"), " ", $portion);

        $words = explode(" ", $portion);
        foreach ($words as $j => $word) {
            // Link addresses are not words.
            if (strpos($word, "http://") !== false) {
                continue;
            }

            // A token can look like ",something." — separate it into leading
            // non-letters, the word itself, and trailing non-letters.
            preg_match("/([^a-zA-Z]*)([a-zA-Z']*)([^a-zA-Z]*)/", $word, $mt);
            $beforeword = $mt[1];
            $inword = $mt[2];
            $afterword = $mt[3];
            // Whatever follows the first match (e.g. "bar" in "foo/bar") must
            // survive the rewrite below; it used to be dropped.
            $rest = (string) substr($word, strlen($mt[0]));

            // Nothing to check when the token holds no letters at all.
            if ($inword === "") {
                continue;
            }
            if (addedWord($inword)) {
                continue;
            }
            if (pspell_check($psp, $inword)) {
                continue;
            }

            $sugs = pspell_suggest($psp, $inword);
            if (!is_array($sugs)) {
                $sugs = array();
            }

            // Same markup and JavaScript whether or not suggestions exist.
            $words[$j] = $beforeword . '<span badword=1>' . $inword . '</span>' . $afterword . $rest;

            // Quotes and backslashes are escaped so the token cannot break
            // out of the generated JavaScript string literal.
            $jstext .= "\nwords[" . $jsarrindex . "] = new Object();";
            $jstext .= "\nwords[" . $jsarrindex . "].status = 0;";
            $jstext .= "\nwords[" . $jsarrindex . "].word = \"" . addcslashes($word, "\"\\") . "\";";
            $jstext .= "\nwords[" . $jsarrindex . "].suggestions = new Array();";

            $jsarrindex2 = 0;
            foreach ($sugs as $suggestion) {
                $clean = str_replace(array("\r", "\n"), "", $suggestion);
                $jstext .= "\nwords[" . $jsarrindex . "].suggestions[" . $jsarrindex2 . "] = \"" . addcslashes($clean, "\"\\") . "\";";
                $jsarrindex2++;
            }
            $jsarrindex++;
        }

        $parts[$i] = implode(" ", $words);
    }

    // Reassemble tags and text in their original order.
    $toret = implode("", $parts);

    return array($toret, $jstext);
}
/**
 * Constructor providing the general initialisation of the data preparation
 * system: tokenizer, spelling correction, restoration of Polish diacritics,
 * vulgarisms.
 *
 * The helper objects are kept in static properties and are created only
 * when the corresponding property is still null.
 *
 * @param mysqli $dbconn       Database connection object used in subclasses.
 * @param string $dictdir      Folder with dictionaries, stop lists, etc.
 * @param int    $idc          Identifier of the comment set in use.
 * @param bool   $copy_unknown Whether to keep unrecognised words.
 * @param array  $options      Parameters of the concrete classifier as an
 *                             associative array (not read in this constructor).
 */
function __construct($dbconn, $dictdir, $idc, $copy_unknown, $options = null)
{
    $this->idc = $idc;
    $this->copy_unknown = $copy_unknown;
    $this->dbconn = $dbconn;

    if (self::$validation === null) {
        self::$validation = new Validation($dictdir . '/vulgarism.txt');
    }

    if (self::$tokenizer === null) {
        self::$tokenizer = new Tokenizer($dictdir . '/stoplist.txt');
    }

    if (self::$fsaa === null) {
        self::$fsaa = new Fsaa($dictdir . '/lort_acc_full.fsa');
    }

    if (self::$fsal === null) {
        self::$fsal = new Fsal($dictdir . '/llems_full.fsa');
    }

    if (self::$pspell === null) {
        $config = pspell_config_create("pl");

        // Options that make aspell run faster.
        pspell_config_ignore($config, 4);
        pspell_config_mode($config, PSPELL_FAST);
        pspell_config_runtogether($config, false);

        self::$pspell = pspell_new_config($config);
    }
}