Beispiel #1
0
	/**
	 * Configures the spell checker from an options array and, when pspell is
	 * enabled, opens the pspell dictionary link.
	 *
	 * @param array $params Options read here: "lang" (falls back to 'en' when
	 *                      missing or empty), "skip_length", "use_pspell"
	 *                      ("Y" enables pspell if the extension exists) and
	 *                      "mode".
	 */
	public function __construct($params)
	{
		$hasLang = isset($params["lang"]) && $params["lang"] != '';
		if ($hasLang)
		{
			$this->lang = $params["lang"];
		}
		else
		{
			$this->lang = 'en';
		}
		$this->skip_len = $params["skip_length"];

		// pspell is used only when the extension is present and the caller asked for it.
		$this->pspell = function_exists('pspell_config_create') && $params["use_pspell"] == "Y";
		// The custom checker is switched off here regardless of $params.
		$this->custom_spell = false;
		$this->pspell_mode = $params["mode"];

		$this->dics_path = $this->checkDicPath();
		$this->user_dics_path = "{$this->dics_path}/custom.pws";
		$this->custom_dics_path = "{$this->dics_path}/custom_dics/{$this->lang}_";

		if ($this->custom_spell)
		{
			$this->dic = array();
		}

		if (!$this->pspell)
		{
			return;
		}

		$config = pspell_config_create($this->lang, null, null, 'utf-8');
		pspell_config_ignore($config, $this->skip_len);
		pspell_config_mode($config, $params["mode"]);
		pspell_config_personal($config, $this->user_dics_path);
		$this->pspell_link = pspell_new_config($config);
	}
 /**
  * Creates a new Pspell spell checker
  *
  * @param string $language the language used by this spell checker. This
  *                          should be a two-letter ISO 639 language code
  *                          followed by an optional two digit ISO 3166
  *                          country code separated by a dash or underscore.
  *                          For example, 'en', 'en-CA' and 'en_CA' are
  *                          valid languages.
  * @param string $path_to_data optional. Directory handed to Pspell as both
  *                              its data directory and its dictionary
  *                              directory. If not specified, neither
  *                              setting is changed.
  * @param string $repl_pairs optional. The filename passed to
  *                            pspell_config_repl() as the replacement-pairs
  *                            file. If not specified, none is configured.
  * @param string $personal_wordlist optional. The filename of the personal
  *                                   wordlist for this spell checker. If not
  *                                   specified, no personal wordlist is
  *                                   used. The personal wordlist may contain
  *                                   spellings for words that are correct
  *                                   but are not in the regular dictionary.
  *
  * @throws NateGoSearchException if the Pspell extension is not available.
  * @throws NateGoSearchException if a dictionary in the specified language
  *                               could not be loaded.
  */
 public function __construct($language, $path_to_data = '', $repl_pairs = '', $personal_wordlist = '')
 {
     if (!extension_loaded('pspell')) {
         throw new NateGoSearchException('The Pspell PHP extension is ' . 'required for NateGoSearchPSpellSpellChecker.');
     }
     $config = pspell_config_create($language, '', '', 'utf-8');
     pspell_config_mode($config, PSPELL_FAST);
     if ($path_to_data != '') {
         pspell_config_data_dir($config, $path_to_data);
         pspell_config_dict_dir($config, $path_to_data);
     }
     if ($repl_pairs != '') {
         pspell_config_repl($config, $repl_pairs);
     }
     if ($personal_wordlist != '') {
         pspell_config_personal($config, $personal_wordlist);
         // The posix extension is optional (and absent on Windows); without
         // it ownership cannot be checked, so permissions are left alone.
         // Strict comparison keeps a failed fileowner() (false) from
         // matching uid 0.
         $owned_by_process = function_exists('posix_getuid')
             && file_exists($personal_wordlist)
             && fileowner($personal_wordlist) === posix_getuid();
         if ($owned_by_process) {
             // update permissions (-rw-rw----): owner and group only, the
             // wordlist must not be world-writable
             chmod($personal_wordlist, 0660);
         }
         $this->personal_wordlist = $personal_wordlist;
     }
     $this->dictionary = pspell_new_config($config);
     if ($this->dictionary === false) {
         // Report the requested language from the argument; no language
         // property is assigned in this constructor.
         throw new NateGoSearchException(sprintf("Could not create Pspell dictionary with language '%s'.", $language));
     }
     $this->loadBlacklistedSuggestions();
 }
 /**
  * Builds a pspell configuration from this object's language, skip length,
  * mode and personal wordlist path, then stores the dictionary link created
  * from it in $this->pspell_link.
  */
 function pspellConfig()
 {
     $cfg = pspell_config_create($this->lang);

     pspell_config_ignore($cfg, $this->skip_len);
     pspell_config_mode($cfg, $this->mode);
     pspell_config_personal($cfg, $this->personal_path);

     $link = pspell_new_config($cfg);
     $this->pspell_link = $link;
 }
 /**
  * Opens the pspell dictionary for $this->langcode.
  *
  * $this->modus selects the suggestion mode: 0 is PSPELL_FAST, 2 is
  * PSPELL_BAD_SPELLERS, anything else is PSPELL_NORMAL. The resulting link
  * is stored in $this->resid; if it is falsy, $this->errormsg is set.
  */
 function init()
 {
     $config = pspell_config_create($this->langcode);
     pspell_config_ignore($config, $this->ignore);

     // switch compares loosely, exactly like the == checks it replaces.
     switch ($this->modus) {
         case 0:
             $mode = PSPELL_FAST;
             break;
         case 2:
             $mode = PSPELL_BAD_SPELLERS;
             break;
         default:
             $mode = PSPELL_NORMAL;
             break;
     }
     pspell_config_mode($config, $mode);

     // Warnings from a missing dictionary are suppressed; failure is
     // reported through errormsg instead.
     $this->resid = @pspell_new_config($config);
     if ($this->resid) {
         return;
     }
     $this->errormsg = 'Could not open dictionary "' . $this->langcode . '"';
 }
Beispiel #5
0
/**
 * Spell-checks one word against the "ru" dictionary and echoes the result.
 *
 * When the word is not recognised, a JSON array of suggestions is echoed.
 * Otherwise boolean true is echoed, which PHP prints as "1".
 *
 * @param string $word Word to check.
 */
function check($word)
{
    $config = pspell_config_create("ru", "", "", "UTF-8");
    pspell_config_mode($config, PSPELL_FAST);
    $dictionary = pspell_new_config($config);

    if (pspell_check($dictionary, $word)) {
        echo true;
        return;
    }

    // Copy the suggestions into a fresh, sequentially indexed list.
    $list = array();
    foreach (pspell_suggest($dictionary, $word) as $candidate) {
        $list[] = $candidate;
    }
    echo json_encode($list);
}
Beispiel #6
0
 /**
  * Registers a new configuration entry in the global $__pspell table.
  *
  * The new entry is a copy of $__pspell[0] with "lang", "variety", "jargon"
  * and "encoding" overridden by the arguments. When $mode is truthy it is
  * additionally applied to the new entry through pspell_config_mode().
  *
  * @param string $lang     Value stored under "lang".
  * @param string $spelling Value stored under "variety".
  * @param string $jargon   Value stored under "jargon".
  * @param string $enc      Value stored under "encoding".
  * @param mixed  $mode     Mode forwarded to pspell_config_mode(); skipped
  *                         when falsy.
  *
  * @return int Index of the new entry in $__pspell.
  */
 function pspell_new($lang = "en", $spelling = "", $jargon = "", $enc = "utf-8", $mode = 0)
 {
     global $__pspell;

     // The next free slot doubles as the handle returned to the caller.
     $handle = count($__pspell);
     $overrides = array(
         "lang" => $lang,
         "variety" => $spelling,
         "jargon" => $jargon,
         "encoding" => $enc,
     );
     $__pspell[$handle] = array_merge($__pspell[0], $overrides);

     if (!$mode) {
         return $handle;
     }

     pspell_config_mode($handle, $mode);
     return $handle;
 }
 /**
  * Creates a pspell dictionary link for language "en", spelling 'american',
  * in PSPELL_FAST mode.
  *
  * @return mixed Whatever pspell_new_config() returns for that configuration.
  */
 static function getLibrary()
 {
     // $IP was only needed by the personal-dictionary setup, which is disabled.
     global $IP;

     $config = pspell_config_create("en", 'american');
     pspell_config_mode($config, PSPELL_FAST);

     // The custom dictionary (wikiHowDictionary::DICTIONARY_LOC) is no longer attached.
     return pspell_new_config($config);
 }
/**
 * Opens a pspell dictionary and publishes the link as the constant
 * __FUD_PSPELL_LINK__.
 *
 * The personal wordlist is "forum.pws" under $GLOBALS['FORUM_SETTINGS_PATH'],
 * and the value 2 is passed to pspell_config_ignore().
 *
 * @param int    $type Mode passed to pspell_config_mode().
 * @param string $dict Language passed to pspell_config_create().
 *
 * @return bool Always true.
 */
function init_spell($type, $dict)
{
    $config = pspell_config_create($dict);
    pspell_config_mode($config, $type);

    $personal = $GLOBALS['FORUM_SETTINGS_PATH'] . "forum.pws";
    pspell_config_personal($config, $personal);
    pspell_config_ignore($config, 2);

    $link = pspell_new_config($config);
    define('__FUD_PSPELL_LINK__', $link);

    return true;
}
Beispiel #9
0
// Bootstrap for the AJAX spell checker: sets options, opens the pspell
// dictionary link and hands control to the cpaint AJAX dispatcher.

// Whether users are allowed to enter custom text.
// THIS OPTION IS CURRENTLY UNDER TESTING
$allowCustomInserts = false;
// If using a personal dictionary, set the path to it.  Default is in the
// personal_dictionary subdirectory of the location of spell_checker.php.
$path_to_personal_dictionary = dirname(__FILE__) . "/personal_dictionary/personal_dictionary.txt";
// If the pspell extension is not loaded, include the pspell wrapper for
// aspell instead. ASPELL_BIN is defined before the wrapper is loaded.
if (!function_exists('pspell_suggest')) {
    // Set the path to aspell if you need to use it.
    define('ASPELL_BIN', '/usr/bin/aspell');
    require_once "pspell_comp.php";
}
// Create and configure a link to the pspell module. The dictionary language
// is hard-coded to "es" here.
$pspell_config = pspell_config_create("es");
//$pspell_config = pspell_new("en");
pspell_config_mode($pspell_config, PSPELL_FAST);
// NOTE(review): $usePersonalDict is not assigned anywhere in this part of the
// file - confirm it is set earlier. The body below is entirely commented out,
// so the personal dictionary is never attached whatever its value.
if ($usePersonalDict) {
    // Allows the use of a custom dictionary (Thanks to Dylan Thurston for this addition).
    //pspell_config_personal($pspell_config, $path_to_personal_dictionary);
}
$pspell_link = pspell_new_config($pspell_config);
// AJAX library file.
require_once "cpaint/cpaint2.inc.php";
// Register the handler names with cpaint, then let it process the request
// and send the response. The handlers are presumably functions defined
// elsewhere in this file - they are not visible here.
$cp = new cpaint();
$cp->register('showSuggestions');
$cp->register('spellCheck');
$cp->register('switchText');
$cp->register('addWord');
$cp->start();
$cp->return_data();
/*************************************************************
/**
 * Spell-checks the plain-text portions of an HTML fragment with pspell.
 *
 * The text is split on HTML tags, which are passed through untouched. Each
 * plain-text portion is split on spaces; in every piece the first run of
 * ASCII letters/apostrophes is checked unless addedWord() accepts it.
 * A misspelled run is wrapped in <span badword=1>...</span>, and a JavaScript
 * snippet is generated that records the piece and its suggestions in the
 * `words` array.
 *
 * @param string $txt      HTML fragment to check.
 * @param array  $dialects $dialects[0] is the pspell language code; "en" is
 *                         used when it is missing or empty.
 *
 * @return array Two elements: the marked-up HTML, and the JavaScript source
 *               that populates `words`.
 */
function parseStringP($txt, $dialects)
{
    // Always define the language: previously it was only set when
    // $dialects[1] was present, leaving pspell_config_create() with an
    // undefined variable otherwise.
    $lang = (isset($dialects[0]) && $dialects[0] != "") ? $dialects[0] : "en";

    //pspell parse initialisation
    $psp_conf = pspell_config_create($lang);
    pspell_config_runtogether($psp_conf, false);
    pspell_config_mode($psp_conf, PSPELL_NORMAL);
    $psp = pspell_new_config($psp_conf);

    //join the string so that it is easier to parse - does not matter when formatting because it is html
    $txt = str_replace(array("\r\n", "\r", "\n"), " ", $txt);

    // Escapes for embedding a value in a double-quoted JavaScript string.
    $js_escape = array("\\" => "\\\\", "\"" => "\\\"");

    $jstext = "";
    $jsarrindex = 0;

    //split string by html tags; the tags themselves are kept as separate parts
    $parts = preg_split("#(<[^>]*>)#", $txt, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        $parts = array($txt);
    }

    foreach ($parts as $i => $portion) {
        //we do not work on html portions and empty text
        if ($portion === "" || strpos($portion, "<") !== false) {
            continue;
        }

        //replace for parsing, then get distinct words from the portion
        $words = explode(" ", str_replace("&nbsp;", " ", $portion));

        foreach ($words as $j => $word) {
            //link addresses are not words
            if (preg_match("#https?://#", $word)) {
                continue;
            }

            //we can have strings like ",something.". We separate in 3 portions.
            //Every group is optional, so this always matches at offset 0.
            preg_match("/([^a-zA-Z]*)([a-zA-Z']*)([^a-zA-Z]*)/", $word, $mt);
            $beforeword = $mt[1];
            $inword = $mt[2];
            if (addedWord($inword)) {
                continue;
            }
            $afterword = $mt[3];
            // Whatever follows the matched prefix (e.g. "thing" in
            // "teh-thing") must be kept, otherwise it vanishes from the output.
            $rest = (string) substr($word, strlen($mt[0]));

            if (pspell_check($psp, $inword)) {
                continue;
            }

            $sugs = pspell_suggest($psp, $inword);
            if (!is_array($sugs)) {
                $sugs = array();
            }

            // Words with and without suggestions get the same markup.
            $words[$j] = $beforeword . '<span badword=1>' . $inword . '</span>' . $afterword . $rest;

            $ref = "\nwords[" . $jsarrindex . "]";
            $jstext .= $ref . " = new Object();";
            $jstext .= $ref . ".status = 0;";
            $jstext .= $ref . ".word = \"" . strtr($word, $js_escape) . "\";";
            $jstext .= $ref . ".suggestions = new Array();";
            foreach (array_values($sugs) as $k => $suggestion) {
                $clean = str_replace(array("\r", "\n"), "", $suggestion);
                $jstext .= $ref . ".suggestions[" . $k . "] = \"" . strtr($clean, $js_escape) . "\";";
            }
            $jsarrindex++;
        }

        $parts[$i] = $words;
    }

    //implosion of the parts to reconstitute the string
    $toret = "";
    foreach ($parts as $portion) {
        $toret .= is_array($portion) ? implode(" ", $portion) : $portion;
    }

    return array($toret, $jstext);
}
Beispiel #11
0
 /**
  * Constructor performing the general initialisation of the data-preparation
  * system: tokenizer, spelling correction, restoring Polish diacritics,
  * vulgarisms.
  *
  * The helper objects are held in static properties and are created only
  * while the corresponding property is still null.
  *
  * @param mysqli $dbconn Database connection object used in subclasses.
  * @param string $dictdir Directory with dictionaries, stop lists, etc.
  * @param int $idc Identifier of the comment set in use.
  * @param bool $copy_unknown Whether to keep unrecognised words.
  * @param array $options Parameters of the specific classifier as an
  *                       associative array (not read by this constructor).
  */
 function __construct($dbconn, $dictdir, $idc, $copy_unknown, $options = null)
 {
     $this->dbconn = $dbconn;
     $this->idc = $idc;
     $this->copy_unknown = $copy_unknown;

     if (self::$validation === null) {
         self::$validation = new Validation("{$dictdir}/vulgarism.txt");
     }

     if (self::$tokenizer === null) {
         self::$tokenizer = new Tokenizer("{$dictdir}/stoplist.txt");
     }

     if (self::$fsaa === null) {
         self::$fsaa = new Fsaa("{$dictdir}/lort_acc_full.fsa");
     }

     if (self::$fsal === null) {
         self::$fsal = new Fsal("{$dictdir}/llems_full.fsa");
     }

     if (self::$pspell === null) {
         $config = pspell_config_create("pl");
         // options that make aspell run faster
         pspell_config_ignore($config, 4);
         pspell_config_mode($config, PSPELL_FAST);
         pspell_config_runtogether($config, false);
         self::$pspell = pspell_new_config($config);
     }
 }