Exemplo n.º 1
0
 /**
  * Emulated pspell_config_mode(): records the requested mode for a config.
  *
  * Stores the raw mode value under "_mode", translates the masked mode into
  * a suggestion-mode name stored under "sug-mode", and forwards the
  * PSPELL_RUN_TOGETHER bit to pspell_config_runtogether().
  *
  * @param mixed $i    Key of the config entry inside the global $__pspell array.
  * @param int   $mode Mode constant, optionally combined with PSPELL_RUN_TOGETHER.
  */
 function pspell_config_mode($i, $mode)
 {
     global $__pspell;

     // Keep the unmodified value around first.
     $__pspell[$i]["_mode"] = $mode;

     // Lookup table from numeric mode to the suggestion-mode name.
     $modeNames = array(
         0x0 => "normal",
         PSPELL_NORMAL => "normal",
         PSPELL_FAST => "fast",
         PSPELL_ULTRA => "ultra",
         PSPELL_BAD_SPELLERS => "bad-spellers",
     );

     // Only the bits selected by the 0x27 mask pick the suggestion mode.
     $modeKey = $mode & 0x27;
     $__pspell[$i]["sug-mode"] = $modeNames[$modeKey];

     // The run-together flag travels in the same integer; pass it on as-is.
     $runTogether = $mode & PSPELL_RUN_TOGETHER;
     pspell_config_runtogether($i, $runTogether);
 }
Exemplo n.º 2
0
/**
 * Spell-checks the plain-text portions of an HTML fragment with pspell.
 *
 * The fragment is split into HTML tags and text portions. Tags (and any
 * portion containing "<") are passed through untouched. Every
 * space-separated token of a text portion is reduced to its leading run of
 * letters/apostrophes, and that run is checked; tokens containing a URL
 * scheme and words accepted by addedWord() are skipped. A misspelt word is
 * wrapped in <span badword=1> and described by JavaScript statements that
 * fill a words[] array.
 *
 * @param string $txt      HTML fragment to check. Line breaks are replaced
 *                         by spaces and "&nbsp;" in text portions by a space.
 * @param array  $dialects $dialects[0] is the language code passed to
 *                         pspell_config_create(); "en" is used when it is
 *                         missing or empty. $dialects[1] is not used.
 *
 * @return array Two elements: [0] the fragment with misspelt words marked,
 *               [1] the generated JavaScript statements (may be empty).
 */
function parseStringP($txt, $dialects)
{
    // Always end up with a defined language, whatever $dialects contains.
    $lang = "en";
    if (isset($dialects[0]) && $dialects[0] != "") {
        $lang = $dialects[0];
    }

    // pspell initialisation
    $psp_conf = pspell_config_create($lang);
    pspell_config_runtogether($psp_conf, false);
    pspell_config_mode($psp_conf, PSPELL_NORMAL);
    $psp = pspell_new_config($psp_conf);

    // Join the lines so the text is easier to parse; harmless for HTML.
    $txt = str_replace(array("\r\n", "\r", "\n"), " ", $txt);

    // Without a usable dictionary nothing can be checked: return the text unmarked.
    if ($psp === false) {
        return array($txt, "");
    }

    $jstext = "";
    $jsarrindex = 0;
    // Escapes needed to embed arbitrary text in a double-quoted JS string literal.
    $jsEscape = array("\\" => "\\\\", "\"" => "\\\"");

    // Split by HTML tags, keeping the tags themselves as separate portions.
    $parts = preg_split("#(<[^>]*>)#", $txt, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        $parts = array($txt);
    }

    foreach ($parts as $i => $portion) {
        // HTML portions and empty text are passed through as they are.
        if ($portion == "" || strpos($portion, "<") !== false) {
            continue;
        }

        $words = explode(" ", str_replace("&nbsp;", " ", $portion));
        $marked = $words;

        foreach ($words as $j => $word) {
            // Link addresses are not words.
            if (strpos($word, "http://") !== false || strpos($word, "https://") !== false) {
                continue;
            }

            // Tokens can look like ",something."; separate them into the
            // characters before the word, the word, and the characters after.
            if (!preg_match("/([^a-zA-Z]*)([a-zA-Z']*)([^a-zA-Z]*)/", $word, $mt)) {
                continue;
            }
            $beforeword = $mt[1];
            $inword = $mt[2];
            $afterword = $mt[3];
            // The pattern only covers the first letter run; keep whatever
            // follows (e.g. "known" in "well-known") so no text is lost.
            $rest = (string) substr($word, strlen($mt[0]));

            // Nothing to check in tokens without letters (numbers, punctuation, "").
            if ($inword === "" || addedWord($inword)) {
                continue;
            }
            if (pspell_check($psp, $inword)) {
                continue;
            }

            $sugs = pspell_suggest($psp, $inword);
            if (!is_array($sugs)) {
                $sugs = array();
            }

            $marked[$j] = $beforeword . '<span badword=1>' . $inword . '</span>' . $afterword . $rest;

            $jstext .= "\nwords[" . $jsarrindex . "] = new Object();";
            $jstext .= "\nwords[" . $jsarrindex . "].status = 0;";
            $jstext .= "\nwords[" . $jsarrindex . "].word = \"" . strtr($word, $jsEscape) . "\";";
            $jstext .= "\nwords[" . $jsarrindex . "].suggestions = new Array();";
            $jsarrindex2 = 0;
            foreach ($sugs as $suggestion) {
                $suggestion = str_replace(array("\r", "\n"), "", $suggestion);
                $jstext .= "\nwords[" . $jsarrindex . "].suggestions[" . $jsarrindex2 . "] = \"" . strtr($suggestion, $jsEscape) . "\";";
                $jsarrindex2++;
            }
            $jsarrindex++;
        }

        $parts[$i] = implode(" ", $marked);
    }

    // Put the portions back together to reconstitute the string.
    return array(implode("", $parts), $jstext);
}
Exemplo n.º 3
0
$cfg_pass = '';
$cfg_db = 'swk';
$cfg_tab = '3';
//////////////////
// connect to the database
$db = DBHelper::connect($cfg_serv, $cfg_user, $cfg_pass, $cfg_db);
// initialise the remaining classes
$validation = new Validation('../dict/vulgarism.txt');
$tokenizer = new Tokenizer('../dict/stoplist.txt');
$fsaa = new Fsaa('../dict/lort_acc_full.fsa');
$fsas = new Fsas('../dict/lort_acc_full.fsa');
$fsal = new Fsal('../dict/llems_full.fsa');
$pspell_config = pspell_config_create("pl");
pspell_config_ignore($pspell_config, 4);
pspell_config_mode($pspell_config, PSPELL_FAST);
pspell_config_runtogether($pspell_config, false);
$pspell_link = pspell_new_config($pspell_config);
// start the stopwatch and begin collecting timing data
$stoper = new Timer();
// fetch the result set
$res = $db->query("SELECT * FROM comment_{$cfg_tab} WHERE type = 'OK' ORDER BY id");
while ($row = $res->fetch_row()) {
    list($id, $comment, $type) = $row;
    //echo $comment;
    $stoper->set('query');
    // check for e-mail addresses and WWW links
    if (Validation::findEmail($comment) || Validation::findWWW($comment)) {
        // NOTE(review): $tok_comment is not assigned anywhere in the visible code
        // before this line - confirm it is set earlier, otherwise implode() gets an undefined value.
        echo 'E' . $id . '--' . $comment . '===' . implode(', ', $tok_comment) . "\n";
        $stoper->set('email');
        continue;
    }
Exemplo n.º 4
0
 /**
  * Constructor providing the general initialisation of the data preparation
  * system: tokenizer, spelling correction, restoring Polish characters,
  * vulgarisms.
  *
  * The helper objects live in static properties; each one is created only
  * while its property is still null.
  *
  * @param mysqli $dbconn Database connection object used in subclasses.
  * @param string $dictdir Directory with dictionaries, stop lists, etc.
  * @param int $idc Identifier of the comment set in use.
  * @param bool $copy_unknown Whether to keep unrecognised words.
  * @param array $options Parameters of the concrete classifier as an associative array.
  */
 function __construct($dbconn, $dictdir, $idc, $copy_unknown, $options = null)
 {
     // Per-instance settings.
     $this->dbconn = $dbconn;
     $this->idc = $idc;
     $this->copy_unknown = $copy_unknown;

     // Helpers built from the dictionary directory.
     if (self::$validation === null) {
         self::$validation = new Validation("{$dictdir}/vulgarism.txt");
     }

     if (self::$tokenizer === null) {
         self::$tokenizer = new Tokenizer("{$dictdir}/stoplist.txt");
     }

     if (self::$fsaa === null) {
         self::$fsaa = new Fsaa("{$dictdir}/lort_acc_full.fsa");
     }

     if (self::$fsal === null) {
         self::$fsal = new Fsal("{$dictdir}/llems_full.fsa");
     }

     // The speller is the last thing to set up; nothing to do if it already exists.
     if (self::$pspell !== null) {
         return;
     }

     $spellerConfig = pspell_config_create("pl");
     // options that make aspell run faster
     pspell_config_ignore($spellerConfig, 4);
     pspell_config_mode($spellerConfig, PSPELL_FAST);
     pspell_config_runtogether($spellerConfig, false);
     self::$pspell = pspell_new_config($spellerConfig);
 }