PHP pt_stemmer Beispiele

Programmiersprache: PHP

Klasse / Typ: pt_stemmer

Beispiele auf hotexamples.com: 2

PHP pt_stemmer - 2 Beispiele gefunden. Dies sind die am besten bewerteten PHP Beispiele für die pt_stemmer, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

endsin(1)

endsinArr(1)

getNextConsonantPos(1)

getNextVowelPos(1)

is_vowel(1)

removeAccent(1)

stem(1)

Beispiel #1

Datei anzeigen

Datei: commonfuncs.php Projekt: hackersforcharity/rachelpiOS

/**
 * Stem a single word with the stemmer selected by the global $stem_words.
 *
 * Reads the globals $stem_words (language code), $stem_dir (directory holding
 * the "<code>_stem.php" files), $min_word_length and $common (lookup of common
 * words, keyed by word).
 *
 * @param string $word Word to stem.
 * @param mixed  $type Not used by this function; kept so existing callers keep working.
 *
 * @return string|null The untouched word when it is too short or contains
 *                     '*', '!', ':' or a digit; otherwise the stemmed word, or
 *                     null when the stem is shorter than $min_word_length or
 *                     is flagged in $common (the word is then discarded).
 */
function stem_word($word, $type)
{
    global $stem_words, $stem_dir, $min_word_length, $common;

    // No stemming for too short words or words containing some special characters.
    if (strlen($word) < $min_word_length || preg_match("/[\\*\\!:]|[0-9]/si", $word)) {
        return $word;
    }

    // Supported language codes. The value is true when the stemmer class has to
    // be instantiated before calling stem(), false when stem() is called statically.
    $stemmers = array(
        'bg' => false,
        'cz' => false,
        'de' => false,
        'el' => true,
        'en' => false,
        'es' => false,
        'fi' => false,
        'fr' => false,
        'hu' => false,
        'nl' => false,
        'it' => true,
        'pt' => false,
        'ru' => false,
        'se' => false,
    );

    // With no supported stemmer configured the word passes through unstemmed,
    // so the checks below never read an undefined variable.
    $word1 = $word;

    // Only whitelisted codes reach require_once, so the include path cannot be
    // steered by an arbitrary $stem_words value.
    if (is_string($stem_words) && isset($stemmers[$stem_words])) {
        require_once "{$stem_dir}/{$stem_words}_stem.php";

        if ($stem_words === 'en') {
            // English only: transliterate special characters to ASCII (iconv's
            // TRANSLIT depends on the LC_CTYPE locale), then strip everything
            // that is neither a word character nor whitespace.
            setlocale(LC_CTYPE, 'en_GB');
            $word = iconv('UTF-8', 'ASCII//TRANSLIT', $word);
            $word = preg_replace("/[^\\w\\s]/", "", $word);
        }

        $class = "{$stem_words}_stemmer";
        if ($stemmers[$stem_words]) {
            $stemmer = new $class();
            $word1 = $stemmer->stem($word);
        } else {
            $word1 = $class::stem($word);
        }
    }

    // Discard the word entirely when stemming made it too short or turned it
    // into a common word. empty() avoids an undefined-key warning for the
    // usual case of a word that is not in $common.
    if (strlen($word1) < $min_word_length || !empty($common[$word1])) {
        return null;
    }

    return $word1;
}

Beispiel #2

Datei anzeigen

Datei: pt_stem.php Projekt: hackersforcharity/rachelpiOS

 /**
  * Reduce a Portuguese word to its stem.
  *
  * Computes the R1, R2 and RV regions of the word, then applies:
  *   step 1 - standard suffix removal;
  *   step 2 - verb suffix removal, only when step 1 left the word unchanged;
  *   step 3 - when an earlier step changed the word and RV ends in 'ci', drop the last character;
  *   step 4 - residual suffix removal, only when steps 1 and 2 changed nothing;
  *   step 5 - always.
  * The result is passed through pt_stemmer::removeAccent().
  *
  * NOTE(review): many non-ASCII string literals below appear as the replacement
  * character in this copy of the file, which makes distinct accented letters
  * indistinguishable (e.g. the two str_replace() calls at the top search for the
  * same bytes). They are reproduced unchanged here - restore them from a
  * correctly encoded copy of the file.
  *
  * @param string $word Word to stem; presumably already lower-cased by the caller - TODO confirm.
  *
  * @return string Whatever pt_stemmer::removeAccent() returns for the stemmed word;
  *                words of two bytes or fewer are returned as-is.
  */
 function stem($word)
 {
     //$word = lower_case($word);
     // � and � should be treated as a vowel followed by a consonant
     // NOTE(review): the second marker is written '~o' here, while the step 1
     // suffix 'a�o~es' spells it 'o~' - confirm which order is intended.
     $word = str_replace('�', 'a~', $word);
     $word = str_replace('�', '~o', $word);
     $len = strlen($word);
     if ($len <= 2) {
         return $word;
     }

     // All three regions default to the empty region at the end of the word.
     $r1 = $r2 = $rv = $len;
     // R1 is the region after the first non-vowel following a vowel, or is the null region at the end of the word if there is no such non-vowel.
     for ($i = 0; $i < $len - 1 && $r1 == $len; $i++) {
         if (pt_stemmer::is_vowel($word[$i]) && !pt_stemmer::is_vowel($word[$i + 1])) {
             $r1 = $i + 2;
         }
     }
     // R2 is the region after the first non-vowel following a vowel in R1, or is the null region at the end of the word if there is no such non-vowel.
     for ($i = $r1; $i < $len - 1 && $r2 == $len; $i++) {
         if (pt_stemmer::is_vowel($word[$i]) && !pt_stemmer::is_vowel($word[$i + 1])) {
             $r2 = $i + 2;
         }
     }
     if ($len > 3) {
         if (!pt_stemmer::is_vowel($word[1])) {
             // If the second letter is a consonant, RV is the region after the next following vowel
             $rv = pt_stemmer::getNextVowelPos($word, 2) + 1;
         } elseif (pt_stemmer::is_vowel($word[0]) && pt_stemmer::is_vowel($word[1])) {
             // or if the first two letters are vowels, RV is the region after the next consonant
             $rv = pt_stemmer::getNextConsonantPos($word, 2) + 1;
         } else {
             // otherwise (consonant-vowel case) RV is the region after the third letter. But RV is the end of the word if these positions cannot be found.
             $rv = 3;
         }
     }
     $r1_txt = substr($word, $r1);
     $r2_txt = substr($word, $r2);
     $rv_txt = substr($word, $rv);
     $word_orig = $word;

     // Step 1: Standard ending removal. The first matching rule wins.
     // NOTE(review): a few entries ('uci�n', 'uciones', 'ividad') look like
     // Spanish rather than Portuguese suffixes - confirm against the
     // Portuguese algorithm description.
     if (($suf = pt_stemmer::endsinArr($r2_txt, array('amentos', 'imentos', 'amento', 'imento', 'adoras', 'adores', 'a�o~es', 'ismos', 'istas', 'adora', 'a�a~o', 'antes', '�ncia', 'ezas', 'icos', 'icas', 'ismo', '�vel', '�vel', 'ista', 'osos', 'osas', 'ador', 'ante', 'eza', 'ico', 'ica', 'oso', 'osa'))) != '') {
         $word = substr($word, 0, -strlen($suf));
         # rule1
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('log�a', 'log�as'))) != '') {
         $word = substr($word, 0, -strlen($suf)) . 'log';
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('uci�n', 'uciones'))) != '') {
         $word = substr($word, 0, -strlen($suf)) . 'u';
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('�ncia', '�ncias'))) != '') {
         $word = substr($word, 0, -strlen($suf)) . 'ente';
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('ativamente', 'ivamente', 'osamente', 'icamente', 'adamente'))) != '') {
         $word = substr($word, 0, -strlen($suf));
     } elseif (($suf = pt_stemmer::endsinArr($r1_txt, array('amente'))) != '') {
         $word = substr($word, 0, -strlen($suf));
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('antemente', 'avelmente', '�velmente', 'mente'))) != '') {
         $word = substr($word, 0, -strlen($suf));
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('abilidade', 'abilidades', 'icidade', 'icidades', 'ividad', 'ividades', 'idade', 'idades'))) != '') {
         $word = substr($word, 0, -strlen($suf));
     } elseif (($suf = pt_stemmer::endsinArr($r2_txt, array('ativa', 'ativo', 'ativas', 'ativos', 'iva', 'ivo', 'ivas', 'ivos'))) != '') {
         $word = substr($word, 0, -strlen($suf));
     } elseif (($suf = pt_stemmer::endsinArr($rv_txt, array('eira', 'eiras'))) != '') {
         $word = substr($word, 0, -strlen($suf)) . 'ir';
     }

     // Only RV is consulted from here on, so it is the only region text that
     // has to follow the shortened word.
     if ($word !== $word_orig) {
         $rv_txt = substr($word, $rv);
     }

     if ($word === $word_orig) {
         // Do step 2 if no ending was removed by step 1: now remove verb suffixes
         if (($suf = pt_stemmer::endsinArr($rv_txt, array('ar�amos', 'er�amos', 'ir�amos', '�ssemos', '�ssemos', '�ssemos', 'ar�eis', 'er�eis', 'ir�eis', '�sseis', '�sseis', '�sseis', '�ramos', '�ramos', '�ramos', '�vamos', 'aremos', 'eremos', 'iremos', 'ariam', 'eriam', 'iriam', 'assem', 'essem', 'issem', 'ara~o', 'era~o', 'ira~o', 'arias', 'erias', 'irias', 'ardes', 'erdes', 'irdes', 'asses', 'esses', 'isses', 'astes', 'estes', 'istes', '�reis', 'areis', '�reis', 'ereis', '�reis', 'ireis', '�veis', '�amos', 'armos', 'ermos', 'irmos', 'aria', 'eria', 'iria', 'asse', 'esse', 'isse', 'aste', 'este', 'iste', 'arei', 'erei', 'irei', 'aram', 'eram', 'iram', 'avam', 'arem', 'erem', 'irem', 'ando', 'endo', 'indo', 'adas', 'idas', 'ar�s', 'aras', 'er�s', 'eras', 'ir�s', 'avas', 'ares', 'eres', 'ires', '�eis', 'ados', 'idos', '�mos', 'amos', 'emos', 'imos', 'iras', 'ada', 'ida', 'ar�', 'ara', 'er�', 'era', 'ir�', 'ava', 'iam', 'ado', 'ido', 'ias', 'ais', 'eis', 'ira', 'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou'))) != '') {
             $word = substr($word, 0, -strlen($suf));
             $rv_txt = substr($word, $rv);
         }
     }

     if ($word !== $word_orig) {
         // Step 3: an earlier step changed the word; if RV ends in 'ci', drop the final character.
         if (pt_stemmer::endsin($rv_txt, 'ci')) {
             $word = substr($word, 0, -1);
             $rv_txt = substr($word, $rv);
         }
     } else {
         // Step 4 conditioned: nothing was removed so far, try the residual suffixes.
         if (($suf = pt_stemmer::endsinArr($rv_txt, array('os', 'a', 'i', 'o', '�', '�', '�'))) != '') {
             $word = substr($word, 0, -strlen($suf));
             $rv_txt = substr($word, $rv);
         }
     }

     // Always perform step 5
     // NOTE(review): the first three suffixes start with a space (' cie', ...);
     // that looks unintended and probably keeps them from ever matching -
     // confirm against endsinArr() and the algorithm description.
     if (($suf = pt_stemmer::endsinArr($rv_txt, array(' cie', ' ci�', ' ci�', 'gue', 'gu�', 'gu�', 'e', '�', '�'))) != '') {
         $word = substr($word, 0, -strlen($suf));
     }

     // Undo the two-character markers introduced at the top.
     $word = str_replace('a~', '�', $word);
     $word = str_replace('~o', '�', $word);
     return pt_stemmer::removeAccent($word);
 }