Пример #1
0
/**
 * Stem a single word with the stemmer of the configured language.
 *
 * Reads these globals:
 *  - $stem_words      language code selecting the stemmer ('bg', 'cz', 'de', 'el',
 *                     'en', 'es', 'fi', 'fr', 'hu', 'nl', 'it', 'pt', 'ru', 'se')
 *  - $stem_dir        directory containing the "<lang>_stem.php" files
 *  - $min_word_length minimum length (in bytes, via strlen) of a usable word
 *  - $common          array keyed by common words
 *
 * @param string $word Word to stem.
 * @param mixed  $type Not used by this function; kept so existing callers still work.
 *
 * @return string|null The word unchanged when it is shorter than $min_word_length
 *                     or contains '*', '!', ':' or a digit; null when the stemmed
 *                     word is too short or is a key of $common with a truthy
 *                     value; otherwise the stemmed word. When $stem_words names
 *                     no supported language the word is kept unstemmed.
 */
function stem_word($word, $type)
{
    global $stem_words, $stem_dir, $min_word_length, $common;

    //  no stemming for too short words or words containing some special characters
    if (strlen($word) < $min_word_length || preg_match("/[\\*\\!:]|[0-9]/si", $word)) {
        return $word;
    }

    // Stemmers invoked as <lang>_stemmer::stem($word).
    $static_langs = array('bg', 'cz', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'nl', 'pt', 'ru', 'se');
    // Stemmers that are instantiated before stem() is called.
    $instance_langs = array('el', 'it');

    $lang = (string) $stem_words;
    $use_static = in_array($lang, $static_langs, true);
    $use_instance = in_array($lang, $instance_langs, true);

    // Fallback: with an unknown or empty language code the word stays unstemmed
    // instead of being dropped because of an undefined result variable.
    $word1 = $word;

    if ($use_static || $use_instance) {
        // $lang was matched against the fixed lists above, so the path cannot
        // be steered to an arbitrary file.
        require_once "{$stem_dir}/{$lang}_stem.php";
        $class = "{$lang}_stemmer";

        if ($lang === 'en') {
            // English only: transliterate special characters to ASCII, then
            // strip whatever is neither a word character nor whitespace
            // (transliteration can leave stray punctuation behind).
            // NOTE: setlocale() changes the process-wide LC_CTYPE setting.
            setlocale(LC_CTYPE, 'en_GB');
            $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $word);
            // iconv() returns false on failure; treat that as an empty word so
            // it is rejected by the length check below.
            $word = preg_replace("/[^\\w\\s]/", "", $ascii === false ? '' : $ascii);
        }

        if ($use_instance) {
            $stemmer = new $class();
            $word1 = $stemmer->stem($word);
        } else {
            $word1 = $class::stem($word);
        }
    }

    //  Discard the word entirely when stemming made it too short or turned it
    //  into a common word. !empty() avoids an undefined-index notice for the
    //  (usual) case where the stem is not a key of $common.
    if (strlen($word1) < $min_word_length || !empty($common[$word1])) {
        return null;
    }

    return $word1;
}
Пример #2
0
<?php 
// Smoke test for the stemmer: report only fatal and parse errors.
error_reporting(E_ERROR | E_PARSE);
echo "Hello\n";
// stem() is declared as an instance method below, so call it on an object;
// a static call (en_stemmer::stem) is a fatal Error on PHP 8.
$stemmer = new en_stemmer();
echo "'" . $stemmer->stem("informal") . "'\n";
echo "Goodbye\n";
/* o------------------------------------------------------------------------------o
 *
 *  This script is based on Martin Porter's stemming algorithm.
 *   First PHP implementation by Jon Abernathy
 *  Improvements,  PHP5 implementation and adapted for Sphider-plus application
 *   by Rolf Kellner [Tec] March 2010
 *
 * o------------------------------------------------------------------------------o */
class en_stemmer
{
    /**
     * Run the five stemming steps, in order, over a word.
     *
     * The word is not lower-cased here. Words of two bytes or fewer are
     * returned untouched.
     *
     * @param string $word Word to stem.
     *
     * @return string Result of step_5, or the input itself for short words.
     */
    public function stem($word)
    {
        // Nothing to strip from very short words.
        if (strlen($word) <= 2) {
            return $word;
        }

        $after_step_1 = self::step_1($word);
        $after_step_2 = self::step_2($after_step_1);
        $after_step_3 = self::step_3($after_step_2);
        $after_step_4 = self::step_4($after_step_3);

        return self::step_5($after_step_4);
    }
    //  Step1, if the word is in plural form, it is reduced to singular form.
    //  Then, any -ed or -ing endings are removed as appropriate, and finally,
    //  words ending in "y" with a vowel in the stem have the "y" changed to "i".
    function step_1($word)