Esempi di codice in PHP per Utf8::substr

Esempio n. 1

0

Mostra file

File: English.php Progetto: wamania/php-stemmer

 /**
  * Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel,
  *  or (b) a vowel at the beginning of the word followed by a non-vowel.
  *
  *  So rap, trap, entrap end with a short syllable, and ow, on, at are classed as short syllables.
  *  But uproot, bestow, disturb do not end with a short syllable.
  */
 private function searchShortSyllabe($from, $nbLetters)
 {
     // Tests whether the letters of $this->word starting at $from form a short
     // syllable. $nbLetters == 2 enables the two-letter form (vowel + non-vowel),
     // which is only accepted at offset 0. Returns a boolean.

     // A negative offset counts back from the end of the word; anything that is
     // still negative afterwards is clamped to the first letter.
     $wordLength = Utf8::strlen($this->word);
     if ($from < 0) {
         $from += $wordLength;
     }
     if ($from < 0) {
         $from = 0;
     }

     // The two-letter form (case b of the definition) is only valid at the very
     // start of the word.
     if ($nbLetters == 2 && $from != 0) {
         return false;
     }

     $letterOne = Utf8::substr($this->word, $from, 1);
     $letterTwo = Utf8::substr($this->word, $from + 1, 1);
     $oneIsVowel = in_array($letterOne, self::$vowels);
     $twoIsVowel = in_array($letterTwo, self::$vowels);

     // Case (b): vowel followed by a non-vowel.
     if ($nbLetters == 2 && $oneIsVowel && !$twoIsVowel) {
         return true;
     }

     // Case (a): non-vowel, vowel, then a non-vowel other than x, Y or w.
     // Note that this test also runs when the two-letter test above did not match.
     $letterThree = Utf8::substr($this->word, $from + 2, 1);
     $notAllowedAsThird = array_merge(self::$vowels, array('x', 'Y', 'w'));

     return !$oneIsVowel && $twoIsVowel && !in_array($letterThree, $notAllowedAsThird);
 }

Esempio n. 2

0

Mostra file

File: Stem.php Progetto: wamania/php-stemmer

 /**
  * Used by spanish, italian, portuguese, etc (but not by french)
  *
  * If the second letter is a consonant, RV is the region after the next following vowel,
  * or if the first two letters are vowels, RV is the region after the next consonant,
  * and otherwise (consonant-vowel case) RV is the region after the third letter.
  * But RV is the end of the word if these positions cannot be found.
  */
 protected function rv()
 {
     // Computes the RV region of $this->word and stores it in $this->rv
     // (the region text) and $this->rvIndex (its starting offset).
     //
     // Returns true when the word is shorter than three letters or when one of
     // the three rules below locates RV; returns false when none of them does,
     // in which case RV stays empty and rvIndex stays at the end of the word.
     $length = Utf8::strlen($this->word);

     // Default: RV is the (empty) end of the word.
     $this->rv = '';
     $this->rvIndex = $length;
     if ($length < 3) {
         return true;
     }

     $first = Utf8::substr($this->word, 0, 1);
     $second = Utf8::substr($this->word, 1, 1);
     $firstIsVowel = in_array($first, static::$vowels);
     $secondIsVowel = in_array($second, static::$vowels);

     // If the second letter is a consonant, RV is the region after the next following vowel,
     if (!$secondIsVowel) {
         for ($i = 2; $i < $length; $i++) {
             $letter = Utf8::substr($this->word, $i, 1);
             if (in_array($letter, static::$vowels)) {
                 $this->rvIndex = $i + 1;
                 $this->rv = Utf8::substr($this->word, $i + 1);
                 return true;
             }
         }
     }

     // or if the first two letters are vowels, RV is the region after the next consonant,
     if ($firstIsVowel && $secondIsVowel) {
         for ($i = 2; $i < $length; $i++) {
             $letter = Utf8::substr($this->word, $i, 1);
             if (!in_array($letter, static::$vowels)) {
                 $this->rvIndex = $i + 1;
                 $this->rv = Utf8::substr($this->word, $i + 1);
                 return true;
             }
         }
     }

     // and otherwise (consonant-vowel case) RV is the region after the third letter.
     if (!$firstIsVowel && $secondIsVowel) {
         $this->rv = Utf8::substr($this->word, 3);
         $this->rvIndex = 3;
         return true;
     }

     // No position found: keep the end-of-word defaults and report it explicitly
     // instead of falling off the end of the method with an implicit null.
     return false;
 }

Esempio n. 3

0

Mostra file

File: Portuguese.php Progetto: wamania/php-stemmer

 /**
  * Step 5
  */
 public function step5()
 {
     // Returns true when the word was handled by one of the two rules below,
     // false otherwise.

     // No final "e   é   ê" in RV: the only remaining rule is the cedilla one.
     if ($this->searchIfInRv(array('e', 'é', 'ê')) === false) {
         if ($this->search(array('ç')) === false) {
             return false;
         }
         // Replace a trailing ç with c.
         $this->word = preg_replace('#(ç)$#u', 'c', $this->word);
         return true;
     }

     // Delete the final e / é / ê.
     $this->word = Utf8::substr($this->word, 0, -1);

     // If the word now ends with gu (or ci) and the position right after the
     // match start (presumably the u or i) is in RV, delete that last letter too.
     $position2 = $this->search(array('gu', 'ci'));
     if ($position2 !== false && $this->inRv($position2 + 1)) {
         $this->word = Utf8::substr($this->word, 0, -1);
     }

     return true;
 }

Esempio n. 4

0

Mostra file

File: Dutch.php Progetto: wamania/php-stemmer

 /**
  * Step 4: undouble vowel
  * If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u,
  * remove one of the vowels from V (for example, maan -> man, brood -> brod).
  */
 private function step4()
 {
     // Undoubles the vowel of a word ending in C-VV-D by rewriting $this->word.
     // Returns true when a vowel was removed and false when the word does not
     // match the pattern (previously the success path returned nothing).

     // D is a non-vowel other than I
     $d = Utf8::substr($this->word, -1, 1);
     if (in_array($d, array_merge(self::$vowels, array('I')))) {
         return false;
     }

     // V is double a, e, o or u
     $v = Utf8::substr($this->word, -3, 2);
     if (!in_array($v, array('aa', 'ee', 'oo', 'uu'))) {
         return false;
     }
     $singleV = Utf8::substr($v, 0, 1);

     // C is a non-vowel
     $c = Utf8::substr($this->word, -4, 1);
     if (in_array($c, self::$vowels)) {
         return false;
     }

     // Drop the last four letters (C, V, V, D) and re-append them with a single V.
     $this->word = Utf8::substr($this->word, 0, -4);
     $this->word .= $c . $singleV . $d;

     return true;
 }

Esempio n. 5

0

Mostra file

File: Italian.php Progetto: wamania/php-stemmer

 /**
  * Step 3a
  * Delete a final a, e, i, o, à, è, ì or ò if it is in RV, and a preceding i if it is in RV
  */
 private function step3a()
 {
     // Returns true when a final vowel was removed, false when nothing matched.
     $finalVowels = array('a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò');

     // Nothing to do unless the word ends with one of the vowels inside RV.
     if ($this->searchIfInRv($finalVowels) === false) {
         return false;
     }

     // Remove the final vowel...
     $this->word = Utf8::substr($this->word, 0, -1);

     // ...and a preceding i, if that i is in RV as well.
     if ($this->searchIfInRv(array('i')) !== false) {
         $this->word = Utf8::substr($this->word, 0, -1);
     }

     return true;
 }

Esempio n. 6

0

Mostra file

File: Spanish.php Progetto: wamania/php-stemmer

 /**
  * Step 3: residual suffix
  * Search for the longest among the following suffixes in RV, and perform the action indicated.
  */
 private function step3()
 {
     // Removes a residual suffix from $this->word.
     // Returns true when the word was modified, false otherwise.
     //
     // Positions are compared with !== false so that only the boolean false
     // counts as "not found", matching the other stemmers' use of searchIfInRv().

     // os   a   o   á   í   ó
     //      delete if in RV
     if (($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) !== false) {
         $this->word = Utf8::substr($this->word, 0, $position);
         return true;
     }

     // e   é
     //      delete if in RV, and if preceded by gu with the u in RV delete the u
     if (($position = $this->searchIfInRv(array('e', 'é'))) !== false) {
         $this->word = Utf8::substr($this->word, 0, $position);

         if (($position2 = $this->searchIfInRv(array('u'))) !== false) {
             // Letter immediately before the trailing u.
             $before = Utf8::substr($this->word, $position2 - 1, 1);
             if ($before === 'g') {
                 $this->word = Utf8::substr($this->word, 0, $position2);
             }
         }

         // The final e / é has been deleted, so the word changed whether or
         // not the optional "gu" trimming applied.
         return true;
     }

     return false;
 }

Esempio n. 7

0

Mostra file

File: Romanian.php Progetto: wamania/php-stemmer

 /**
  * Step 4: Removal of final vowel
  */
 public function step4()
 {
     // Look for one of the final-vowel suffixes "a   e   i   ie   ă"
     // ('ie' is listed before 'e' and 'i' in the lookup order).
     $position = $this->search(array('a', 'ie', 'e', 'i', 'ă'));

     // Delete the suffix only when it was found and lies in RV.
     if ($position !== false && $this->inRv($position)) {
         $this->word = Utf8::substr($this->word, 0, $position);
     }

     // This step always reports true, whether or not anything was removed.
     return true;
 }

Esempio n. 8

0

Mostra file

File: German.php Progetto: wamania/php-stemmer

 /**
  * Step 3: d-suffixes
  */
 public function step3()
 {
     // Handles the d-suffixes by trimming $this->word in place.
     // Returns true as soon as one suffix group matches the end of the word
     // (even if the region checks then prevent any deletion), false when no
     // group matches.
     //
     // Every position is compared with !== false so that a match at index 0 is
     // not mistaken for "not found"; the original mixed !== false and != false.

     // end   ung
     //      delete if in R2
     //      if preceded by ig, delete if in R2 and not preceded by e
     if (($position = $this->search(array('end', 'ung'))) !== false) {
         if ($this->inR2($position)) {
             $this->word = Utf8::substr($this->word, 0, $position);
         }
         if (($position2 = $this->search(array('ig'))) !== false) {
             // Letter immediately before "ig".
             $letter = Utf8::substr($this->word, $position2 - 1, 1);
             if ($this->inR2($position2) && $letter != 'e') {
                 $this->word = Utf8::substr($this->word, 0, $position2);
             }
         }
         return true;
     }

     // ig   ik   isch
     //      delete if in R2 and not preceded by e
     if (($position = $this->search(array('ig', 'ik', 'isch'))) !== false) {
         // Letter immediately before the suffix.
         $letter = Utf8::substr($this->word, $position - 1, 1);
         if ($this->inR2($position) && $letter != 'e') {
             $this->word = Utf8::substr($this->word, 0, $position);
         }
         return true;
     }

     // lich   heit
     //      delete if in R2
     //      if preceded by er or en, delete if in R1
     if (($position = $this->search(array('lich', 'heit'))) !== false) {
         if ($this->inR2($position)) {
             $this->word = Utf8::substr($this->word, 0, $position);
         }
         if (($position2 = $this->search(array('er', 'en'))) !== false) {
             if ($this->inR1($position2)) {
                 $this->word = Utf8::substr($this->word, 0, $position2);
             }
         }
         return true;
     }

     // keit
     //      delete if in R2
     //      if preceded by lich or ig, delete if in R2
     if (($position = $this->search(array('keit'))) !== false) {
         if ($this->inR2($position)) {
             $this->word = Utf8::substr($this->word, 0, $position);
         }
         if (($position2 = $this->search(array('lich', 'ig'))) !== false) {
             if ($this->inR2($position2)) {
                 $this->word = Utf8::substr($this->word, 0, $position2);
             }
         }
         return true;
     }

     return false;
 }

Esempio n. 9

0

Mostra file

File: French.php Progetto: wamania/php-stemmer

 /**
  *  If the word begins with two vowels, RV is the region after the third letter,
  *  otherwise the region after the first vowel not at the beginning of the word,
  *  or the end of the word if these positions cannot be found.
  *  (Exceptionally, par, col or tap, at the beginning of a word is also taken to define RV as the region to their right.)
  */
 protected function rv()
 {
     // Locates RV for $this->word, storing the region text in $this->rv and its
     // starting offset in $this->rvIndex. Returns true when the word is shorter
     // than three letters or RV was located, false otherwise.
     $wordLength = Utf8::strlen($this->word);

     // Default: RV is empty and starts at the end of the word.
     $this->rv = '';
     $this->rvIndex = $wordLength;
     if ($wordLength < 3) {
         return true;
     }

     $letterOne = Utf8::substr($this->word, 0, 1);
     $letterTwo = Utf8::substr($this->word, 1, 1);
     $prefix = Utf8::substr($this->word, 0, 3);

     // If the word begins with two vowels, RV is the region after the third letter.
     $startsWithTwoVowels = in_array($letterOne, self::$vowels) && in_array($letterTwo, self::$vowels);
     // Exceptionally, par, col or tap at the beginning of a word also define RV
     // as the region to their right, i.e. the same offset of three letters.
     $hasSpecialPrefix = in_array($prefix, array('par', 'col', 'tap'));

     if ($startsWithTwoVowels || $hasSpecialPrefix) {
         $this->rv = Utf8::substr($this->word, 3);
         $this->rvIndex = 3;
         return true;
     }

     // Otherwise RV is the region after the first vowel not at the beginning of the word.
     $index = 1;
     while ($index < $wordLength) {
         $current = Utf8::substr($this->word, $index, 1);
         if (in_array($current, self::$vowels)) {
             $this->rv = Utf8::substr($this->word, $index + 1);
             $this->rvIndex = $index + 1;
             return true;
         }
         $index++;
     }

     return false;
 }

Esempio n. 10

0

Mostra file

File: Norwegian.php Progetto: wamania/php-stemmer

 /**
  * Step 3:
  * Search for the longest among the following suffixes in R1, and if found, delete.
  */
 private function step3()
 {
     // leg   eleg   ig   eig   lig   elig   els   lov   elov   slov   hetslov
     // Listed longest first, which is the order they are handed to the lookup.
     $suffixes = array('hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig');

     $position = $this->searchIfInR1($suffixes);
     if ($position === false) {
         // No suffix found in R1: leave the word untouched.
         return;
     }

     // Cut the word at the start of the matched suffix.
     $this->word = Utf8::substr($this->word, 0, $position);
 }

Esempio n. 11

0

Mostra file

File: Swedish.php Progetto: wamania/php-stemmer

 /**
  * Step 3:
  * Search for the longest among the following suffixes in R1, and perform the action indicated.
  */
 private function step3()
 {
     // Applies the first matching R1 suffix rule to $this->word.
     // Returns true when a rule matched and the word was changed, false when
     // none matched (previously this path returned nothing).

     // lig   ig   els
     //      delete
     if (($position = $this->searchIfInR1(array('lig', 'ig', 'els'))) !== false) {
         $this->word = Utf8::substr($this->word, 0, $position);
         return true;
     }

     // löst
     //      replace with lös (drop the final t)
     if ($this->searchIfInR1(array('löst')) !== false) {
         $this->word = Utf8::substr($this->word, 0, -1);
         return true;
     }

     // fullt
     //      replace with full (drop the final t)
     if ($this->searchIfInR1(array('fullt')) !== false) {
         $this->word = Utf8::substr($this->word, 0, -1);
         return true;
     }

     return false;
 }

Esempi in PHP per Utf8::substr