コード例 #1
0
ファイル: fixDivertaBooks.php プロジェクト: florinp/dexonline
         break;
     case '':
         $book->title = '';
         break;
 }
 // Normalize typography in the title: map backticks and curly apostrophes to a
 // plain apostrophe, the ellipsis character to three dots, the en dash to a
 // hyphen, drop the space before punctuation, and restore the diacritic in
 // 'intr-'. str_replace() applies the pairs in array order.
 $book->title = str_replace(
     array('`', '…', '–', '’', ' .', ' ,', ' :', ' ?', ' !', 'intr-'),
     array("'", '...', '-', "'", '.', ',', ':', '?', '!', 'într-'),
     $book->title
 );

 // Join a Roman numeral to a following "a" / "lea" with a hyphen.
 // NOTE(review): neither pattern is anchored after the suffix, so "I a" inside
 // e.g. "I are" is rewritten too -- confirm this is acceptable for the data set.
 $book->title = preg_replace('/([IVX]+) a/', '$1-a', $book->title);
 $book->title = preg_replace('/([IVX]+) lea/', '$1-lea', $book->title);
 print "Prelim: {$book->id} [{$book->title}]\n";

 // Extract words: walk the title one character at a time, collecting runs of
 // letters into $word. Each completed word is passed through suggest() and
 // matchCase() and added with appendWord(); every non-letter character is
 // copied to the output unchanged.
 $newTitle = '';
 $inWord = false;
 $word = '';
 // The title is not modified inside the loop, so measure it once instead of
 // calling mb_strlen() on every iteration.
 $titleLen = mb_strlen($book->title);
 for ($i = 0; $i < $titleLen; $i++) {
     $c = text_getCharAt($book->title, $i);
     if (text_isUnicodeLetter($c)) {
         $word .= $c;
         $inWord = true;
         continue;
     }

     // A non-letter ends the current word (if any); flush it before the separator.
     if ($inWord) {
         $newTitle = appendWord($newTitle, matchCase(suggest($word), $word));
     }
     $word = '';
     $inWord = false;
     $newTitle .= $c;
 }
 // Flush a word that runs up to the very end of the title.
 if ($inWord) {
     $newTitle = appendWord($newTitle, matchCase(suggest($word), $word));
 }
 $book->title = $newTitle;
コード例 #2
0
ファイル: replaceHyphens.php プロジェクト: florinp/dexonline
         $ambiguousLexems = true;
     } else {
         $noLexems = true;
     }
 }
 // State for the character-by-character scan of the definition that follows.
 // Text being scanned: the definition's internal representation.
 $rep = $def->internalRep;
 // Length in characters (multibyte-aware); used as the scan loop's bound.
 $len = mb_strlen($rep);
 // Presumably accumulates the rewritten representation -- the code that
 // appends to it is not visible in this excerpt; TODO confirm.
 $newRep = '';
 // Character preceding the current one; the scan only treats '-' as a
 // candidate when this is not a letter. Starts empty, before the first char.
 $prevC = '';
 // NOTE(review): looks like an index into $inflections -- its use is outside
 // this excerpt; confirm.
 $curInflection = 0;
 //print "Examining {$def->internalRep}\n";
 for ($i = 0; $i < $len; $i++) {
     $c = text_getCharAt($rep, $i);
     if (!text_isUnicodeLetter($prevC) && $c == '-' && $i <= MAX_LEN) {
         $j = $i + 1;
         while (text_isUnicodeLetter(text_getCharAt($rep, $j))) {
             $j++;
         }
         $chunk = mb_substr($rep, $i, $j - $i);
         if ($chunk != '-') {
             $suffix = mb_substr($chunk, 1);
             //print "{$def->id} [{$def->lexicon}] $i [$chunk]\n";
             if ($lexem) {
                 $matchingForm = null;
                 foreach ($inflections as $inflId) {
                     $wls = WordList::loadByLexemIdInflectionId($lexem->id, $inflId);
                     foreach ($wls as $wl) {
                         if (matchesWithAccent($wl->form, $suffix)) {
                             $matchingForm = $wl->form;
                             //print "Matching [{$wl->form}] to [$chunk]\n";
                         }