break; case '': $book->title = ''; break; }
// ^ Tail of a switch statement that begins above this section; the last
//   visible case resets the title to the empty string.

// Normalise punctuation in the title: the backtick and the typographic
// apostrophe become "'", the ellipsis character becomes "...", the en dash
// becomes "-", and a space standing before . , : ? ! is removed. Finally
// 'intr-' is rewritten as 'într-'.
// NOTE(review): the 'intr-' rule is a plain substring replacement, so it also
// fires inside longer words (e.g. 'dintr-') — confirm that this is intended.
$book->title = str_replace(
  array('`', '…', '–', '’', ' .', ' ,', ' :', ' ?', ' !', 'intr-'),
  array("'", '...', '-', "'", '.', ',', ':', '?', '!', 'într-'),
  $book->title
);

// Join a run of the letters I, V, X to a following " a" / " lea" with a
// hyphen, e.g. "II a" -> "II-a" and "II lea" -> "II-lea".
// NOTE(review): the patterns have no trailing word boundary, so "II an"
// would also become "II-an" — confirm against real titles.
$book->title = preg_replace('/([IVX]+) a/', '$1-a', $book->title);
$book->title = preg_replace('/([IVX]+) lea/', '$1-lea', $book->title);
print "Prelim: {$book->id} [{$book->title}]\n";

// Extract words: walk the title one character at a time, collecting runs of
// letters into $word. Each completed word is passed through suggest() and
// matchCase() (both defined elsewhere; presumably "propose a spelling" and
// "restore the original casing" — verify) and appended with appendWord().
// Non-letter characters are copied to the output unchanged.
$newTitle = '';
$inWord = false;
$word = '';
// The title is not modified inside the loop, so its length is computed once
// instead of on every iteration of the loop condition.
$titleLen = mb_strlen($book->title);
for ($i = 0; $i < $titleLen; $i++) {
  $c = text_getCharAt($book->title, $i);
  if (text_isUnicodeLetter($c)) {
    $word .= $c;
    $inWord = true;
  } else {
    if ($inWord) {
      $newTitle = appendWord($newTitle, matchCase(suggest($word), $word));
    }
    $word = '';
    $inWord = false;
    $newTitle .= $c;
  }
}
// Flush the last word when the title ends in a letter.
if ($inWord) {
  $newTitle = appendWord($newTitle, matchCase(suggest($word), $word));
}
$book->title = $newTitle;
$ambiguousLexems = true; } else { $noLexems = true; } }
// ^ Tail of a conditional that begins above this section: one branch sets
//   $ambiguousLexems, the other sets $noLexems.

// Scan the definition's internal representation character by character,
// looking for dash-prefixed letter runs such as "-xyz".
$rep = $def->internalRep;
$len = mb_strlen($rep);
$newRep = '';
$prevC = '';
$curInflection = 0;
//print "Examining {$def->internalRep}\n";
for ($i = 0; $i < $len; $i++) {
  $c = text_getCharAt($rep, $i);
  // Only consider a '-' that does not follow a letter and that sits at an
  // index no greater than MAX_LEN (a constant defined elsewhere).
  // NOTE(review): $prevC starts as '' and is not updated in the visible part
  // of the loop — presumably it is assigned further down; verify.
  if (!text_isUnicodeLetter($prevC) && $c == '-' && $i <= MAX_LEN) {
    // Advance $j past the run of letters that follows the dash.
    // NOTE(review): there is no explicit $j < $len bound; this relies on
    // text_getCharAt() returning a non-letter past the end — confirm.
    $j = $i + 1;
    while (text_isUnicodeLetter(text_getCharAt($rep, $j))) {
      $j++;
    }
    // $chunk is the dash plus the letters after it.
    $chunk = mb_substr($rep, $i, $j - $i);
    // A chunk equal to '-' means no letter followed the dash; skip it.
    if ($chunk != '-') {
      // Drop the leading dash to obtain the bare suffix.
      $suffix = mb_substr($chunk, 1);
      //print "{$def->id} [{$def->lexicon}] $i [$chunk]\n";
      if ($lexem) {
        // Look through the word-list forms of this lexem, for each inflection
        // id in $inflections, for one that matchesWithAccent() the suffix.
        // In the visible code a later match overwrites an earlier one.
        $matchingForm = null;
        foreach ($inflections as $inflId) {
          $wls = WordList::loadByLexemIdInflectionId($lexem->id, $inflId);
          foreach ($wls as $wl) {
            if (matchesWithAccent($wl->form, $suffix)) {
              $matchingForm = $wl->form;
              //print "Matching [{$wl->form}] to [$chunk]\n";
            }