/**
 * Splits a text into the list of words it contains.
 *
 * The text is lowercased with mb_strtolower(), passed through
 * AdminStringUtil::removeAccents() and broken into characters with
 * AdminStringUtil::unicodeExplode(). Every maximal run of characters that
 * belong to the accepted alphabet (a-z plus ă, â, î, ș, ț) becomes one word;
 * any other character acts as a separator and is discarded.
 *
 * @param string $text Text to split.
 * @return array Words in order of appearance; empty when no letter is found.
 */
function extractWords($text)
{
    $letters = 'abcdefghijklmnopqrstuvwxyzăâîșț';
    $normalized = AdminStringUtil::removeAccents(mb_strtolower($text));

    $words = [];
    $pending = '';
    foreach (AdminStringUtil::unicodeExplode($normalized) as $char) {
        if (strpos($letters, $char) !== false) {
            // Still inside a word: keep accumulating.
            $pending .= $char;
            continue;
        }

        // Separator reached: emit the word collected so far, if any.
        if ($pending !== '') {
            $words[] = $pending;
            $pending = '';
        }
    }

    // The text may end in the middle of a word.
    if ($pending !== '') {
        $words[] = $pending;
    }

    return $words;
}
// Example #2
// 0
// Flat list of checks for abbreviation markup, format-character migration,
// word-name internalization, accent removal, query cleanup, diacritic
// detection and lexicon extraction. assertAbbreviations() and assertEquals()
// are helpers defined outside this view.

// NOTE(review): the four arguments of assertAbbreviations() look like
// (typed text, internal form with the abbreviation wrapped in #...#, HTML
// rendering with an <abbr> tag, source id) -- confirm against the helper.

// "bWv" is expected to stay unmarked: all three forms are identical.
assertAbbreviations("FILLER bWv FILLER", "FILLER bWv FILLER", "FILLER bWv FILLER", 32);
// Letter case matters: "ed." and "Ed." carry different titles, and "ED." is
// expected to be rewritten as "Ed.".
assertAbbreviations("FILLER ed. FILLER", "FILLER #ed.# FILLER", "FILLER <abbr class=\"abbrev\" title=\"ediție, editat\">ed.</abbr> FILLER", 32);
assertAbbreviations("FILLER Ed. FILLER", "FILLER #Ed.# FILLER", "FILLER <abbr class=\"abbrev\" title=\"Editura\">Ed.</abbr> FILLER", 32);
assertAbbreviations("FILLER ED. FILLER", "FILLER #Ed.# FILLER", "FILLER <abbr class=\"abbrev\" title=\"Editura\">Ed.</abbr> FILLER", 32);
// Abbreviation includes special characters
assertAbbreviations("FILLER RRHA, TMC FILLER", "FILLER #RRHA, TMC# FILLER", "FILLER <abbr class=\"abbrev\" title=\"Revue Roumaine d'Histoire de l'Art, série Théâtre, Musique, Cinématographie\">RRHA, TMC</abbr> FILLER", 32);
// This case passes 1 as the last argument, unlike the others, which pass 32.
assertAbbreviations("FILLER adj. interog.-rel. FILLER", "FILLER #adj. interog.-rel.# FILLER", "FILLER <abbr class=\"abbrev\" title=\"adjectiv interogativ-relativ\">adj. interog.-rel.</abbr> FILLER", 1);
// Abbreviation is not delimited by spaces
assertAbbreviations("AGNUS DEI", "AGNUS DEI", "AGNUS DEI", 32);

// migrateFormatChars(): the expected values show punctuation and spaces being
// moved relative to the @, $ and % markers, while an escaped "\%" is left
// untouched.
assertEquals('@MÁRE^2,@ $mări,$ s.f.', AdminStringUtil::migrateFormatChars('@MÁRE^2@, $mări$, s.f.'));
assertEquals('@$%spaced% text$@', AdminStringUtil::migrateFormatChars('@$ % spaced % text $@'));
assertEquals('40\\% dolomite', AdminStringUtil::migrateFormatChars('40\\% dolomite'));
assertEquals('40 %dolomite%', AdminStringUtil::migrateFormatChars('40% dolomite%'));

// internalizeWordName(): ASCII notation ("~A", ",t") is expected to come back
// as lowercase Romanian letters, with the other symbols and digits dropped.
assertEquals('cățel', AdminStringUtil::internalizeWordName("C~A,t'EL"));
assertEquals('ă', AdminStringUtil::internalizeWordName("~~A~~!@#\$%^&*()123456790"));

// removeAccents(): the acute accent on "á" is removed, the letter "ă" is kept.
assertEquals('casă', AdminStringUtil::removeAccents('cásă'));

// cleanupQuery(): surrounding quotes, <...> spans and &...; entities are
// expected to be removed; letter case and "~a" notation are preserved.
assertEquals('mama', StringUtil::cleanupQuery("'mama'"));
assertEquals('mama', StringUtil::cleanupQuery('"mama"'));
assertEquals('aăbcdef', StringUtil::cleanupQuery("aăbc<mamă foo bar>def"));
assertEquals('AĂBCDEF', StringUtil::cleanupQuery("AĂBC<MAMĂ FOO BAR>DEF"));
assertEquals('a~abcdef', StringUtil::cleanupQuery("a~abc<mam~a foo bar>def"));
assertEquals('a~ABcdef', StringUtil::cleanupQuery("a~ABc<mam~a foo bar>def"));
assertEquals('1234', StringUtil::cleanupQuery('12&qweasd;34'));

// These two use PHP's built-in assert(), not the assertEquals() helper.
// NOTE(review): assert() is skipped when assertions are disabled in php.ini
// (zend.assertions), in which case these checks do nothing -- confirm the
// runtime configuration.
assert(StringUtil::hasDiacritics('mamă'));
assert(!StringUtil::hasDiacritics('mama'));

// extractLexicon(): for a definition with sourceId 1, the expected lexicon is
// the internalRep value itself for both sample values.
$def = Model::factory('Definition')->create();
$def->sourceId = 1;
$def->internalRep = 'abcd';
assertEquals('abcd', AdminStringUtil::extractLexicon($def));
$def->internalRep = 'wxyz';
assertEquals('wxyz', AdminStringUtil::extractLexicon($def));