コード例 #1
0
ファイル: fixReferences.php プロジェクト: florinp/dexonline
/**
 * Rewrites every |text|reference| triple found in $s.
 *
 * The string is scanned byte by byte with a three-state machine driven by
 * the '|' character. When a triple is complete, the reference is discarded
 * (only the text is emitted) if any of the following holds:
 *   - the simplified text equals the simplified reference,
 *   - the simplified reference is empty,
 *   - isInflectedForm() accepts the simplified pair,
 *   - the simplified text ends with " <reference>" or starts with
 *     "<reference> ".
 * Otherwise the triple is emitted unchanged.
 *
 * Side effects: increments the globals $dropped / $kept once per triple.
 *
 * @param string $s Input that may contain |text|reference| triples.
 * @return string The input with redundant references removed.
 */
function cleanupReferences($s)
{
    global $dropped;
    global $kept;

    $output = '';
    $linkText = '';
    $linkRef = '';
    // Parser state: 0 = outside bars, 1 = collecting text, 2 = collecting reference.
    $state = 0;
    $length = strlen($s);

    for ($idx = 0; $idx < $length; $idx++) {
        $c = $s[$idx];

        // Ordinary character: append it to whichever buffer the state selects.
        if ($c != '|') {
            if ($state === 0) {
                $output .= $c;
            } elseif ($state === 1) {
                $linkText .= $c;
            } else {
                $linkRef .= $c;
            }
            continue;
        }

        // A bar seen while collecting the reference closes the triple.
        if ($state === 2) {
            $plainText = simplifyText($linkText);
            $plainRef = simplifyText($linkRef);

            // NOTE(review): the two affix checks compare against the raw
            // reference, not the simplified one -- confirm this is intended.
            $redundant = $plainText == $plainRef
                || $plainRef == ''
                || isInflectedForm($plainText, $plainRef)
                || text_endsWith($plainText, ' ' . $linkRef)
                || text_startsWith($plainText, $linkRef . ' ');

            if ($redundant) {
                $output .= $linkText;
                $dropped++;
            } else {
                $output .= "|{$linkText}|{$linkRef}|";
                $kept++;
            }

            $linkText = '';
            $linkRef = '';
        }

        // Every bar advances the state machine: 0 -> 1 -> 2 -> 0.
        $state = ($state + 1) % 3;
    }

    // Bars must come in groups of three, so we must finish outside a triple.
    assert($state == 0);
    return $output;
}
コード例 #2
0
ファイル: removeExtra.php プロジェクト: florinp/dexonline
<?php

require_once '../../phplib/util.php';
assert_options(ASSERT_BAIL, 1);
debug_off();
$dbResult = mysql_query("select * from lexems where lexem_extra != ''");
$seen = 0;
$removed = 0;
while (($dbRow = mysql_fetch_assoc($dbResult)) != null) {
    $l = Lexem::createFromDbRow($dbRow);
    $seen++;
    $extra = $l->extra;
    if (text_startsWith($extra, '[') && text_endsWith($extra, ']')) {
        $extra = mb_substr($extra, 1, mb_strlen($extra) - 2);
    }
    if (text_startsWith($extra, '(') && text_endsWith($extra, ')')) {
        $extra = mb_substr($extra, 1, mb_strlen($extra) - 2);
    }
    // Sometimes the extra is just the model number
    $found = $extra == $l->modelType . $l->modelNumber . $l->restriction;
    // Sometimes the extra refers to a homonym's model
    if (!$found) {
        $homonyms = $l->loadHomonyms();
        foreach ($homonyms as $h) {
            $found |= $extra == $h->modelType . $h->modelNumber . $h->restriction;
        }
    }
    if (!$found) {
        $defs = Definition::loadByLexemId($l->id);
        foreach ($defs as $def) {
            $found |= text_contains($def->internalRep, $extra);
コード例 #3
0
ファイル: common.php プロジェクト: florinp/dexonline
/**
 * Normalizes a form string, returning whatever normalizeFormRecursively()
 * yields for the cleaned-up form, except for two hard-coded special cases
 * that directly return a two-element array of spellings.
 *
 * In the general case, hyphens are removed, each acute-accented vowel
 * (á é í ó ú) is rewritten as an apostrophe followed by the plain vowel,
 * and surrounding whitespace is trimmed before delegating.
 *
 * @param string $form The raw form.
 * @return mixed Array of two strings for the special cases; otherwise the
 *               result of normalizeFormRecursively().
 */
function normalizeForm($form)
{
    // Special case 1: staro-s/ș-ti or staro-s/ș-tii or staro-s/ș-tilor.
    // Whatever follows the 12-character prefix is kept on both spellings.
    if (text_startsWith($form, 'staro-s/ș-ti')) {
        $suffix = mb_substr($form, 12);
        return array("starosti{$suffix}", "staroști{$suffix}");
    }

    // Special case 2: [a]iastalaltă (-tă-)
    if ($form == "[a]iastalaltă (-tă-)") {
        return array("aiastalaltă", "iastalaltă");
    }

    $search = array('-', 'á', 'é', 'í', 'ó', 'ú');
    $replacement = array('', "'a", "'e", "'i", "'o", "'u");
    $cleaned = trim(str_replace($search, $replacement, $form));

    return normalizeFormRecursively($cleaned);
}
コード例 #4
0
ファイル: parseLoc4List.php プロジェクト: florinp/dexonline
/**
 * Returns a list containing the next token and the new stream position.
 * If we reach the end of the file, the token is empty.
 * Tokens are opening tags (without the attributes), closing tags or text.
 *
 * Reads from the globals $data (the input buffer) and $dataLen (its length).
 * Leading whitespace is skipped. A comment ("<!-- ... -->") is scanned up to
 * its closing "-->" and, like any other tag, is cut at its first whitespace
 * or '>' character. Text tokens are trimmed.
 *
 * A tag or comment that is still open when the data ends is treated as if it
 * were closed at the end of the data, so the scan always terminates.
 *
 * @param int $pos Offset in $data at which to start scanning.
 * @return array Two elements: the token string and the offset just past it.
 **/
function getNextToken($pos)
{
    global $data;
    global $dataLen;
    // Skip whitespace preceding the token.
    while ($pos < $dataLen && ctype_space($data[$pos])) {
        $pos++;
    }
    if ($pos >= $dataLen) {
        return array('', $dataLen);
    }
    $result = '';
    // If we hit a '<' sign, parse tag.
    if ($data[$pos] == '<') {
        // Bounded by $dataLen so a tag left open at end of data cannot make
        // the loop read past the buffer and spin forever.
        $done = false;
        while (!$done && $pos < $dataLen) {
            $result .= $data[$pos];
            // Comments end at "-->", everything else at the first '>'.
            $isComment = strncmp($result, '<!--', 4) === 0;
            $done = $isComment ? substr($result, -3) === '-->' : $data[$pos] == '>';
            $pos++;
        }
        // Strip the attributes: keep everything up to the first whitespace
        // or '>' and close the tag. The length bound covers truncated tags
        // that contain neither.
        $resultLen = strlen($result);
        $tagEnd = 1;
        while ($tagEnd < $resultLen && !ctype_space($result[$tagEnd]) && $result[$tagEnd] != '>') {
            $tagEnd++;
        }
        $result = substr($result, 0, $tagEnd) . '>';
        return array($result, $pos);
    }
    // Parse text to the next '<' sign or EOF.
    while ($pos < $dataLen && $data[$pos] != '<') {
        $result .= $data[$pos];
        $pos++;
    }
    return array(trim($result), $pos);
}