define('FILENAME', '/tmp/DLRM-Dan.csv');
define('CORRECTED_FILENAME', '/tmp/corectat.csv');

// First build a hashmap of 4.1 lexems, keyed by lexem id. Each value is
// array(form, isLoc cast to int, modelType . modelNumber . restriction).
$lexem41Map = array();
$dbResult = db_execute("select id, form, isLoc, concat(modelType, modelNumber, restriction) from LOC_4_1.Lexem");
while (!$dbResult->EOF) {
  $lexem41Map[$dbResult->fields[0]] = array($dbResult->fields[1], intval($dbResult->fields[2]), $dbResult->fields[3]);
  $dbResult->MoveNext();
}

// Next, load the corrected file (Matei provided this as an erratum to Dan's file).
// Column 0 is the form in LOC notation; column 1 is the model, which defaults
// to 'I1' when it is empty or absent. Every lexem found is recorded by id.
$lexemDlrmMap = array();
$f = fopen(CORRECTED_FILENAME, 'r');
if ($f === false) {
  // Without this guard fgetcsv() would be handed an invalid handle: older PHP
  // versions return null for that (which is never === false, so the loop below
  // would spin forever) and PHP 8 throws a TypeError.
  die('Cannot open ' . CORRECTED_FILENAME . " for reading\n");
}
while (($fields = fgetcsv($f)) !== false) {
  if ($fields === array(null)) {
    // fgetcsv() reports a blank line as array(null); there is nothing to look up.
    continue;
  }
  $formNoAccent = locNotationToDexNotation($fields[0]);
  // !empty() has the same truthiness as the original ternary, but does not
  // raise an undefined-offset notice on rows that have a single column.
  $model = !empty($fields[1]) ? $fields[1] : 'I1';
  // Escape the values before embedding them in the where-clause, so that a form
  // containing an apostrophe or backslash cannot break (or alter) the query.
  // NOTE(review): addslashes() assumes MySQL's default backslash escaping; if
  // the DB layer exposes a proper quoting helper, prefer that.
  $escapedForm = addslashes($formNoAccent);
  $escapedModel = addslashes($model);
  $lexem = Lexem::get("formNoAccent = '{$escapedForm}' and concat(modelType, modelNumber, restriction) = '{$escapedModel}'");
  if ($lexem) {
    $lexemDlrmMap[$lexem->id] = true;
  }
}
fclose($f);

// Next, load the file and build a hashmap of DLRM lexems to keep in LOC
$f = fopen(FILENAME, 'r');
if ($f === false) {
  // Same guard as for the corrected file: never pass a failed handle to fgetcsv().
  die('Cannot open ' . FILENAME . " for reading\n");
}
while (($fields = fgetcsv($f)) !== false) {
  // Here the form is in column 1 and column 2 holds a whitespace-separated
  // list of models.
  $formNoAccent = locNotationToDexNotation($fields[1]);
  $models = preg_split('/\\s+/', trim($fields[2]));
  // Presumably a comma-separated list of the non-empty models, built up below.
  $modelStrings = '';
  foreach ($models as $m) {
    if ($m) {
      if ($modelStrings) {
        $modelStrings .= ',';