/**
 * Builds a SimilarRecord describing how $definition relates to the closest
 * definition found in its similar source.
 *
 * The record carries four fields:
 *   - source:     whatever SimilarSource::getSimilarSource() returns for the
 *                 definition's sourceId
 *   - definition: the result of $definition->loadSimilar($lexemIds, ...)
 *   - htmlDiff:   LDiff::htmlDiff() of the two internal representations, or
 *                 null when no similar definition was found
 *   - identical:  true only when a similar definition exists and the diff
 *                 size reported by loadSimilar() is 0
 *
 * @param object $definition The definition to compare; must expose sourceId,
 *                           internalRep and loadSimilar().
 * @param array  $lexemIds   Lexem IDs forwarded to loadSimilar().
 * @return SimilarRecord
 */
static function create($definition, $lexemIds) {
  // Start at 0 because loadSimilar() only writes its out-parameter when it
  // actually runs the candidate search.
  $diffSize = 0;

  $record = new SimilarRecord();
  $record->source = SimilarSource::getSimilarSource($definition->sourceId);

  $match = $definition->loadSimilar($lexemIds, $diffSize);
  $record->definition = $match;

  // The similar definition is the "from" side of the diff, the given one the "to" side.
  $record->htmlDiff = $match
    ? LDiff::htmlDiff($match->internalRep, $definition->internalRep, true)
    : null;

  $record->identical = $match && $diffSize == 0;

  return $record;
}
/**
 * Finds the definition in this definition's similar source that differs
 * least from this one.
 *
 * Candidates are non-deleted definitions from the similar source that are
 * mapped (through LexemDefinitionMap) to at least one of $lexemIds. They are
 * ranked with LDiff::diffMeasure() against this definition's internalRep,
 * and the smallest measure wins. On ties the earliest candidate is kept.
 *
 * @param array    $lexemIds Lexem IDs used to select candidates.
 * @param int|null $diffSize Out-parameter. Left untouched when there is no
 *                           similar source or $lexemIds is empty. Otherwise
 *                           reset to 0 and then set to the winning
 *                           candidate's diff measure (it stays 0 when the
 *                           query returns no candidates).
 * @return object|null The closest candidate, or null when none was found.
 */
function loadSimilar($lexemIds, &$diffSize = null) {
  // Nothing to compare against without a similar source or any lexems.
  $similarSource = SimilarSource::getSimilarSource($this->sourceId);
  if (!$similarSource || !count($lexemIds)) {
    return null;
  }

  // All live definitions from the similar source mapped to any of $lexemIds.
  $candidates = Model::factory('Definition')
    ->table_alias('d')
    ->select('d.*')
    ->distinct()
    ->join('LexemDefinitionMap', 'ldm.definitionId = d.id', 'ldm')
    ->where_not_equal('d.status', self::ST_DELETED)
    ->where('d.sourceId', $similarSource->id)
    ->where_in('ldm.lexemId', $lexemIds)
    ->find_many();

  // Keep the candidate with the smallest diff measure seen so far.
  $closest = null;
  $diffSize = 0;
  foreach ($candidates as $candidate) {
    $measure = LDiff::diffMeasure($this->internalRep, $candidate->internalRep);
    if (!$closest || $measure < $diffSize) {
      $closest = $candidate;
      $diffSize = $measure;
    }
  }

  return $closest;
}
<?php require_once __DIR__ . '/../phplib/util.php'; define('SERVER_URL', 'http://localhost/~cata/DEX/wwwbase'); $opts = getopt('', array('user:'******'source:', 'date:')); if (count($opts) != 3) { usage(); } $user = User::get_by_nick($opts['user']); $source = Source::get_by_urlName($opts['source']); $timestamp = strtotime($opts['date']); if (!$user || !$source || !$timestamp) { usage(); } $similarSource = SimilarSource::getSimilarSource($source->id); if (!$similarSource) { usage(); } $defs = Model::factory('Definition')->where('userId', $user->id)->where('sourceId', $source->id)->where_gt('createDate', $timestamp)->where('status', Definition::ST_ACTIVE)->order_by_asc('lexicon')->find_many(); $truePositives = $falsePositives = $trueNegatives = 0; foreach ($defs as $def) { $lexemIds = db_getArray("select distinct lexemId from LexemDefinitionMap where definitionId = {$def->id}"); $similar = $def->loadSimilar($lexemIds, $diffSize); if ($similar) { $correct = ($def->similarSource == 1) == ($diffSize == 0); if ($correct) { if ($def->similarSource) { $truePositives++; } else { $trueNegatives++; }