/**
 * Collects per-label document counts for every attribute attached to a crisis.
 *
 * The query starts from model_family rows of the given crisis and left-joins
 * labels, label assignments and documents, so labels without any assigned
 * document still produce a row (their sums come back as NULL and are
 * converted to 0 below).
 *
 * @param mixed $crisisID Crisis identifier; coerced with intval() before being
 *                        placed in the SQL text.
 * @return array Map of attribute ID => AttributeStatistics. Each entry's
 *               ->rows is a list of LabelDistribution objects in query order.
 *               Empty when the query fails or matches nothing.
 */
function getLabelDistribution($crisisID) {
    $crisisID = intval($crisisID);

    $sql = "select\n"
        . " mf.nominalAttributeID,\n"
        . " nl.nominalLabelCode,\n"
        . " count(distinct d.documentID) as labelCountTotal,\n"
        . " sum(!isEvaluationSet) as labelCountTraining,\n"
        . " sum(isEvaluationSet) as labelCountEvaluation\n"
        . " from model_family mf\n"
        . " left join nominal_label nl on nl.nominalAttributeID=mf.nominalAttributeID\n"
        . " left join document_nominal_label dnl on dnl.nominalLabelID=nl.nominalLabelID\n"
        . " left join document d on d.documentID=dnl.documentID\n"
        . " where mf.crisisID={$crisisID}\n"
        . " group by nominalAttributeID, nominalLabelCode\n"
        . " order by nominalAttributeID, nominalLabelCode!='null', labelCountTraining desc";

    $attributes = array();
    $con = getMySqlConnection();
    $result = $con->query($sql);

    if ($result) {
        // Rows arrive ordered by attribute, so a change of attribute ID marks
        // the start of the next group.
        $a = null;
        while ($row = $result->fetch_assoc()) {
            $attributeID = intval($row["nominalAttributeID"]);
            if (is_null($a) || $a->attributeID != $attributeID) {
                $a = new AttributeStatistics();
                $a->attributeID = $attributeID;
                $a->rows = array();
                $attributes[$attributeID] = $a;
            }

            $d = new LabelDistribution();
            $d->labelCode = $row["nominalLabelCode"];
            $d->labelCountTotal = intval($row["labelCountTotal"]);
            // NOTE(review): this is the only count converted with floatval();
            // kept as-is so the returned value type does not change for callers.
            $d->labelCountTraining = floatval($row["labelCountTraining"]);
            $d->labelCountEvaluation = intval($row["labelCountEvaluation"]);
            $a->rows[] = $d;
        }
        // Release the buffered result set once all rows are consumed.
        $result->free();
    }

    // Close the connection on every path, as aggregateTaskAnswers() does.
    $con->close();

    return $attributes;
}
/**
 * Aggregates the stored answers of a task and, once every answered attribute
 * has reached the required agreement, saves the winning labels.
 *
 * Steps:
 *  1. Read all task_answer rows for the task (rows whose answer is the string
 *     'null' are excluded by the query) and decode each as JSON.
 *  2. For each attribute, pick the most frequent label. If any attribute's top
 *     count is below the global $minAgreement, stop without writing anything.
 *  3. Insert the winning labels into document_nominal_label (labels equal to
 *     the global $dontknow are skipped), set hasHumanLabels=1 on the document
 *     and call notifyRedis() with the crisis ID and the answered attribute IDs.
 *
 * Malformed answer rows (invalid JSON, entries without attributeID/labelID, or
 * with a label that is neither int nor string) are ignored rather than raising
 * warnings.
 *
 * NOTE(review): when no usable answers exist the task is still marked as
 * completed and a notification is sent; this matches the previous behaviour.
 *
 * @param mixed $taskID Task identifier; coerced with intval(). It is used as
 *                      the documentID in all queries.
 * @return void
 */
function aggregateTaskAnswers($taskID) {
    global $minAgreement;
    global $dontknow;

    $taskID = intval($taskID);
    $con = getMySqlConnection();

    //Get all answers for this task
    $result = $con->query("select answer from task_answer where documentID = {$taskID} and answer!='null'");
    if (!$result) {
        $con->close();
        return;
    }

    //Collect the submitted label IDs per attribute
    $answers = array();
    while ($row = $result->fetch_assoc()) {
        $answerSet = json_decode($row["answer"]);
        if (!is_array($answerSet) && !is_object($answerSet)) {
            // json_decode() yields null for invalid JSON and scalars for
            // scalar documents; neither can be iterated.
            continue;
        }
        foreach ($answerSet as $answer) {
            if (!is_object($answer) || !isset($answer->attributeID, $answer->labelID)) {
                continue;
            }
            // array_count_values() can only count ints and strings, and the
            // attribute ID is used as an array key.
            if (!is_scalar($answer->attributeID)
                || !(is_int($answer->labelID) || is_string($answer->labelID))) {
                continue;
            }
            $answers[$answer->attributeID][] = $answer->labelID;
        }
    }
    $result->close();

    //Find the most frequent label per attribute
    $finalAnswers = array();
    foreach ($answers as $attributeID => $labelIDs) {
        $c = array_count_values($labelIDs);
        $topCount = max($c);
        if ($topCount < $minAgreement) {
            //More answers are needed to reach agreement. Clean up and return.
            $con->close();
            return;
        }
        // On ties array_search() returns the first label that reached the top count.
        $finalAnswers[$attributeID] = intval(array_search($topCount, $c, true));
    }

    //Save the final answers
    foreach ($finalAnswers as $label) {
        if ($label == $dontknow) {
            continue;
        }
        $con->query("insert ignore into document_nominal_label (documentID, nominalLabelID, timestamp) values "
            . "({$taskID}, {$label}, utc_timestamp())");
    }

    //Mark task as completed
    $con->query("update document set hasHumanLabels=1 where documentID={$taskID}");

    //Send notification that a new training sample has arrived
    $crisisID = intval(selectSingleValue($con, "select crisisid from document where documentid={$taskID}"));
    notifyRedis($crisisID, array_keys($answers));

    $con->close();
}