예제 #1
0
/**
 * Loads the per-label document counts for every nominal attribute of a crisis.
 *
 * The query groups by attribute and label code and orders the rows by
 * attribute ID, so consecutive rows with the same attribute ID are collected
 * into one AttributeStatistics object. Within an attribute, the label whose
 * code is the string 'null' sorts first, followed by the remaining labels in
 * descending order of their training count.
 *
 * @param mixed $crisisID Crisis identifier; coerced with intval() before it
 *                        is placed into the SQL text.
 *
 * @return array Map of attribute ID => AttributeStatistics. Each entry has
 *               `attributeID` and `rows`, a list of LabelDistribution objects
 *               (labelCode, labelCountTotal, labelCountTraining,
 *               labelCountEvaluation). Empty when the query fails or matches
 *               no rows.
 */
function getLabelDistribution($crisisID)
{
    $crisisID = intval($crisisID);
    $sql = "select\n            mf.nominalAttributeID,\n            nl.nominalLabelCode,\n            count(distinct d.documentID) as labelCountTotal,\n            sum(!isEvaluationSet) as labelCountTraining,\n            sum(isEvaluationSet) as labelCountEvaluation\n        from model_family mf\n        left join nominal_label nl on nl.nominalAttributeID=mf.nominalAttributeID\n        left join document_nominal_label dnl on dnl.nominalLabelID=nl.nominalLabelID\n        left join document d on d.documentID=dnl.documentID\n        where mf.crisisID={$crisisID}\n        group by nominalAttributeID, nominalLabelCode\n        order by nominalAttributeID, nominalLabelCode!='null', labelCountTraining desc";
    $attributes = array();
    $con = getMySqlConnection();
    $result = $con->query($sql);
    if ($result) {
        $current = null;
        while ($row = $result->fetch_assoc()) {
            $attributeID = intval($row["nominalAttributeID"]);
            // Rows arrive ordered by attribute ID, so a change of ID starts a new group.
            if (is_null($current) || $current->attributeID != $attributeID) {
                $current = new AttributeStatistics();
                $current->attributeID = $attributeID;
                $current->rows = array();
                $attributes[$attributeID] = $current;
            }
            $distribution = new LabelDistribution();
            $distribution->labelCode = $row["nominalLabelCode"];
            $distribution->labelCountTotal = intval($row["labelCountTotal"]);
            // Kept as a float (unlike the other two counts) so existing callers
            // see the same value type as before.
            $distribution->labelCountTraining = floatval($row["labelCountTraining"]);
            $distribution->labelCountEvaluation = intval($row["labelCountEvaluation"]);
            $current->rows[] = $distribution;
        }
        // Release the buffered result set once all rows have been consumed.
        $result->free();
    }
    // Close the connection on every path, as aggregateTaskAnswers() does.
    $con->close();
    return $attributes;
}
예제 #2
0
/**
 * Aggregates the submitted answers for one task and stores the agreed labels.
 *
 * Every row of task_answer for the document (excluding rows whose answer is
 * the string 'null') is decoded as JSON and its entries are grouped by
 * attributeID. For each attribute the most frequent labelID is chosen. If any
 * attribute's top label has fewer than $minAgreement votes, nothing is written
 * and the function returns. Otherwise each chosen label (except those equal to
 * $dontknow) is inserted into document_nominal_label, the document is flagged
 * with hasHumanLabels=1, and notifyRedis() is called with the crisis ID and
 * the list of attribute IDs that received answers.
 *
 * Reads the globals $minAgreement (minimum vote count for the winning label)
 * and $dontknow (label value that is never persisted).
 *
 * @param mixed $taskID Document/task identifier; coerced with intval().
 *
 * @return void
 */
function aggregateTaskAnswers($taskID)
{
    global $minAgreement;
    global $dontknow;
    $taskID = intval($taskID);
    $con = getMySqlConnection();

    //Get all answers for this task
    $result = $con->query("select answer from task_answer where documentID = {$taskID} and answer!='null'");
    if (!$result) {
        $con->close();
        return;
    }

    //Collect the submitted label IDs per attribute
    $answers = array();
    while ($row = $result->fetch_assoc()) {
        $answerSet = json_decode($row["answer"]);
        if (!is_array($answerSet) && !is_object($answerSet)) {
            //Malformed or scalar JSON cannot be iterated; ignore this row
            continue;
        }
        foreach ($answerSet as $answer) {
            if (!is_object($answer) || !isset($answer->attributeID, $answer->labelID)) {
                //Entry lacks one of the two required fields
                continue;
            }
            $labelID = $answer->labelID;
            //array_count_values() only counts ints and strings, and array keys must be scalar
            if (!is_scalar($answer->attributeID) || (!is_int($labelID) && !is_string($labelID))) {
                continue;
            }
            $answers[$answer->attributeID][] = $labelID;
        }
    }
    $result->close();

    //Find the most frequent label per attribute
    $finalAnswers = array();
    foreach ($answers as $attributeID => $labelIDs) {
        $counts = array_count_values($labelIDs);
        $topCount = max($counts);
        if ($topCount < $minAgreement) {
            //More answers are needed to reach agreement. Clean up and return.
            $con->close();
            return;
        }
        //On a tie, the label that was counted first wins
        $finalAnswers[$attributeID] = intval(array_search($topCount, $counts, true));
    }

    //Save the final answers
    foreach ($finalAnswers as $attributeID => $label) {
        if ($label == $dontknow) {
            continue;
        }
        $con->query("insert ignore into document_nominal_label (documentID, nominalLabelID, timestamp) values " . "({$taskID}, {$label}, utc_timestamp())");
    }

    //Mark task as completed
    //NOTE(review): this also runs when no usable answers were found at all - confirm that is intended
    $con->query("update document set hasHumanLabels=1 where documentID={$taskID}");

    //Send notification that a new training sample has arrived
    $crisisID = intval(selectSingleValue($con, "select crisisid from document where documentid={$taskID}"));
    notifyRedis($crisisID, array_keys($answers));

    $con->close();
}