Example #1
0
/**
 * Collects the mentions that belong to the given objects and builds a
 * printable text for every mention.
 *
 * Rows are read ordered by object, mention and start token, so all entities
 * of one mention arrive consecutively and are accumulated until the
 * mention id changes.
 *
 * Every mention is rendered the same way: each entity text wrapped in
 * square brackets, entities separated by a single space, e.g. "[New York] [city]".
 *
 * @param array $object_ids ids of the objects to look up; they are placed
 *                          unquoted into an SQL IN (...) list, so they are
 *                          cast to integers before use
 * @return array map of object_id => list of mentions, where each mention is
 *               array("mention_id" => ..., "object_type_id" => ...,
 *                     "entities" => list of entity texts, "text" => string);
 *               an empty array when $object_ids is empty
 */
function get_mentions_text_by_objects($object_ids)
{
    // An empty list would yield "IN ()", which is not valid SQL.
    if (empty($object_ids)) {
        return array();
    }

    // The ids are concatenated into the query text, so force them to integers.
    $id_list = implode(",", array_map('intval', $object_ids));

    $men_res = sql_query(
        "\n        SELECT entity_id, mention_id, object_id, start_token, length, object_type_id, tf_text\n"
        . "        FROM ne_entities\n"
        . "            LEFT JOIN tokens ON start_token = tf_id\n"
        . "            LEFT JOIN ne_entities_mentions USING (entity_id)\n"
        . "            LEFT JOIN ne_mentions USING (mention_id)\n"
        . "        WHERE object_id IN (" . $id_list . ")\n"
        . "        ORDER BY object_id, mention_id, start_token"
    );

    // Single place that defines the text format, so that the last mention
    // cannot drift away from the format used for all previous ones.
    $render_text = function (array $entities) {
        return "[" . implode("] [", $entities) . "]";
    };

    $mentions = array();
    $men_id = 0;
    $obj_id = 0;
    $mention = array();
    while ($rm = sql_fetch_array($men_res)) {
        if ($rm["mention_id"] != $men_id) {
            // a new mention starts: store the one collected so far
            if (!empty($mention)) {
                $mention["text"] = $render_text($mention["entities"]);
                $mentions[$obj_id][] = $mention;
            }
            $men_id = $rm["mention_id"];
            $obj_id = $rm["object_id"];
            $mention = array(
                "mention_id" => $men_id,
                "object_type_id" => $rm["object_type_id"],
                "entities" => array()
            );
        }
        if ($rm["length"] == 1) {
            // one-token entities are taken simply from the join with tokens
            $mention["entities"][] = $rm["tf_text"];
        } else {
            // longer entities need a separate lookup; element [1] of every
            // returned item is joined as the token text
            // (presumably the token's surface form - confirm against get_ne_entity_tokens_info)
            $mention["entities"][] = implode(" ", array_map(function ($arr) {
                return $arr[1];
            }, get_ne_entity_tokens_info($rm["start_token"], $rm["length"])));
        }
    }
    // the loop only stores a mention when the next one begins,
    // so the final mention has to be stored here
    if (!empty($mention)) {
        $mention["text"] = $render_text($mention["entities"]);
        $mentions[$obj_id][] = $mention;
    }
    return $mentions;
}
Example #2
0
/**
 * Loads the named-entity annotation that one user made for one paragraph
 * within one tagset.
 *
 * Steps: find the annotation row, read its entities (with the mentions they
 * take part in), attach the tags of every entity, attach token info, and
 * finally sort the entities by the position of their first token.
 *
 * @param mixed $par_id           paragraph id
 * @param mixed $user_id          annotator id; must be truthy
 * @param mixed $tagset_id        tagset id
 * @param bool  $group_by_mention when true the sorted entity list is passed
 *                                through group_entities_by_mention() before
 *                                being returned
 * @return array empty array when no matching row exists in ne_paragraphs;
 *               otherwise array('annot_id' => ..., 'entities' => ...), where
 *               each entity has the keys id, start_token, length, tokens,
 *               mention_ids, mention_types, tags (list of array(tag_id, tag_name))
 *               and tag_ids
 * @throws UnexpectedValueException when $user_id is falsy
 * @throws Exception when the number of tokens loaded for an entity differs
 *                   from the entity's stored length
 */
function get_ne_by_paragraph($par_id, $user_id, $tagset_id, $group_by_mention = false)
{
    if (!$user_id) {
        throw new UnexpectedValueException();
    }

    $res = sql_pe(
        "\n        SELECT annot_id\n"
        . "        FROM ne_paragraphs\n"
        . "        WHERE par_id = ?\n"
        . "        AND user_id = ?\n"
        . "        AND tagset_id = ?\n"
        . "        LIMIT 1\n"
        . "    ",
        array($par_id, $user_id, $tagset_id)
    );
    if (!sizeof($res)) {
        return array();
    }

    $out = array('annot_id' => $res[0]['annot_id'], 'entities' => array());

    // One row per (entity, mention) pair; entities without a mention still
    // appear once because of the LEFT JOINs, with NULL mention columns.
    $res = sql_query(
        "\n        SELECT entity_id, start_token, length, mention_id, object_type_id\n"
        . "        FROM ne_entities\n"
        . "        LEFT JOIN ne_entities_mentions\n"
        . "            USING (entity_id)\n"
        . "        LEFT JOIN ne_mentions\n"
        . "            USING (mention_id)\n"
        . "        WHERE annot_id=" . $out['annot_id']
    );
    $tag_res = sql_prepare(
        "\n        SELECT tag_id, tag_name\n"
        . "        FROM ne_entity_tags\n"
        . "        JOIN ne_tags USING (tag_id)\n"
        . "        WHERE entity_id = ?\n"
        . "    "
    );

    while ($r = sql_fetch_array($res)) {
        $eid = $r['entity_id'];

        // entity already seen: this row only contributes one more mention
        if (isset($out['entities'][$eid])) {
            $out['entities'][$eid]['mention_ids'][] = $r['mention_id'];
            $out['entities'][$eid]['mention_types'][] = $r['object_type_id'];
            continue;
        }

        $entity = array(
            'id' => $r['entity_id'],
            'start_token' => $r['start_token'],
            'length' => $r['length'],
            'tokens' => array(),
            'mention_ids' => array($r['mention_id']),
            'mention_types' => array($r['object_type_id']),
            'tags' => array(),
            'tag_ids' => array()
        );
        // no mention joined for this entity: keep the lists empty
        // instead of holding a single empty value
        if (empty($r['mention_id'])) {
            $entity['mention_ids'] = array();
            $entity['mention_types'] = array();
        }

        sql_execute($tag_res, array($eid));
        while ($r1 = sql_fetch_array($tag_res)) {
            $entity['tags'][] = array($r1['tag_id'], $r1['tag_name']);
            $entity['tag_ids'][] = $r1['tag_id'];
        }
        // TODO check that tags belong to the correct tagset
        $out['entities'][$eid] = $entity;
    }
    $tag_res->closeCursor();

    // add token info
    // Iterate by key rather than by reference: a by-reference foreach leaves
    // the last element bound to the loop variable, and that live reference
    // would then travel into usort() and group_entities_by_mention().
    foreach (array_keys($out['entities']) as $eid) {
        $tokens = get_ne_entity_tokens_info(
            $out['entities'][$eid]['start_token'],
            $out['entities'][$eid]['length']
        );
        $out['entities'][$eid]['tokens'] = $tokens;
        if (sizeof($tokens) != $out['entities'][$eid]['length']) {
            throw new Exception("len of entity tokens != entity.length, entity " . $out['entities'][$eid]['id']);
        }
    }

    // sort entities by position in paragraph (by first token pos);
    // usort() also reindexes the list from 0, dropping the entity-id keys
    usort($out['entities'], function ($e1, $e2) {
        $s1 = $e1['tokens'][0]['sent_id'];
        $s2 = $e2['tokens'][0]['sent_id'];
        $pos1 = $e1['tokens'][0]['pos'];
        $pos2 = $e2['tokens'][0]['pos'];
        return $s1 == $s2 ? $pos1 - $pos2 : $s1 - $s2;
    });

    if ($group_by_mention) {
        $out['entities'] = group_entities_by_mention($out['entities']);
    }
    return $out;
}