/**
 * Collects the mentions attached to the given objects together with a
 * printable text for every mention.
 *
 * Rows are read ordered by object_id, mention_id, start_token and grouped by
 * mention_id. For each mention the texts of its entities are gathered in
 * "entities" and rendered into "text" as "[entity one] [entity two]".
 *
 * @param array $object_ids ids of the objects to look up; they are cast to
 *                          integers before being placed into the query
 * @return array map object_id => list of mentions, each mention being
 *               array("mention_id", "object_type_id", "entities", "text");
 *               an empty array when $object_ids is empty
 */
function get_mentions_text_by_objects($object_ids) {
    $mentions = array();

    // "IN ()" is not valid SQL, and no ids means no mentions anyway
    if (empty($object_ids)) {
        return $mentions;
    }

    // the ids are interpolated into the statement, so force them to integers
    $id_list = implode(",", array_map('intval', $object_ids));

    $men_res = sql_query("
        SELECT entity_id, mention_id, object_id, start_token, length, object_type_id, tf_text
        FROM ne_entities
        LEFT JOIN tokens ON start_token = tf_id
        LEFT JOIN ne_entities_mentions USING (entity_id)
        LEFT JOIN ne_mentions USING (mention_id)
        WHERE object_id IN (" . $id_list . ")
        ORDER BY object_id, mention_id, start_token");

    // single place that renders a finished mention, so that the mentions
    // flushed inside the loop and the last one get the same text format
    $finish_mention = function ($mention) {
        $mention["text"] = "[" . implode("] [", $mention["entities"]) . "]";
        return $mention;
    };

    $men_id = 0;
    $obj_id = 0;
    $mention = array();

    while ($rm = sql_fetch_array($men_res)) {
        if ($rm["mention_id"] != $men_id) {
            // a new mention starts: store the previous one (if any)
            if (!empty($mention)) {
                $mentions[$obj_id][] = $finish_mention($mention);
            }
            $men_id = $rm["mention_id"];
            $obj_id = $rm["object_id"];
            $mention = array(
                "mention_id" => $men_id,
                "object_type_id" => $rm["object_type_id"],
                "entities" => array()
            );
        }

        // one-token entities taken simply by join
        if ($rm["length"] == 1) {
            $mention["entities"][] = $rm["tf_text"];
        } else {
            // longer entities: join element 1 of every token row
            // (presumably the token text - confirm against get_ne_entity_tokens_info)
            $mention["entities"][] = implode(" ", array_map(function ($arr) {
                return $arr[1];
            }, get_ne_entity_tokens_info($rm["start_token"], $rm["length"])));
        }
    }

    // the last mention has not been flushed by the loop
    if (!empty($mention)) {
        $mentions[$obj_id][] = $finish_mention($mention);
    }

    return $mentions;
}
/**
 * Loads the named-entity annotation one user made for a paragraph within a
 * tagset.
 *
 * @param mixed $par_id           paragraph id matched against ne_paragraphs.par_id
 * @param mixed $user_id          annotator id; must not be empty
 * @param mixed $tagset_id        tagset id matched against ne_paragraphs.tagset_id
 * @param bool  $group_by_mention when true the sorted entity list is passed
 *                                through group_entities_by_mention()
 * @return array empty array when no ne_paragraphs row matches; otherwise
 *               array('annot_id' => ..., 'entities' => ...) where every entity
 *               has the keys id, start_token, length, tokens, mention_ids,
 *               mention_types, tags (pairs of tag_id and tag_name) and tag_ids,
 *               sorted by sentence and position of the first token
 * @throws UnexpectedValueException when $user_id is empty
 * @throws Exception when the number of tokens found for an entity differs
 *                   from the stored entity length
 */
function get_ne_by_paragraph($par_id, $user_id, $tagset_id, $group_by_mention = false) {
    if (!$user_id) {
        throw new UnexpectedValueException();
    }

    $res = sql_pe("
        SELECT annot_id
        FROM ne_paragraphs
        WHERE par_id = ?
        AND user_id = ?
        AND tagset_id = ?
        LIMIT 1
    ", array($par_id, $user_id, $tagset_id));

    if (!sizeof($res)) {
        return array();
    }

    $out = array(
        'annot_id' => $res[0]['annot_id'],
        'entities' => array()
    );

    $res = sql_query("
        SELECT entity_id, start_token, length, mention_id, object_type_id
        FROM ne_entities
        LEFT JOIN ne_entities_mentions
            USING (entity_id)
        LEFT JOIN ne_mentions
            USING (mention_id)
        WHERE annot_id=" . $out['annot_id']);

    $tag_res = sql_prepare("
        SELECT tag_id, tag_name
        FROM ne_entity_tags
        JOIN ne_tags USING (tag_id)
        WHERE entity_id = ?
    ");

    while ($r = sql_fetch_array($res)) {
        $eid = $r['entity_id'];

        // the joins yield one row per (entity, mention) pair, so an entity
        // seen before only contributes one more mention
        if (isset($out['entities'][$eid])) {
            $out['entities'][$eid]['mention_ids'][] = $r['mention_id'];
            $out['entities'][$eid]['mention_types'][] = $r['object_type_id'];
            continue;
        }

        $entity = array(
            'id' => $r['entity_id'],
            'start_token' => $r['start_token'],
            'length' => $r['length'],
            'tokens' => array(),
            'mention_ids' => array($r['mention_id']),
            'mention_types' => array($r['object_type_id']),
            'tags' => array(),
            'tag_ids' => array()
        );

        // entity without any mention: the LEFT JOIN columns are empty
        if (empty($r['mention_id'])) {
            $entity['mention_ids'] = array();
            $entity['mention_types'] = array();
        }

        sql_execute($tag_res, array($eid));
        while ($r1 = sql_fetch_array($tag_res)) {
            $entity['tags'][] = array($r1['tag_id'], $r1['tag_name']);
            $entity['tag_ids'][] = $r1['tag_id'];
        }
        // TODO check that tags belong to the correct tagset

        $out['entities'][$eid] = $entity;
    }
    $tag_res->closeCursor();

    // add token info
    // iterate by key instead of by reference: a by-reference loop variable
    // would keep the last element aliased while the array is sorted and
    // handed on to other functions
    foreach ($out['entities'] as $eid => $entity) {
        $tokens = get_ne_entity_tokens_info($entity['start_token'], $entity['length']);
        if (sizeof($tokens) != $entity['length']) {
            throw new Exception("len of entity tokens != entity.length, entity " . $entity['id']);
        }
        $out['entities'][$eid]['tokens'] = $tokens;
    }

    // sort entities by position in paragraph (by first token pos)
    usort($out['entities'], function ($e1, $e2) {
        $s1 = $e1['tokens'][0]['sent_id'];
        $s2 = $e2['tokens'][0]['sent_id'];
        $pos1 = $e1['tokens'][0]['pos'];
        $pos2 = $e2['tokens'][0]['pos'];
        return $s1 == $s2 ? $pos1 - $pos2 : $s1 - $s2;
    });

    if ($group_by_mention) {
        $out['entities'] = group_entities_by_mention($out['entities']);
    }

    return $out;
}