/**
 * Print the per-sentence annotation listing (input, system output, reference)
 * for the current analysis as HTML.
 *
 * Data is read from the files named by get_current_analysis_filename():
 *   coverage/input-annotation, basic/bleu-annotation,
 *   basic/segmentation-annotation, basic/input-tree, basic/output-tree,
 *   basic/node and basic/search-graph.
 * Only basic/bleu-annotation is read unconditionally; every other file is
 * used only when it exists.
 *
 * The sentence order is taken from $_GET['sort'] (stored in the global $sort):
 *   "order" (default)      - ascending sentence id
 *   "worst", "75"          - ascending score, ties by descending id
 *   "best", "avg", "25"    - descending score, ties by ascending id
 * "25"/"75" start the listing a quarter into the sorted list, "avg" starts at
 * the middle; all other modes start at the beginning.
 *
 * @param int    $count  maximum number of sentences to print
 * @param string $filter if non-empty, only sentences whose input annotation
 *                       (first tab field, split on spaces) contains this
 *                       token are listed
 * @return void  output is printed directly
 */
function sentence_annotation($count, $filter) {
  global $set, $id, $dir, $biconcor;
  global $sort;

  # get input; $filtered collects the line numbers that do NOT contain $filter
  $filtered = array();
  $input = array();
  $file = get_current_analysis_filename("coverage", "input-annotation");
  if (file_exists($file)) {
    $input = file($file);
    if ($input === false) {
      $input = array();
    }
    # filter, if so specified
    if ($filter != "") {
      foreach ($input as $i => $raw) {
        $item = explode("\t", $raw);
        $word = explode(" ", $item[0]);
        # loose comparison, as in a plain == test against each token
        if (!in_array($filter, $word)) {
          $filtered[$i] = 1;
        }
      }
    }
  }

  # load bleu scores: score \t id \t system output \t reference(s)...
  $bleu = array();
  $data = file(get_current_analysis_filename("basic", "bleu-annotation"));
  if ($data === false) {
    $data = array();
  }
  foreach ($data as $raw) {
    # explode() replaces split(), which no longer exists in PHP 7+
    $item = explode("\t", $raw);
    if (count($item) < 3) {
      # malformed line: score, id and system output are all required
      continue;
    }
    if (array_key_exists($item[1], $filtered)) {
      continue;
    }
    $bleu[] = array(
      "bleu" => $item[0],
      "id" => $item[1],
      "system" => $item[2],
      # multiple references are shown on separate lines
      "reference" => implode("<br>", array_slice($item, 3))
    );
  }

  # sort and label additional sentences as filtered
  $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
  if ($sort == '') {
    $sort = "order";
  }
  # A closure instead of a nested named function: a named function declared
  # here would be redeclared (fatal error) on a second call.
  $compare = function ($a, $b) use ($sort) {
    if ($sort == "order") {
      $a_idx = $a["id"];
      $b_idx = $b["id"];
    } elseif ($sort == "worst" || $sort == "75") {
      $a_idx = $a["bleu"];
      $b_idx = $b["bleu"];
      if ($a_idx == $b_idx) {
        # ties: higher id first
        $a_idx = $b["id"];
        $b_idx = $a["id"];
      }
    } elseif ($sort == "best" || $sort == "avg" || $sort == "25") {
      $a_idx = -$a["bleu"];
      $b_idx = -$b["bleu"];
      if ($a_idx == $b_idx) {
        # ties: lower id first
        $a_idx = $a["id"];
        $b_idx = $b["id"];
      }
    } else {
      # unknown sort mode: treat all entries as equal
      return 0;
    }
    if ($a_idx == $b_idx) {
      return 0;
    }
    return $a_idx < $b_idx ? -1 : 1;
  };
  usort($bleu, $compare);

  $offset = 0;
  if ($sort == "25" || $sort == "75") {
    $offset = (int) (count($bleu) / 4);
  } elseif ($sort == "avg") {
    $offset = (int) (count($bleu) / 2);
  }

  # ids of the sentences that will actually be displayed
  $retained = array();
  for ($i = $offset; $i < $count + $offset && $i < count($bleu); $i++) {
    $retained[$bleu[$i]["id"]] = 1;
  }

  # get segmentation (phrase alignment)
  # each line: space-separated items "in_start:in_end:out_start:out_end"
  $segmentation = array();
  $file = get_current_analysis_filename("basic", "segmentation-annotation");
  if (file_exists($file)) {
    $data = file($file);
    if ($data === false) {
      $data = array();
    }
    foreach ($data as $i => $raw) {
      if ($filter == "" || array_key_exists($i, $retained)) {
        $segment = 0;
        foreach (explode(" ", $raw) as $item) {
          # pad so that short items yield nulls rather than undefined offsets
          list($in_start, $in_end, $out_start, $out_end) = array_pad(explode(":", $item), 4, null);
          $segment++;
          $segmentation[$i]["input_start"][$in_start] = $segment;
          $segmentation[$i]["input_end"][$in_end] = $segment;
          $segmentation[$i]["output_start"][$out_start] = $segment;
          # the last item on a line carries the newline; cast to a clean integer key
          $segmentation[$i]["output_end"][(int) $out_end] = $segment;
        }
      }
    }
  }

  # get hierarchical data
  # input-tree / output-tree lines: sentence \t brackets \t nt \t words
  $hierarchical = 0;
  $file = get_current_analysis_filename("basic", "input-tree");
  if (file_exists($file)) {
    $data = file($file);
    if ($data === false) {
      $data = array();
    }
    $span = 0;
    $last_sentence = -1;
    foreach ($data as $raw) {
      # the non-terminal label ($nt) is read but not stored for the input side
      list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $raw), 4, null);
      if ($sentence != $last_sentence) {
        $span = 0;
      }
      $last_sentence = $sentence;
      if (array_key_exists($sentence, $retained)) {
        $segmentation[$sentence][$span]["brackets"] = $brackets;
        $segmentation[$sentence][$span]["words"] = rtrim((string) $words);
        $span++;
      }
    }
    # the mere presence of the input-tree file switches to hierarchical display
    $hierarchical = 1;
  }

  $segmentation_out = array();
  $file = get_current_analysis_filename("basic", "output-tree");
  if (file_exists($file)) {
    $data = file($file);
    if ($data === false) {
      $data = array();
    }
    $span = 0;
    $last_sentence = -1;
    $nt_count = array();
    foreach ($data as $raw) {
      list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $raw), 4, null);
      if ($sentence != $last_sentence) {
        $span = 0;
      }
      $last_sentence = $sentence;
      if (array_key_exists($sentence, $retained)) {
        $segmentation_out[$sentence][$span]["brackets"] = $brackets;
        $segmentation_out[$sentence][$span]["nt"] = $nt;
        $segmentation_out[$sentence][$span]["words"] = rtrim((string) $words);
        if ($nt != "") {
          $nt_count[$nt] = 1;
        }
        $span++;
      }
    }
    # no non-terminal markup, if there are two or less non-terminals (X,S)
    if (count($nt_count) <= 2) {
      foreach ($segmentation_out as $sentence => $segmentation_span) {
        foreach ($segmentation_span as $span => $type) {
          $segmentation_out[$sentence][$span]["nt"] = "";
        }
      }
    }
  }

  # node lines: seven space-separated fields per node
  $node = array();
  $file = get_current_analysis_filename("basic", "node");
  if (file_exists($file)) {
    $data = file($file);
    if ($data === false) {
      $data = array();
    }
    $n = 0;
    $last_sentence = -1;
    foreach ($data as $raw) {
      list($sentence, $depth, $start_div, $end_div, $start_div_in, $end_div_in, $children) = array_pad(explode(" ", $raw), 7, null);
      if ($sentence != $last_sentence) {
        $n = 0;
      }
      $last_sentence = $sentence;
      if (array_key_exists($sentence, $retained)) {
        $node[$sentence][$n]['depth'] = $depth;
        $node[$sentence][$n]['start_div'] = $start_div;
        $node[$sentence][$n]['end_div'] = $end_div;
        $node[$sentence][$n]['start_div_in'] = $start_div_in;
        $node[$sentence][$n]['end_div_in'] = $end_div_in;
        $node[$sentence][$n]['children'] = rtrim((string) $children);
        $n++;
      }
    }
  }

  # display
  if ($filter != "") {
    print " (" . (count($input) - count($filtered)) . " retaining)";
  }
  print "</font><BR>\n";

  $biconcor = get_biconcor_version($dir, $set, $id);
  for ($i = $offset; $i < $count + $offset && $i < count($bleu); $i++) {
    $line = $bleu[$i];
    $sid = $line["id"];

    # Per-sentence data; null when nothing was loaded for this sentence.
    # Kept in plain variables so they can be handed to the annotation helpers.
    $seg_in = isset($segmentation[$sid]) ? $segmentation[$sid] : null;
    $seg_out = isset($segmentation_out[$sid]) ? $segmentation_out[$sid] : null;
    $sentence_nodes = isset($node[$sid]) ? $node[$sid] : null;
    $sentence_input = isset($input[$sid]) ? $input[$sid] : null;

    $search_graph_dir = get_current_analysis_filename("basic", "search-graph");
    if (file_exists($search_graph_dir) && file_exists($search_graph_dir . "/graph." . $sid)) {
      $state = return_state_for_link();
      print "<FONT SIZE=-1><A TARGET=_blank HREF=\"?{$state}&analysis=sgviz&set={$set}&id={$id}&sentence=" . $sid . "\">show search graph</a></FONT><br>\n";
    }

    if ($hierarchical) {
      annotation_hierarchical($sid, $seg_in, $seg_out, $sentence_nodes);
    }

    # input side is only shown when the input annotation file was loaded
    if ($input) {
      print "<div id=\"info-" . $sid . "\" style=\"border-color:black; background:#ffff80; opacity:0; width:100%; border:1px;\">0 occ. in corpus, 0 translations, entropy: 0.00</div>\n";
      if ($biconcor) {
        print "<div id=\"biconcor-" . $sid . "\" class=\"biconcor\"><font size=-2>(click on input phrase for bilingual concordancer)</font></div>";
      }
      if ($hierarchical) {
        sentence_annotation_hierarchical("#" . $sid, $sid, $sentence_input, $seg_in, "in");
      } else {
        print "<font size=-2>[#" . $sid . "]</font> ";
        input_annotation($sid, $sentence_input, $seg_in, $filter);
      }
    }

    # output side, labelled with the sentence score
    if ($hierarchical) {
      sentence_annotation_hierarchical($line["bleu"], $sid, $line["system"], $seg_out, "out");
    } else {
      print "<font size=-2>[" . $line["bleu"] . "]</font> ";
      output_annotation($sid, $line["system"], $seg_in);
    }
    print "<br><font size=-2>[ref]</font> " . $line["reference"] . "<hr>";
  }
}
/**
 * Print the score table cells of one experiment run: one <td> per entry of
 * the global $evalset, optionally followed by an analysis-link cell.
 *
 * For each evaluation set the score string is taken from $info->result[$set]
 * (empty if absent) and split on " ; ". Each part that matches a BLEU, IBM or
 * METEOR score pattern is printed (linked to the cleaned or raw output file
 * under {$dir}/evaluation/ when one exists, except for the "avg" set); any
 * other part is printed as "-". For the "avg" set with more than one part,
 * the metric name is printed in front of the value.
 *
 * When $has_analysis contains the set, an extra cell is printed that holds a
 * link and a checkbox if get_analysis_version() reports a "basic" analysis.
 *
 * @param mixed  $id   run identifier, used in element ids, file names and links
 * @param object $info run information; its "result" property, if present, is
 *                     indexed by evaluation set name
 * @return void  output is printed directly
 */
function output_score($id, $info) {
  global $evalset, $has_analysis, $setup, $dir;

  $state = return_state_for_link();

  # tried in this order; the first pattern that matches wins
  $metric_patterns = array(
    '/([\\d\\(\\)\\.\\s]+) (BLEU[\\-c]*)/',
    '/([\\d\\(\\)\\.\\s]+) (IBM[\\-c]*)/',
    '/([\\d\\(\\)\\.\\s]+) (METEOR[\\-c]*)/'
  );

  # foreach replaces the while(list() = each()) idiom; each() is gone in PHP 8
  foreach ($evalset as $set => $dummy) {
    if (property_exists($info, "result") && array_key_exists($set, $info->result)) {
      $score = $info->result[$set];
    } else {
      $score = "";
    }

    print "<td align=center id=\"score-{$id}-{$set}\">";
    $each_score = explode(" ; ", $score);
    for ($i = 0; $i < count($each_score); $i++) {
      $match = array();
      $recognized = false;
      foreach ($metric_patterns as $pattern) {
        if (preg_match($pattern, $each_score[$i], $match)) {
          $recognized = true;
          break;
        }
      }

      if (!$recognized) {
        print "-";
        continue;
      }

      if ($i > 0) {
        print "<BR>";
      }

      # link the score to the system output, preferring the cleaned version
      $opened_a_tag = 0;
      if ($set != "avg") {
        if (file_exists("{$dir}/evaluation/{$set}.cleaned.{$id}")) {
          print "<a href=\"?{$state}&show=evaluation/{$set}.cleaned.{$id}\">";
          $opened_a_tag = 1;
        } elseif (file_exists("{$dir}/evaluation/{$set}.output.{$id}")) {
          print "<a href=\"?{$state}&show=evaluation/{$set}.output.{$id}\">";
          $opened_a_tag = 1;
        }
      }

      # with several averaged metrics, say which one each number belongs to
      if ($set == "avg" && count($each_score) > 1) {
        print $match[2] . ": ";
      }
      print $match[1];
      if ($opened_a_tag) {
        print "</a>";
      }
    }
    print "</td>";

    if ($has_analysis && array_key_exists($set, $has_analysis)) {
      print "<td align=center>";
      $analysis = get_analysis_version($dir, $set, $id);
      # !empty() also covers a missing "basic" entry
      if (!empty($analysis["basic"])) {
        print "<a href=\"?analysis=show&setup={$setup}&set={$set}&id={$id}\">Ⓐ</a> <input type=checkbox name=analysis-{$id}-{$set} value=1>";
      }
      print "</td>";
    }
  }
}