/**
 * Emit the search graph of one sentence as JSON and terminate the request.
 *
 * Reads the tab-separated file "graph.<sentence>" inside the directory returned
 * by get_current_analysis_filename("basic", "search-graph"). For every line, the
 * first column becomes a key of the "edge" map and columns 2-11 become its value
 * (a list of ten fields). The response has the shape {"edge": {...}}.
 *
 * An unreadable or empty graph file produces an empty "edge" collection.
 *
 * @param string $sentence sentence identifier, used as the suffix of the graph file name
 */
function sgviz_data($sentence) {
    header('Content-type: application/json');
    $file = get_current_analysis_filename("basic", "search-graph") . "/graph.{$sentence}";

    // Start from an empty collection so the response is well-formed even when
    // the file is missing or contains no lines.
    $edge = array();
    $handle = is_readable($file) ? fopen($file, "r") : false;
    if ($handle !== false) {
        while (($line = fgets($handle)) !== false) {
            // NOTE(review): addslashes() is kept from the original although
            // json_encode() escapes on its own; presumably the consumer expects
            // the extra backslashes - confirm before removing.
            // Pad to 11 columns so short lines yield nulls instead of undefined offsets.
            $e = array_pad(explode("\t", addslashes(chop($line))), 11, null);
            $edge[$e[0]] = array_slice($e, 1, 10);
        }
        fclose($handle);
    }

    $return = array('edge' => $edge);
    print json_encode($return);
    exit;
}
/**
 * Print the per-sentence annotation view (input, system output, reference).
 *
 * Loads the BLEU annotation of the current analysis, optionally drops sentences
 * whose input does not contain $filter, sorts them according to the "sort"
 * request parameter and prints up to $count sentences starting at a
 * sort-dependent offset. Phrase segmentation, tree and node annotation are
 * loaded when the corresponding files exist.
 *
 * Side effects: writes HTML to the output, sets the global $sort (defaulting to
 * "order") and the global $biconcor.
 *
 * @param int    $count  maximum number of sentences to display
 * @param string $filter when non-empty, keep only sentences whose first
 *                       tab-separated input-annotation field contains this
 *                       value as a space-separated token
 */
function sentence_annotation($count, $filter) {
    global $set, $id, $dir, $biconcor;
    global $sort;

    # get input
    $input = array();
    $filtered = array();
    $file = get_current_analysis_filename("coverage", "input-annotation");
    if (file_exists($file)) {
        $input = sentence_annotation_lines($file);

        # filter if so specified: mark every sentence that lacks the filter word
        if ($filter != "") {
            foreach ($input as $i => $input_line) {
                $item = explode("\t", $input_line);
                if (!in_array($filter, explode(" ", $item[0]))) {
                    $filtered[$i] = 1;
                }
            }
        }
    }

    # load bleu scores; columns are: bleu, id, system output, reference(s)...
    $bleu = array();
    $data = file(get_current_analysis_filename("basic", "bleu-annotation"));
    if ($data === false) {
        $data = array();
    }
    foreach ($data as $data_line) {
        $item = explode("\t", $data_line);
        if (count($item) < 3) {
            // malformed or blank line: no id / system output to show
            continue;
        }
        if (array_key_exists($item[1], $filtered)) {
            continue;
        }
        $bleu[] = array(
            "bleu" => $item[0],
            "id" => $item[1],
            "system" => $item[2],
            // multiple references are shown on separate lines
            "reference" => implode("<br>", array_slice($item, 3)),
        );
    }

    # sort; the comparator reads the mode from the global $sort
    $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
    if ($sort == '') {
        $sort = "order";
    }
    usort($bleu, 'sentence_annotation_cmp');

    # the quartile / median views start part-way into the sorted list
    $total = count($bleu);
    $offset = 0;
    if ($sort == "25" || $sort == "75") {
        $offset = (int) ($total / 4);
    } elseif ($sort == "avg") {
        $offset = (int) ($total / 2);
    }
    $last = $count + $offset;

    # remember which sentences will actually be displayed
    $retained = array();
    for ($i = $offset; $i < $last && $i < $total; $i++) {
        $retained[$bleu[$i]["id"]] = 1;
    }

    # get segmentation (phrase alignment)
    # each item is in_start:in_end:out_start:out_end; segments are numbered from 1
    $segmentation = array();
    $file = get_current_analysis_filename("basic", "segmentation-annotation");
    foreach (sentence_annotation_lines($file) as $i => $data_line) {
        if ($filter != "" && !array_key_exists($i, $retained)) {
            continue;
        }
        $segment = 0;
        foreach (explode(" ", rtrim($data_line, "\r\n")) as $item) {
            $bounds = explode(":", $item);
            if (count($bounds) < 4) {
                // empty or malformed item (e.g. blank line, trailing space)
                continue;
            }
            list($in_start, $in_end, $out_start, $out_end) = $bounds;
            $segment++;
            $segmentation[$i]["input_start"][$in_start] = $segment;
            $segmentation[$i]["input_end"][$in_end] = $segment;
            $segmentation[$i]["output_start"][$out_start] = $segment;
            $segmentation[$i]["output_end"][$out_end + 0] = $segment;
        }
    }

    # get hierarchical data
    # input tree: sentence, brackets, non-terminal, words (tab-separated);
    # the non-terminal label is not stored for the input side
    $hierarchical = 0;
    $file = get_current_analysis_filename("basic", "input-tree");
    if (file_exists($file)) {
        $span = 0;
        $last_sentence = -1;
        foreach (sentence_annotation_lines($file) as $data_line) {
            list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $data_line), 4, "");
            if ($sentence != $last_sentence) {
                $span = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $segmentation[$sentence][$span]["brackets"] = $brackets;
                $segmentation[$sentence][$span]["words"] = rtrim($words);
                $span++;
            }
        }
        $hierarchical = 1;
    }

    # output tree: same layout, non-terminal labels are kept
    $segmentation_out = array();
    $file = get_current_analysis_filename("basic", "output-tree");
    if (file_exists($file)) {
        $span = 0;
        $last_sentence = -1;
        $nt_count = array();
        foreach (sentence_annotation_lines($file) as $data_line) {
            list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $data_line), 4, "");
            if ($sentence != $last_sentence) {
                $span = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $segmentation_out[$sentence][$span]["brackets"] = $brackets;
                $segmentation_out[$sentence][$span]["nt"] = $nt;
                $segmentation_out[$sentence][$span]["words"] = rtrim($words);
                if ($nt != "") {
                    $nt_count[$nt] = 1;
                }
                $span++;
            }
        }

        # no non-terminal markup, if there are two or less non-terminals (X,S)
        if (count($nt_count) <= 2) {
            foreach ($segmentation_out as $sentence => $segmentation_span) {
                foreach ($segmentation_span as $span => $type) {
                    $segmentation_out[$sentence][$span]["nt"] = "";
                }
            }
        }
    }

    # node data: sentence, depth, start/end div, start/end div (input), children
    $node = array();
    $file = get_current_analysis_filename("basic", "node");
    if (file_exists($file)) {
        $n = 0;
        $last_sentence = -1;
        foreach (sentence_annotation_lines($file) as $data_line) {
            list($sentence, $depth, $start_div, $end_div, $start_div_in, $end_div_in, $children)
                = array_pad(explode(" ", $data_line), 7, "");
            if ($sentence != $last_sentence) {
                $n = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $node[$sentence][$n] = array(
                    'depth' => $depth,
                    'start_div' => $start_div,
                    'end_div' => $end_div,
                    'start_div_in' => $start_div_in,
                    'end_div_in' => $end_div_in,
                    'children' => rtrim($children),
                );
                $n++;
            }
        }
    }

    # display
    if ($filter != "") {
        print " (" . (count($input) - count($filtered)) . " retaining)";
    }
    # NOTE(review): closes a <font> element that is presumably opened by the caller - confirm
    print "</font><BR>\n";

    $biconcor = get_biconcor_version($dir, $set, $id);

    # the search graph directory is the same for every sentence
    $search_graph_dir = get_current_analysis_filename("basic", "search-graph");
    $has_search_graphs = file_exists($search_graph_dir);

    for ($i = $offset; $i < $last && $i < $total; $i++) {
        $line = $bleu[$i];
        $sid = $line["id"];

        # per-sentence annotation; null when nothing was loaded for this sentence
        $input_line = isset($input[$sid]) ? $input[$sid] : null;
        $seg_in = isset($segmentation[$sid]) ? $segmentation[$sid] : null;
        $seg_out = isset($segmentation_out[$sid]) ? $segmentation_out[$sid] : null;
        $nodes = isset($node[$sid]) ? $node[$sid] : null;

        if ($has_search_graphs && file_exists($search_graph_dir . "/graph." . $sid)) {
            $state = return_state_for_link();
            print "<FONT SIZE=-1><A TARGET=_blank HREF=\"?{$state}&analysis=sgviz&set={$set}&id={$id}&sentence=" . $sid . "\">show search graph</a></FONT><br>\n";
        }

        if ($hierarchical) {
            annotation_hierarchical($sid, $seg_in, $seg_out, $nodes);
        }

        if ($input) {
            print "<div id=\"info-" . $sid . "\" style=\"border-color:black; background:#ffff80; opacity:0; width:100%; border:1px;\">0 occ. in corpus, 0 translations, entropy: 0.00</div>\n";
            if ($biconcor) {
                print "<div id=\"biconcor-" . $sid . "\" class=\"biconcor\"><font size=-2>(click on input phrase for bilingual concordancer)</font></div>";
            }
            if ($hierarchical) {
                sentence_annotation_hierarchical("#" . $sid, $sid, $input_line, $seg_in, "in");
            } else {
                print "<font size=-2>[#" . $sid . "]</font> ";
                input_annotation($sid, $input_line, $seg_in, $filter);
            }
        }

        if ($hierarchical) {
            sentence_annotation_hierarchical($line["bleu"], $sid, $line["system"], $seg_out, "out");
        } else {
            print "<font size=-2>[" . $line["bleu"] . "]</font> ";
            output_annotation($sid, $line["system"], $seg_in);
        }
        print "<br><font size=-2>[ref]</font> " . $line["reference"] . "<hr>";
    }
}

/**
 * usort() comparator for BLEU-annotated sentences; the mode is the global $sort.
 *
 * - "order":               by id, ascending
 * - "worst", "75":         by BLEU ascending, ties by id descending
 * - "best", "avg", "25":   by BLEU descending, ties by id ascending
 * - anything else:         all entries compare equal
 *
 * Defined at file level (not inside sentence_annotation) so that calling
 * sentence_annotation() more than once does not redeclare a function.
 *
 * @param array $a entry with "id" and "bleu" keys
 * @param array $b entry with "id" and "bleu" keys
 * @return int -1, 0 or 1
 */
function sentence_annotation_cmp($a, $b) {
    global $sort;

    if ($sort == "order") {
        $a_idx = $a["id"];
        $b_idx = $b["id"];
    } elseif ($sort == "worst" || $sort == "75") {
        $a_idx = $a["bleu"];
        $b_idx = $b["bleu"];
        if ($a_idx == $b_idx) {
            $a_idx = $b["id"];
            $b_idx = $a["id"];
        }
    } elseif ($sort == "best" || $sort == "avg" || $sort == "25") {
        $a_idx = -$a["bleu"];
        $b_idx = -$b["bleu"];
        if ($a_idx == $b_idx) {
            $a_idx = $a["id"];
            $b_idx = $b["id"];
        }
    } else {
        return 0;
    }

    if ($a_idx == $b_idx) {
        return 0;
    }
    return $a_idx < $b_idx ? -1 : 1;
}

/**
 * Read a file into a list of lines (line endings kept).
 *
 * @param string $file path of the file
 * @return array the lines, or an empty array when the file is missing or unreadable
 */
function sentence_annotation_lines($file) {
    $lines = file_exists($file) ? file($file) : false;
    return $lines === false ? array() : $lines;
}
/**
 * Print the summary comparison of two runs: n-gram precision and metric scores.
 *
 * Reads the "basic"/"summary" analysis file of run $id and of run $id2 (both
 * globals). Every figure shown is the value of run $id2, followed in
 * parentheses by its change relative to run $id.
 *
 * Output is written directly as HTML: a precision table for 1- to 4-grams
 * (correct count, percentage, wrong count), links to detail views, a table of
 * BLEU / IBM / METEOR scores taken from $experiment[...]->result[$set], and the
 * length difference.
 */
function ngram_summary_diff() {
    global $experiment, $dir, $set, $id, $id2;

    // load data: $info[0] is the summary of run $id, $info[1] that of run $id2
    $info = array(array(), array());
    for ($idx = 0; $idx < 2; $idx++) {
        $data = file(get_analysis_filename($dir, $set, $idx ? $id2 : $id, "basic", "summary"));
        if ($data === false) {
            continue;
        }
        foreach ($data as $summary_line) {
            // "key: value" lines; strip the line ending so values are clean numbers
            $item = explode(": ", rtrim($summary_line, "\r\n"));
            if (count($item) >= 2) {
                $info[$idx][$item[0]] = $item[1];
            }
        }
    }

    print "<table cellspacing=5 width=100%><tr><td valign=top align=center bgcolor=#eeeeee>";
    print "<b>Precision of Output</b><br>";

    // build the three table rows, one cell per n-gram order
    $type = "precision";
    $correct_row = "";
    $percent_row = "";
    $wrong_row = "";
    for ($n = 1; $n <= 4; $n++) {
        $correct = array();
        $wrong = array();
        $percent = array();
        for ($idx = 0; $idx < 2; $idx++) {
            $correct[$idx] = ngram_summary_diff_stat($info[$idx], "{$type}-{$n}-correct");
            $total = ngram_summary_diff_stat($info[$idx], "{$type}-{$n}-total");
            $wrong[$idx] = $total - $correct[$idx];
            // a zero total would be a division by zero; report 0% instead
            $percent[$idx] = $total != 0 ? $correct[$idx] / $total * 100 : 0;
        }
        $correct_row .= sprintf("<td>%d (%+d)</td>", $correct[1], $correct[1] - $correct[0]);
        $percent_row .= sprintf("<td>%.1f%% (%+.1f%%)</td>", $percent[1], $percent[1] - $percent[0]);
        $wrong_row .= sprintf("<td>%d (%+d)</td>", $wrong[1], $wrong[1] - $wrong[0]);
    }

    print "<table><tr><td>{$type}</td><td>1-gram</td><td>2-gram</td><td>3-gram</td><td>4-gram</td></tr>\n";
    print "<tr><td>correct</td>{$correct_row}</tr>\n";
    print "<tr><td> </td>{$percent_row}</tr>\n";
    print "<tr><td>wrong</td>{$wrong_row}</tr>\n";
    print "</table>";

    print "<A HREF=\"javascript:generic_show_diff('PrecisionRecallDetailsDiff','')\">details</A> ";
    if (file_exists(get_current_analysis_filename("precision", "precision-by-corpus-coverage"))
        && file_exists(get_current_analysis_filename2("precision", "precision-by-corpus-coverage"))) {
        print "| <A HREF=\"javascript:generic_show_diff('PrecisionByCoverageDiff','')\">precision of input by coverage</A> ";
    }

    print "</td><td valign=top align=center bgcolor=#eeeeee>";
    print "<b>Metrics</b><br>\n";

    // collect metric scores of both runs: $score[<metric name>][<run index>]
    $score = array();
    for ($idx = 0; $idx < 2; $idx++) {
        $each_score = explode(" ; ", $experiment[$idx ? $id2 : $id]->result[$set]);
        foreach ($each_score as $score_text) {
            // the first metric family that matches wins, in this order
            foreach (array("BLEU", "IBM", "METEOR") as $metric) {
                if (preg_match('/([\d\(\)\.\s]+) (' . $metric . '[\-c]*)/', $score_text, $match)) {
                    $score[$match[2]][$idx] = $match[1];
                    break;
                }
            }
        }
    }

    $header = "";
    $score_line = "";
    $diff_line = "";
    foreach ($score as $name => $value) {
        // a metric may have been reported for only one of the two runs
        $current = isset($value[1]) ? $value[1] : "";
        $previous = isset($value[0]) ? $value[0] : "";
        $header .= "<td>{$name}</td>";
        $score_line .= "<td>" . $current . "</td>";
        // scores may carry trailing text such as a parenthesised figure;
        // floatval() takes the leading number explicitly
        $diff_line .= sprintf("<td>%+.2f</td>", floatval($current) - floatval($previous));
    }
    print "<table border=1><tr>" . $header . "</tr><tr>" . $score_line . "</tr><tr>" . $diff_line . "</tr></table>";

    $precision_total = ngram_summary_diff_stat($info[1], "precision-1-total");
    printf(
        "length-diff<br>%d (%+d)",
        $precision_total - ngram_summary_diff_stat($info[1], "recall-1-total"),
        $precision_total - ngram_summary_diff_stat($info[0], "precision-1-total")
    );

    // close the cell, row and outer table opened at the top of this function
    print "</td></tr></table>";
}

/**
 * Look up a numeric entry of a parsed summary.
 *
 * @param array  $stats summary of one run, keyed by statistic name
 * @param string $key   statistic name, e.g. "precision-1-total"
 * @return int|float the value as a number, or 0 when it is missing or not numeric
 */
function ngram_summary_diff_stat($stats, $key) {
    if (!isset($stats[$key]) || !is_numeric($stats[$key])) {
        return 0;
    }
    return $stats[$key] + 0;
}