Beispiel #1
0
/**
 * Emit the search graph of one sentence as JSON and terminate the request.
 *
 * Reads the tab-separated file "graph.<sentence>" inside the directory
 * returned by get_current_analysis_filename("basic", "search-graph").
 * The first field of every row is used as the edge key; the next ten
 * fields (padded with null when a row is short) become its value list.
 * The result is printed as {"edge": ...} and the script exits.
 *
 * @param string|int $sentence sentence identifier used as the file suffix
 * @return void this function never returns; it calls exit
 */
function sgviz_data($sentence)
{
    header('Content-type: application/json');

    // Only the final path component of $sentence is used, so the value
    // cannot step out of the search-graph directory.
    $file = get_current_analysis_filename("basic", "search-graph") . "/graph." . basename((string) $sentence);

    // Start from an empty list so a missing or empty file still yields
    // a well-defined "edge" member.
    $edge = array();

    $handle = is_readable($file) ? fopen($file, "r") : false;
    if ($handle !== false) {
        while (($line = fgets($handle)) !== false) {
            $line = chop($line);
            if ($line === '') {
                // A blank line carries no edge; skip it instead of
                // registering an entry under the empty key.
                continue;
            }
            // addslashes() is kept from the original encoding step so the
            // emitted payload stays the same for existing consumers.
            $e = explode("\t", addslashes($line));
            // Fields 1..10, null-padded when the row has fewer columns.
            $edge[$e[0]] = array_slice(array_pad($e, 11, null), 1, 10);
        }
        fclose($handle);
    }

    $return = array('edge' => $edge);
    print json_encode($return);
    exit;
}
Beispiel #2
0
/**
 * Print the per-sentence annotation view (input, system output, reference).
 *
 * Steps, in order:
 *  1. load the "input-annotation" file and, when $filter is non-empty, mark
 *     every line whose first tab field does not contain $filter as a
 *     space-separated token;
 *  2. load the "bleu-annotation" rows (score, sentence id, system output,
 *     then any number of references) for sentences that were not filtered;
 *  3. sort them according to $_GET['sort'] ("order" when absent) and pick
 *     a window of at most $count rows, starting at an offset that depends
 *     on the sort mode;
 *  4. load segmentation, input/output tree and node data for that window;
 *  5. print the HTML for every sentence in the window.
 *
 * Writes the globals $sort and $biconcor as the original did.
 *
 * @param int    $count  maximum number of sentences to print
 * @param string $filter word to filter input sentences by, or "" for none
 * @return void
 */
function sentence_annotation($count, $filter)
{
    global $set, $id, $dir, $biconcor;
    global $sort;

    # get input
    $input = array();
    $filtered = array();
    $file = get_current_analysis_filename("coverage", "input-annotation");
    if (file_exists($file)) {
        $input = file($file);
        if ($input === false) {
            $input = array();
        }
        # filter if so specified
        if ($filter != "") {
            foreach ($input as $i => $input_line) {
                $item = explode("\t", $input_line);
                $word = explode(" ", $item[0]);
                // Loose comparison, as in the original word-by-word loop.
                if (!in_array($filter, $word)) {
                    $filtered[$i] = 1;
                }
            }
        }
    }

    # load bleu scores
    $bleu = array();
    $data = file(get_current_analysis_filename("basic", "bleu-annotation"));
    if ($data === false) {
        $data = array();
    }
    foreach ($data as $row) {
        // explode() replaces split(), which no longer exists in PHP 7+;
        // the separator is a literal tab, so the result is the same.
        $item = explode("\t", $row);
        if (count($item) < 2) {
            // No sentence id on this row (e.g. a blank line): nothing to show.
            continue;
        }
        if (array_key_exists($item[1], $filtered)) {
            continue;
        }
        $bleu[] = array(
            "bleu" => $item[0],
            "id" => $item[1],
            "system" => isset($item[2]) ? $item[2] : null,
            // All remaining fields are references, shown one per line.
            "reference" => implode("<br>", array_slice($item, 3)),
        );
    }

    # sort and label additional sentences as filtered
    $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
    if ($sort == '') {
        $sort = "order";
    }
    // A closure instead of a nested named function: a named function would
    // be declared again (fatal error) if sentence_annotation() ran twice.
    $compare = function ($a, $b) use ($sort) {
        // Unknown sort modes compare everything as equal.
        $a_idx = 0;
        $b_idx = 0;
        if ($sort == "order") {
            $a_idx = $a["id"];
            $b_idx = $b["id"];
        } elseif ($sort == "worst" || $sort == "75") {
            $a_idx = $a["bleu"];
            $b_idx = $b["bleu"];
            if ($a_idx == $b_idx) {
                // Ties are broken by descending sentence id.
                $a_idx = $b["id"];
                $b_idx = $a["id"];
            }
        } elseif ($sort == "best" || $sort == "avg" || $sort == "25") {
            $a_idx = -$a["bleu"];
            $b_idx = -$b["bleu"];
            if ($a_idx == $b_idx) {
                // Ties are broken by ascending sentence id.
                $a_idx = $a["id"];
                $b_idx = $b["id"];
            }
        }
        if ($a_idx == $b_idx) {
            return 0;
        }
        return $a_idx < $b_idx ? -1 : 1;
    };
    usort($bleu, $compare);

    // Quartile and median views start a quarter / half way into the list.
    $offset = 0;
    if ($sort == "25" || $sort == "75") {
        $offset = (int) (count($bleu) / 4);
    } elseif ($sort == "avg") {
        $offset = (int) (count($bleu) / 2);
    }

    $retained = array();
    for ($i = $offset; $i < $count + $offset && $i < count($bleu); $i++) {
        $retained[$bleu[$i]["id"]] = 1;
    }

    # get segmentation (phrase alignment)
    $segmentation = array();
    $file = get_current_analysis_filename("basic", "segmentation-annotation");
    if (file_exists($file)) {
        $data = file($file);
        if ($data === false) {
            $data = array();
        }
        foreach ($data as $i => $row) {
            if ($filter == "" || array_key_exists($i, $retained)) {
                $segment = 0;
                foreach (explode(" ", $row) as $item) {
                    // Each item is in_start:in_end:out_start:out_end;
                    // missing parts become null, as list() did before.
                    list($in_start, $in_end, $out_start, $out_end) = array_pad(explode(":", $item), 4, null);
                    $segment++;
                    $segmentation[$i]["input_start"][$in_start] = $segment;
                    $segmentation[$i]["input_end"][$in_end] = $segment;
                    $segmentation[$i]["output_start"][$out_start] = $segment;
                    // The last item of a row still carries the line break;
                    // the integer cast strips it (the original used "+ 0").
                    $segmentation[$i]["output_end"][(int) $out_end] = $segment;
                }
            }
        }
    }

    # get hierarchical data
    $hierarchical = 0;
    $file = get_current_analysis_filename("basic", "input-tree");
    if (file_exists($file)) {
        $data = file($file);
        if ($data === false) {
            $data = array();
        }
        $span = 0;
        $last_sentence = -1;
        foreach ($data as $row) {
            // The non-terminal label is read but not stored for the input
            // side (storing it was already disabled in the original).
            list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $row), 4, null);
            if ($sentence != $last_sentence) {
                $span = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $segmentation[$sentence][$span]["brackets"] = $brackets;
                $segmentation[$sentence][$span]["words"] = rtrim((string) $words);
                $span++;
            }
        }
        $hierarchical = 1;
    }

    $segmentation_out = array();
    $file = get_current_analysis_filename("basic", "output-tree");
    if (file_exists($file)) {
        $data = file($file);
        if ($data === false) {
            $data = array();
        }
        $span = 0;
        $last_sentence = -1;
        $nt_count = array();
        foreach ($data as $row) {
            list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $row), 4, null);
            if ($sentence != $last_sentence) {
                $span = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $segmentation_out[$sentence][$span]["brackets"] = $brackets;
                $segmentation_out[$sentence][$span]["nt"] = $nt;
                $segmentation_out[$sentence][$span]["words"] = rtrim((string) $words);
                if ($nt != "") {
                    $nt_count[$nt] = 1;
                }
                $span++;
            }
        }
        # no non-terminal markup, if there are two or less non-terminals (X,S)
        if (count($nt_count) <= 2) {
            foreach ($segmentation_out as $sentence => $segmentation_span) {
                foreach ($segmentation_span as $span => $type) {
                    $segmentation_out[$sentence][$span]["nt"] = "";
                }
            }
        }
    }

    $node = array();
    $file = get_current_analysis_filename("basic", "node");
    if (file_exists($file)) {
        $data = file($file);
        if ($data === false) {
            $data = array();
        }
        $n = 0;
        $last_sentence = -1;
        foreach ($data as $row) {
            list($sentence, $depth, $start_div, $end_div, $start_div_in, $end_div_in, $children) = array_pad(explode(" ", $row), 7, null);
            if ($sentence != $last_sentence) {
                $n = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $node[$sentence][$n] = array(
                    'depth' => $depth,
                    'start_div' => $start_div,
                    'end_div' => $end_div,
                    'start_div_in' => $start_div_in,
                    'end_div_in' => $end_div_in,
                    'children' => rtrim((string) $children),
                );
                $n++;
            }
        }
    }

    # display
    if ($filter != "") {
        print " (" . (count($input) - count($filtered)) . " retaining)";
    }
    print "</font><BR>\n";
    $biconcor = get_biconcor_version($dir, $set, $id);
    for ($i = $offset; $i < $count + $offset && $i < count($bleu); $i++) {
        $line = $bleu[$i];
        $sid = $line["id"];

        // Per-sentence lookups; null when no data was loaded for this id.
        $seg_in = isset($segmentation[$sid]) ? $segmentation[$sid] : null;
        $seg_out = isset($segmentation_out[$sid]) ? $segmentation_out[$sid] : null;
        $sentence_nodes = isset($node[$sid]) ? $node[$sid] : null;
        $input_line = isset($input[$sid]) ? $input[$sid] : null;

        $search_graph_dir = get_current_analysis_filename("basic", "search-graph");
        if (file_exists($search_graph_dir) && file_exists($search_graph_dir . "/graph." . $sid)) {
            $state = return_state_for_link();
            print "<FONT SIZE=-1><A TARGET=_blank HREF=\"?{$state}&analysis=sgviz&set={$set}&id={$id}&sentence=" . $sid . "\">show search graph</a></FONT><br>\n";
        }
        if ($hierarchical) {
            annotation_hierarchical($sid, $seg_in, $seg_out, $sentence_nodes);
        }
        if ($input) {
            print "<div id=\"info-" . $sid . "\" style=\"border-color:black; background:#ffff80; opacity:0; width:100%; border:1px;\">0 occ. in corpus, 0 translations, entropy: 0.00</div>\n";
            if ($biconcor) {
                print "<div id=\"biconcor-" . $sid . "\" class=\"biconcor\"><font size=-2>(click on input phrase for bilingual concordancer)</font></div>";
            }
            if ($hierarchical) {
                sentence_annotation_hierarchical("#" . $sid, $sid, $input_line, $seg_in, "in");
            } else {
                print "<font size=-2>[#" . $sid . "]</font> ";
                input_annotation($sid, $input_line, $seg_in, $filter);
            }
        }
        if ($hierarchical) {
            sentence_annotation_hierarchical($line["bleu"], $sid, $line["system"], $seg_out, "out");
        } else {
            print "<font size=-2>[" . $line["bleu"] . "]</font> ";
            output_annotation($sid, $line["system"], $seg_in);
        }
        print "<br><font size=-2>[ref]</font> " . $line["reference"] . "<hr>";
    }
}
/**
 * Print the n-gram precision and metric comparison between two runs.
 *
 * Loads the "summary" analysis file of run $id (index 0) and run $id2
 * (index 1); every line is expected to look like "key: value". It then
 * prints an HTML table with, for 1- to 4-grams, the number of correct
 * n-grams, the precision percentage and the number of wrong n-grams of
 * run $id2, each followed by the signed difference to run $id. A second
 * cell lists the BLEU / IBM / METEOR scores found in the experiments'
 * result strings, again with the difference, and the length difference.
 *
 * Reads the globals $experiment, $dir, $set, $id and $id2.
 *
 * @return void
 */
function ngram_summary_diff()
{
    global $experiment, $evalset, $dir, $set, $id, $id2;

    // load data
    $info = array(array(), array());
    for ($idx = 0; $idx < 2; $idx++) {
        $data = file(get_analysis_filename($dir, $set, $idx ? $id2 : $id, "basic", "summary"));
        if ($data === false) {
            $data = array();
        }
        foreach ($data as $row) {
            // explode() replaces split(), which no longer exists in PHP 7+;
            // ": " is a literal separator, so the result is the same.
            $item = explode(": ", $row);
            if (count($item) >= 2) {
                $info[$idx][$item[0]] = $item[1];
            }
        }
    }

    // Numeric value of a summary entry; 0 when the entry is missing.
    // The float cast also drops the line break left at the end of the value.
    $value = function ($idx, $key) use ($info) {
        return isset($info[$idx][$key]) ? (float) $info[$idx][$key] : 0.0;
    };

    print "<table cellspacing=5 width=100%><tr><td valign=top align=center bgcolor=#eeeeee>";
    print "<b>Precision of Output</b><br>";
    $type = "precision";
    print "<table><tr><td>{$type}</td><td>1-gram</td><td>2-gram</td><td>3-gram</td><td>4-gram</td></tr>\n";

    // Build the three table rows cell by cell, one column per n-gram order.
    $correct_cells = "";
    $ratio_cells = "";
    $wrong_cells = "";
    for ($n = 1; $n <= 4; $n++) {
        $correct = array();
        $wrong = array();
        $percent = array();
        for ($idx = 0; $idx < 2; $idx++) {
            $correct[$idx] = $value($idx, "{$type}-{$n}-correct");
            $total = $value($idx, "{$type}-{$n}-total");
            $wrong[$idx] = $total - $correct[$idx];
            // A zero total would otherwise be a division by zero.
            $percent[$idx] = $total != 0 ? $correct[$idx] / $total * 100 : 0.0;
        }
        $correct_cells .= sprintf("<td>%d (%+d)</td>", $correct[1], $correct[1] - $correct[0]);
        $ratio_cells .= sprintf("<td>%.1f%% (%+.1f%%)</td>", $percent[1], $percent[1] - $percent[0]);
        $wrong_cells .= sprintf("<td>%d (%+d)</td>", $wrong[1], $wrong[1] - $wrong[0]);
    }
    print "<tr><td>correct</td>" . $correct_cells . "</tr>\n";
    print "<tr><td>&nbsp;</td>" . $ratio_cells . "</tr>\n";
    print "<tr><td>wrong</td>" . $wrong_cells . "</tr>\n";
    print "</table>";

    print "<A HREF=\"javascript:generic_show_diff('PrecisionRecallDetailsDiff','')\">details</A> ";
    if (file_exists(get_current_analysis_filename("precision", "precision-by-corpus-coverage")) && file_exists(get_current_analysis_filename2("precision", "precision-by-corpus-coverage"))) {
        print "| <A HREF=\"javascript:generic_show_diff('PrecisionByCoverageDiff','')\">precision of input by coverage</A> ";
    }
    print "</td><td valign=top align=center bgcolor=#eeeeee>";
    print "<b>Metrics</b><br>\n";

    // Collect metric scores per run, keyed by metric name as matched.
    $score = array();
    for ($idx = 0; $idx < 2; $idx++) {
        $each_score = explode(" ; ", $experiment[$idx ? $id2 : $id]->result[$set]);
        foreach ($each_score as $entry) {
            // Tried in this order; the first metric that matches wins.
            foreach (array('BLEU', 'IBM', 'METEOR') as $metric) {
                if (preg_match('/([\\d\\(\\)\\.\\s]+) (' . $metric . '[\\-c]*)/', $entry, $match)) {
                    $score[$match[2]][$idx] = $match[1];
                    break;
                }
            }
        }
    }

    $header = "";
    $score_line = "";
    $diff_line = "";
    foreach ($score as $name => $per_run) {
        $new = isset($per_run[1]) ? $per_run[1] : "";
        $old = isset($per_run[0]) ? $per_run[0] : "";
        $header .= "<td>{$name}</td>";
        $score_line .= "<td>" . $new . "</td>";
        // The matched text may carry extra parts after the number (such as
        // a parenthesised value); the float cast keeps the leading number.
        $diff_line .= sprintf("<td>%+.2f</td>", (float) $new - (float) $old);
    }
    print "<table border=1><tr>" . $header . "</tr><tr>" . $score_line . "</tr><tr>" . $diff_line . "</tr></table>";
    printf("length-diff<br>%d (%+d)", $value(1, "precision-1-total") - $value(1, "recall-1-total"), $value(1, "precision-1-total") - $value(0, "precision-1-total"));
    // Close the cell, row and outer table opened at the top of this function
    // (the original emitted opening <tr><table> tags here).
    print "</td></tr></table>";
}