Exemple #1
0
/**
 * Print the per-sentence annotation view (input, system output, reference)
 * for a window of sentences of the current analysis.
 *
 * Data files are located through get_current_analysis_filename():
 *  - coverage/input-annotation        (optional) one input sentence per line
 *  - basic/bleu-annotation            tab-separated: score, sentence id,
 *                                     system output, then zero or more references
 *  - basic/segmentation-annotation    (optional) space-separated spans of the
 *                                     form in_start:in_end:out_start:out_end
 *  - basic/input-tree, output-tree    (optional) tab-separated: sentence,
 *                                     brackets, non-terminal, words
 *  - basic/node                       (optional) space-separated node records
 *
 * The sort mode is read from $_GET['sort'] (default "order") and stored in the
 * global $sort; the global $biconcor is refreshed from get_biconcor_version().
 * All output is printed directly as HTML.
 *
 * @param int    $count  maximum number of sentences to display
 * @param string $filter if non-empty, only sentences whose input contains this
 *                       exact space-separated token are kept
 * @return void
 */
function sentence_annotation($count, $filter)
{
    global $set, $id, $dir, $biconcor;
    global $sort;

    # Reads a file into an array of lines; null when the file does not exist,
    # an empty array when it exists but cannot be read.
    $read_lines = function ($path) {
        if (!file_exists($path)) {
            return null;
        }
        $lines = file($path);
        return $lines === false ? array() : $lines;
    };

    # get input
    $filtered = array();
    $input = $read_lines(get_current_analysis_filename("coverage", "input-annotation"));
    if ($input === null) {
        $input = array();
    }
    # mark every input line that does not contain the filter token
    if ($filter != "") {
        foreach ($input as $i => $input_line) {
            $item = explode("\t", $input_line);
            $word = explode(" ", $item[0]);
            # loose comparison on purpose: matches the original `==` test
            if (!in_array($filter, $word)) {
                $filtered[$i] = 1;
            }
        }
    }

    # load bleu scores
    $bleu = array();
    $data = $read_lines(get_current_analysis_filename("basic", "bleu-annotation"));
    if ($data === null) {
        $data = array();
    }
    foreach ($data as $row) {
        $item = explode("\t", $row);
        # skip malformed rows (e.g. blank lines) that lack score, id or output
        if (count($item) < 3) {
            continue;
        }
        if (!array_key_exists($item[1], $filtered)) {
            $bleu[] = array(
                "bleu" => $item[0],
                "id" => $item[1],
                "system" => $item[2],
                # all remaining columns are references, shown one per line
                "reference" => implode("<br>", array_slice($item, 3)),
            );
        }
    }

    # sort and label additional sentences as filtered
    $sort = isset($_GET['sort']) ? $_GET['sort'] : '';
    if ($sort == '') {
        $sort = "order";
    }
    # A closure instead of a nested named function: a named function declared
    # inside this body would be redeclared (fatal error) on a second call.
    $compare = function ($a, $b) use ($sort) {
        # unknown sort modes leave both keys equal, i.e. no reordering
        $a_idx = 0;
        $b_idx = 0;
        if ($sort == "order") {
            $a_idx = $a["id"];
            $b_idx = $b["id"];
        } elseif ($sort == "worst" || $sort == "75") {
            # ascending score; ties are broken by descending id
            $a_idx = $a["bleu"];
            $b_idx = $b["bleu"];
            if ($a_idx == $b_idx) {
                $a_idx = $b["id"];
                $b_idx = $a["id"];
            }
        } elseif ($sort == "best" || $sort == "avg" || $sort == "25") {
            # descending score; ties are broken by ascending id
            $a_idx = -$a["bleu"];
            $b_idx = -$b["bleu"];
            if ($a_idx == $b_idx) {
                $a_idx = $a["id"];
                $b_idx = $b["id"];
            }
        }
        if ($a_idx == $b_idx) {
            return 0;
        }
        return $a_idx < $b_idx ? -1 : 1;
    };
    usort($bleu, $compare);

    # "25"/"75" start a quarter into the sorted list, "avg" starts at the middle
    $offset = 0;
    if ($sort == "25" || $sort == "75") {
        $offset = (int) (count($bleu) / 4);
    } elseif ($sort == "avg") {
        $offset = (int) (count($bleu) / 2);
    }
    # exclusive end of the displayed window
    $end = min($count + $offset, count($bleu));
    $retained = array();
    for ($i = $offset; $i < $end; $i++) {
        $retained[$bleu[$i]["id"]] = 1;
    }

    # get segmentation (phrase alignment)
    $segmentation = array();
    $data = $read_lines(get_current_analysis_filename("basic", "segmentation-annotation"));
    if ($data !== null) {
        foreach ($data as $i => $row) {
            if ($filter == "" || array_key_exists($i, $retained)) {
                $segment = 0;
                foreach (explode(" ", $row) as $item) {
                    list($in_start, $in_end, $out_start, $out_end) = array_pad(explode(":", $item), 4, "");
                    $segment++;
                    $segmentation[$i]["input_start"][$in_start] = $segment;
                    $segmentation[$i]["input_end"][$in_end] = $segment;
                    $segmentation[$i]["output_start"][$out_start] = $segment;
                    # cast drops the trailing newline on the last span of a line
                    $segmentation[$i]["output_end"][(int) $out_end] = $segment;
                }
            }
        }
    }

    # Adds the spans of retained sentences from tree-file lines to $target and
    # returns the set of non-terminal labels seen (as array keys).
    $load_tree = function ($lines, &$target, $with_nt) use ($retained) {
        $nt_seen = array();
        $span = 0;
        $last_sentence = -1;
        foreach ($lines as $row) {
            list($sentence, $brackets, $nt, $words) = array_pad(explode("\t", $row), 4, "");
            # span numbering restarts with every new sentence
            if ($sentence != $last_sentence) {
                $span = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $target[$sentence][$span]["brackets"] = $brackets;
                if ($with_nt) {
                    $target[$sentence][$span]["nt"] = $nt;
                }
                $target[$sentence][$span]["words"] = rtrim($words);
                if ($nt != "") {
                    $nt_seen[$nt] = 1;
                }
                $span++;
            }
        }
        return $nt_seen;
    };

    # get hierarchical data
    $hierarchical = 0;
    $data = $read_lines(get_current_analysis_filename("basic", "input-tree"));
    if ($data !== null) {
        # input spans share $segmentation with the phrase alignment and do not
        # store the non-terminal label
        $load_tree($data, $segmentation, false);
        $hierarchical = 1;
    }
    $segmentation_out = array();
    $data = $read_lines(get_current_analysis_filename("basic", "output-tree"));
    if ($data !== null) {
        $nt_count = $load_tree($data, $segmentation_out, true);
        # no non-terminal markup, if there are two or less non-terminals (X,S)
        if (count($nt_count) <= 2) {
            foreach ($segmentation_out as $sentence => $segmentation_span) {
                foreach ($segmentation_span as $span => $type) {
                    $segmentation_out[$sentence][$span]["nt"] = "";
                }
            }
        }
    }
    $node = array();
    $data = $read_lines(get_current_analysis_filename("basic", "node"));
    if ($data !== null) {
        $n = 0;
        $last_sentence = -1;
        foreach ($data as $row) {
            list($sentence, $depth, $start_div, $end_div, $start_div_in, $end_div_in, $children) = array_pad(explode(" ", $row), 7, "");
            # node numbering restarts with every new sentence
            if ($sentence != $last_sentence) {
                $n = 0;
            }
            $last_sentence = $sentence;
            if (array_key_exists($sentence, $retained)) {
                $node[$sentence][$n]['depth'] = $depth;
                $node[$sentence][$n]['start_div'] = $start_div;
                $node[$sentence][$n]['end_div'] = $end_div;
                $node[$sentence][$n]['start_div_in'] = $start_div_in;
                $node[$sentence][$n]['end_div_in'] = $end_div_in;
                $node[$sentence][$n]['children'] = rtrim($children);
                $n++;
            }
        }
    }

    # display
    if ($filter != "") {
        print " (" . (count($input) - count($filtered)) . " retaining)";
    }
    print "</font><BR>\n";
    $biconcor = get_biconcor_version($dir, $set, $id);
    $search_graph_dir = get_current_analysis_filename("basic", "search-graph");
    for ($i = $offset; $i < $end; $i++) {
        $line = $bleu[$i];
        $sid = $line["id"];
        # missing per-sentence data is passed on as null
        $seg_in = isset($segmentation[$sid]) ? $segmentation[$sid] : null;
        $seg_out = isset($segmentation_out[$sid]) ? $segmentation_out[$sid] : null;
        $nodes = isset($node[$sid]) ? $node[$sid] : null;
        $input_line = isset($input[$sid]) ? $input[$sid] : null;

        if (file_exists($search_graph_dir) && file_exists($search_graph_dir . "/graph." . $sid)) {
            $state = return_state_for_link();
            print "<FONT SIZE=-1><A TARGET=_blank HREF=\"?{$state}&analysis=sgviz&set={$set}&id={$id}&sentence=" . $sid . "\">show search graph</a></FONT><br>\n";
        }
        if ($hierarchical) {
            annotation_hierarchical($sid, $seg_in, $seg_out, $nodes);
        }
        if ($input) {
            print "<div id=\"info-" . $sid . "\" style=\"border-color:black; background:#ffff80; opacity:0; width:100%; border:1px;\">0 occ. in corpus, 0 translations, entropy: 0.00</div>\n";
            if ($biconcor) {
                print "<div id=\"biconcor-" . $sid . "\" class=\"biconcor\"><font size=-2>(click on input phrase for bilingual concordancer)</font></div>";
            }
            if ($hierarchical) {
                sentence_annotation_hierarchical("#" . $sid, $sid, $input_line, $seg_in, "in");
            } else {
                print "<font size=-2>[#" . $sid . "]</font> ";
                input_annotation($sid, $input_line, $seg_in, $filter);
            }
        }
        if ($hierarchical) {
            sentence_annotation_hierarchical($line["bleu"], $sid, $line["system"], $seg_out, "out");
        } else {
            print "<font size=-2>[" . $line["bleu"] . "]</font> ";
            # the flat output view uses the phrase alignment stored in $segmentation
            output_annotation($sid, $line["system"], $seg_in);
        }
        print "<br><font size=-2>[ref]</font> " . $line["reference"] . "<hr>";
    }
}
Exemple #2
0
/**
 * Print the score table cells of one run: for every evaluation set in the
 * global $evalset one <td> with the run's scores, followed (for sets listed
 * in the global $has_analysis) by one <td> holding the analysis link.
 *
 * A score string may hold several metrics separated by " ; "; each part is
 * expected to look like "<number> <METRIC>" with METRIC starting with BLEU,
 * IBM or METEOR. Parts that do not match are printed as "-".
 *
 * @param mixed  $id   run identifier, used in element ids, links and file names
 * @param object $info run information; scores are read from $info->result[$set]
 * @return void
 */
function output_score($id, $info)
{
    global $evalset;
    global $has_analysis;
    global $setup;
    global $dir;
    $state = return_state_for_link();
    # metric names in the order in which they are tried against a score part
    $metrics = array("BLEU", "IBM", "METEOR");
    # foreach replaces the each()/list() idiom, which was removed in PHP 8.0
    foreach ($evalset as $set => $dummy) {
        $score = "";
        if (is_object($info) && property_exists($info, "result")
            && is_array($info->result) && array_key_exists($set, $info->result)) {
            $score = $info->result[$set];
        }
        print "<td align=center id=\"score-{$id}-{$set}\">";
        $each_score = explode(" ; ", $score);
        foreach ($each_score as $i => $one_score) {
            $match = array();
            $matched = false;
            foreach ($metrics as $metric) {
                if (preg_match('/([\d\(\)\.\s]+) (' . $metric . '[\-c]*)/', $one_score, $match)) {
                    $matched = true;
                    break;
                }
            }
            if (!$matched) {
                print "-";
                continue;
            }
            if ($i > 0) {
                print "<BR>";
            }
            # link the score to the cleaned output if present, else the raw output
            $opened_a_tag = 0;
            if ($set != "avg") {
                if (file_exists("{$dir}/evaluation/{$set}.cleaned.{$id}")) {
                    print "<a href=\"?{$state}&show=evaluation/{$set}.cleaned.{$id}\">";
                    $opened_a_tag = 1;
                } elseif (file_exists("{$dir}/evaluation/{$set}.output.{$id}")) {
                    print "<a href=\"?{$state}&show=evaluation/{$set}.output.{$id}\">";
                    $opened_a_tag = 1;
                }
            }
            # the average column names the metric when several are shown
            if ($set == "avg" && count($each_score) > 1) {
                print $match[2] . ": ";
            }
            print $match[1];
            if ($opened_a_tag) {
                print "</a>";
            }
        }
        print "</td>";
        if ($has_analysis && array_key_exists($set, $has_analysis)) {
            print "<td align=center>";
            $analysis = get_analysis_version($dir, $set, $id);
            # !empty() is the same truthiness test without a missing-key warning
            if (!empty($analysis["basic"])) {
                print "<a href=\"?analysis=show&setup={$setup}&set={$set}&id={$id}\">&#x24B6;</a> <input type=checkbox name=analysis-{$id}-{$set} value=1>";
            }
            print "</td>";
        }
    }
}