Ejemplo n.º 1
0
/**
 * Evaluates the merged ("agg") result list against the Google result list.
 *
 * Prints the query, the Google URLs and up to 100 merged results as HTML, then
 * computes precision, recall, precision at 10, F-measure and average precision
 * for the merged list. A merged result counts as relevant when its URL also
 * appears in the array returned by GOOGLE_RESULTS(). Every metric is echoed and
 * appended as one line to its own agg_*.txt file.
 *
 * Side effects: reads the globals $query_meta and $url_array, overwrites four
 * $_POST entries before including preprocessor.php, and sorts $url_array in
 * place with the "cmp" callback.
 *
 * @return void
 */
function EVAL_MAIN()
{
    // One output file per metric; append mode keeps the values of earlier runs.
    $agg_1 = fopen("agg_precision.txt", "a");
    $agg_2 = fopen("agg_recall.txt", "a");
    $agg_3 = fopen("agg_precision_at_10.txt", "a");
    $agg_4 = fopen("agg_f_measure.txt", "a");
    $agg_5 = fopen("agg_rel_average_precision.txt", "a");

    $agg_precision = 0;
    $agg_recall = 0;
    $agg_precision_at_10 = 0;
    $agg_f_measure = 0;
    $agg_rel_average_precision = 0;

    global $query_meta;
    global $url_array;
    include 'global_vars_insert.php';

    // Switch every preprocessing option on, then run the query through the preprocessor.
    $_POST["query"] = $query_meta;
    $_POST['Case_Folding'] = 'Case_Folding_On';
    $_POST['Synonyms'] = 'Synonyms_On';
    $_POST['Stemming'] = 'Stemming_On';
    include 'preprocessor.php';

    // NOTE(review): $bing_query is not assigned in this function; presumably one
    // of the included files defines it - confirm.
    echo 'QUERY = ' . $bing_query . '<br>';

    // The Google list serves as the relevance reference; print it 1-based.
    $google_url_array = GOOGLE_RESULTS();
    foreach ($google_url_array as $key => $link) {
        echo ($key + 1) . '<br>';
        echo $link . '<br>';
    }

    get_initial_results();
    calculate_collection_weights();
    calculate_engine_weights();
    calculate_merged_pages_scores();
    // "cmp" is defined elsewhere; it is expected to order results from high to low weight.
    usort($url_array, "cmp");

    // Print at most the first 100 merged results.
    // NOTE(review): url, title and description are echoed without HTML escaping;
    // confirm that these values are safe to emit as markup.
    $t = 1;
    foreach ($url_array as $val) {
        echo $t . '<br>';
        echo 'TOTAL WEIGHT = ' . $val["weight"] . '<br>';
        echo '<a href="' . $val["url"] . '">' . $val["title"] . '</a>' . '<br>';
        echo $val["description"] . '<br>';
        echo $val["url"] . '<br><br>';
        $t += 1;
        if ($t > 100) {
            break;
        }
    }

    $agg_num_rel_docs = 0;
    $rank = 1;
    // Fewer than 100 results can come back (for example when stemming
    // 'michworks' to 'michwork' leaves only 60), so evaluate only the results
    // that exist, capped at 100.
    $total_docs = min(count($url_array), 100);
    echo '<br>TOTAL DOCS = ' . $total_docs . '<br>';

    foreach ($url_array as $val) {
        if ($rank > $total_docs) {
            break;
        }
        if (in_array($val["url"], $google_url_array)) {
            $agg_num_rel_docs++;
            // Precision at this rank, accumulated for the average precision.
            $agg_rel_average_precision += $agg_num_rel_docs / $rank;
            echo $agg_num_rel_docs . '/' . $rank . '<br>';
        }
        if ($rank == 10) {
            $agg_precision_at_10 = $agg_num_rel_docs / 10;
        }
        $rank++;
    }
    // With fewer than 10 results the loop never reaches rank 10; the missing
    // ranks count as non-relevant instead of forcing the metric to 0.
    if ($total_docs < 10) {
        $agg_precision_at_10 = $agg_num_rel_docs / 10;
    }

    // Guard both divisions: an empty result list or a run without any relevant
    // document must yield 0 instead of a division by zero.
    $agg_precision = $total_docs > 0 ? $agg_num_rel_docs / $total_docs : 0;
    // NOTE(review): 100 is presumably the size of the Google reference list - confirm.
    $agg_recall = $agg_num_rel_docs / 100;
    if ($agg_precision + $agg_recall > 0) {
        $agg_f_measure = 2 * $agg_precision * $agg_recall / ($agg_precision + $agg_recall);
    } else {
        $agg_f_measure = 0.0;
    }
    $agg_rel_average_precision = $agg_rel_average_precision / 100;

    echo 'agg Precision = ' . $agg_precision . '<br>';
    echo 'agg Recall = ' . $agg_recall . '<br>';
    echo 'agg Precision at 10 = ' . $agg_precision_at_10 . '<br>';
    echo 'agg F-Measure = ' . $agg_f_measure . '<br>';
    echo 'agg Rel Ave Precision TOTAL = ' . $agg_rel_average_precision . '<br>';

    // Append each metric to its file and release the handle; a file that could
    // not be opened is skipped instead of aborting the run.
    $agg_metric_outputs = array(
        array($agg_1, $agg_precision),
        array($agg_2, $agg_recall),
        array($agg_3, $agg_precision_at_10),
        array($agg_4, $agg_f_measure),
        array($agg_5, $agg_rel_average_precision),
    );
    foreach ($agg_metric_outputs as $agg_metric_output) {
        list($agg_metric_file, $agg_metric_value) = $agg_metric_output;
        if (is_resource($agg_metric_file)) {
            fwrite($agg_metric_file, $agg_metric_value . "\n");
            fclose($agg_metric_file);
        }
    }
}
Ejemplo n.º 2
0
/**
 * Evaluates the Bing and Blekko result lists against the Google result list.
 *
 * Prints the query and the Google, Bing and Blekko URLs as HTML, then computes
 * precision, recall, precision at 10, F-measure and average precision for each
 * of the two engines. A URL counts as relevant when it also appears in the
 * array returned by GOOGLE_RESULTS(). Every metric is echoed and appended as
 * one line to its own bing_*.txt or blekko_*.txt file.
 *
 * Side effects: reads the global $query_meta and overwrites four $_POST entries
 * before including preprocessor.php.
 *
 * @return void
 */
function EVAL_MAIN()
{
    // One output file per engine and metric; append mode keeps earlier runs.
    $bing_1 = fopen("bing_precision.txt", "a");
    $bing_2 = fopen("bing_recall.txt", "a");
    $bing_3 = fopen("bing_precision_at_10.txt", "a");
    $bing_4 = fopen("bing_f_measure.txt", "a");
    $bing_5 = fopen("bing_rel_average_precision.txt", "a");

    $blekko_1 = fopen("blekko_precision.txt", "a");
    $blekko_2 = fopen("blekko_recall.txt", "a");
    $blekko_3 = fopen("blekko_precision_at_10.txt", "a");
    $blekko_4 = fopen("blekko_f_measure.txt", "a");
    $blekko_5 = fopen("blekko_rel_average_precision.txt", "a");

    $bing_precision = 0;
    $bing_recall = 0;
    $bing_precision_at_10 = 0;
    $bing_f_measure = 0;
    $bing_rel_average_precision = 0;

    $blekko_precision = 0;
    $blekko_recall = 0;
    $blekko_precision_at_10 = 0;
    $blekko_f_measure = 0;
    $blekko_rel_average_precision = 0;

    global $query_meta;
    include 'global_vars_insert.php';

    // Switch every preprocessing option on, then run the query through the preprocessor.
    $_POST["query"] = $query_meta;
    $_POST['Case_Folding'] = 'Case_Folding_On';
    $_POST['Synonyms'] = 'Synonyms_On';
    $_POST['Stemming'] = 'Stemming_On';
    include 'preprocessor.php';

    // NOTE(review): $bing_query, $bing_urls and $blekko_urls are not assigned in
    // this function; presumably the included files bring them into scope and
    // GET_BING_URLS() / GET_BLEKKO_URLS() fill the URL lists - confirm.
    echo 'QUERY = ' . $bing_query . '<br>';

    // The Google list serves as the relevance reference; print it 1-based.
    $google_url_array = GOOGLE_RESULTS();
    foreach ($google_url_array as $key => $link) {
        echo ($key + 1) . '<br>';
        echo $link . '<br>';
    }

    get_initial_results();
    GET_BING_URLS();
    GET_BLEKKO_URLS();
    foreach ($bing_urls as $key => $link) {
        echo ($key + 1) . '<br>';
        echo $link . '<br>';
    }
    foreach ($blekko_urls as $key => $link) {
        echo ($key + 1) . '<br>';
        echo $link . '<br>';
    }

    // ---- Bing ----
    $bing_num_rel_docs = 0;
    $rank = 1;
    // Fewer than 100 results can come back (for example when stemming
    // 'michworks' to 'michwork' leaves only 60), so evaluate only the results
    // that exist, capped at 100.
    $bing_total_docs = min(count($bing_urls), 100);
    foreach ($bing_urls as $url) {
        // Stop at the evaluated depth so the relevant count matches the
        // denominator used for precision below.
        if ($rank > $bing_total_docs) {
            break;
        }
        if (in_array($url, $google_url_array)) {
            $bing_num_rel_docs++;
            // Precision at this rank, accumulated for the average precision.
            $bing_rel_average_precision += $bing_num_rel_docs / $rank;
            echo $bing_num_rel_docs . '/' . $rank . '<br>';
        }
        if ($rank == 10) {
            $bing_precision_at_10 = $bing_num_rel_docs / 10;
        }
        $rank++;
    }
    // With fewer than 10 results the loop never reaches rank 10; the missing
    // ranks count as non-relevant instead of forcing the metric to 0.
    if ($bing_total_docs < 10) {
        $bing_precision_at_10 = $bing_num_rel_docs / 10;
    }

    // Guard both divisions: an empty result list or a run without any relevant
    // document must yield 0 instead of a division by zero.
    $bing_precision = $bing_total_docs > 0 ? $bing_num_rel_docs / $bing_total_docs : 0;
    // NOTE(review): 100 is presumably the size of the Google reference list - confirm.
    $bing_recall = $bing_num_rel_docs / 100;
    if ($bing_precision + $bing_recall > 0) {
        $bing_f_measure = 2 * $bing_precision * $bing_recall / ($bing_precision + $bing_recall);
    } else {
        $bing_f_measure = 0.0;
    }
    $bing_rel_average_precision = $bing_rel_average_precision / 100;

    echo '<br>BING TOTAL DOCS = ' . $bing_total_docs . '<br>';
    echo 'Bing Precision = ' . $bing_precision . '<br>';
    echo 'Bing Recall = ' . $bing_recall . '<br>';
    echo 'Bing Precision at 10 = ' . $bing_precision_at_10 . '<br>';
    echo 'Bing F-Measure = ' . $bing_f_measure . '<br>';
    echo 'Bing Rel Ave Precision TOTAL = ' . $bing_rel_average_precision . '<br>';

    // Append each Bing metric to its file and release the handle; a file that
    // could not be opened is skipped instead of aborting the run.
    $bing_metric_outputs = array(
        array($bing_1, $bing_precision),
        array($bing_2, $bing_recall),
        array($bing_3, $bing_precision_at_10),
        array($bing_4, $bing_f_measure),
        array($bing_5, $bing_rel_average_precision),
    );
    foreach ($bing_metric_outputs as $bing_metric_output) {
        list($bing_metric_file, $bing_metric_value) = $bing_metric_output;
        if (is_resource($bing_metric_file)) {
            fwrite($bing_metric_file, $bing_metric_value . "\n");
            fclose($bing_metric_file);
        }
    }

    // ---- Blekko ----
    $blekko_num_rel_docs = 0;
    $rank = 1;
    // Same cap as for Bing: evaluate the results that exist, at most 100.
    $blekko_total_docs = min(count($blekko_urls), 100);
    foreach ($blekko_urls as $url) {
        if ($rank > $blekko_total_docs) {
            break;
        }
        if (in_array($url, $google_url_array)) {
            $blekko_num_rel_docs++;
            $blekko_rel_average_precision += $blekko_num_rel_docs / $rank;
            echo $blekko_num_rel_docs . '/' . $rank . '<br>';
        }
        if ($rank == 10) {
            $blekko_precision_at_10 = $blekko_num_rel_docs / 10;
        }
        $rank++;
    }
    // Lists shorter than 10 never reach rank 10 inside the loop.
    if ($blekko_total_docs < 10) {
        $blekko_precision_at_10 = $blekko_num_rel_docs / 10;
    }

    // Same division guards as for Bing.
    $blekko_precision = $blekko_total_docs > 0 ? $blekko_num_rel_docs / $blekko_total_docs : 0;
    $blekko_recall = $blekko_num_rel_docs / 100;
    if ($blekko_precision + $blekko_recall > 0) {
        $blekko_f_measure = 2 * $blekko_precision * $blekko_recall / ($blekko_precision + $blekko_recall);
    } else {
        $blekko_f_measure = 0.0;
    }
    $blekko_rel_average_precision = $blekko_rel_average_precision / 100;

    echo '<br>BLEKKO TOTAL DOCS = ' . $blekko_total_docs . '<br>';
    echo 'blekko Precision = ' . $blekko_precision . '<br>';
    echo 'blekko Recall = ' . $blekko_recall . '<br>';
    echo 'blekko Precision at 10 = ' . $blekko_precision_at_10 . '<br>';
    echo 'blekko F-Measure = ' . $blekko_f_measure . '<br>';
    echo 'blekko Rel Ave Precision TOTAL = ' . $blekko_rel_average_precision . '<br>';

    // Append each Blekko metric to its file and release the handle.
    $blekko_metric_outputs = array(
        array($blekko_1, $blekko_precision),
        array($blekko_2, $blekko_recall),
        array($blekko_3, $blekko_precision_at_10),
        array($blekko_4, $blekko_f_measure),
        array($blekko_5, $blekko_rel_average_precision),
    );
    foreach ($blekko_metric_outputs as $blekko_metric_output) {
        list($blekko_metric_file, $blekko_metric_value) = $blekko_metric_output;
        if (is_resource($blekko_metric_file)) {
            fwrite($blekko_metric_file, $blekko_metric_value . "\n");
            fclose($blekko_metric_file);
        }
    }
}