Example #1
0
             // Remove stopwords and apply stemming
             // (only the stemming call is visible here; stopword handling is
             // presumably done inside applyPorterStemming() or earlier — TODO confirm).
             $stemsUnique = applyPorterStemming($searchExploded);
             // Rejoin the stems into a single space-separated query string.
             $search = implode(" ", $stemsUnique);

             // Each engine below is queried in turn with the stemmed query. The
             // loose `== false` test treats any falsy return value (false, null,
             // 0, "", "0", or an empty array) as "no results", in which case a
             // user-facing message is stored in the matching *results variable.
             $bing = searchBing($search);
             if ($bing == false) {
                 $bingresults = "Sorry, no results matching {$search}";
             }
             $entireWeb = searchEntireWeb($search);
             if ($entireWeb == false) {
                 $entireWebresults = "Sorry, no results matching {$search}";
             }
             $blekko = searchBlekko($search);
             if ($blekko == false) {
                 $blekkoresults = "Sorry, no results matching {$search}";
             }
             // Unlike the other engines, the Google scraper receives the query
             // URL-encoded; the message still shows the raw query.
             $google = scrapeGoogle(urlencode($search));
             if ($google == false) {
                 $googleresults = "Sorry, no results matching {$search}";
             }
         }
     }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /** 
  * For evaluation only: to get MAP!
  */
 /*
 // Read queries from file
 $queries = file ( 'lists/queries.txt' ); 
 
 // Send a REST request to the search engines for each query
*/
// Select unused suggested titles (st_used=0, st_group=5) that have no matching
// row in suggested_links yet, in random order.
$res = $dbw->query('select st_title, st_id from suggested_titles left join suggested_links on
				st_id=sl_sugg where st_used=0 and st_group=5 and sl_sugg is null order by rand()');

// Make sure the title => id map exists even when the query returns no rows;
// otherwise sizeof() and the later foreach would receive an undefined variable
// (a TypeError on PHP 8, a warning on earlier versions). A map that was already
// set up before this point is left untouched.
if (!isset($sugg)) {
    $sugg = array();
}

// Map each suggested title text to its suggestion id.
while ($row = $dbw->fetchObject($res)) {
    $s = $row->st_title;
    $sugg[$s] = $row->st_id;
}
echo date("r") . " -  got " . sizeof($sugg) . " suggestions\n";

// Progress counter and start timestamp used for the periodic progress report.
$count = 0;
$xx = time();
foreach ($sugg as $s => $sid) {
    $t = Title::newFromText($s);
    if (!$t) {
        continue;
    }
    $ids = scrapeGoogle($t->getText());
    // Only build an insert when scrapeGoogle() returned a non-empty array (or
    // Countable). Any other value, such as false or null, is treated as "no
    // results" instead of reaching count()/foreach and producing an insert
    // statement with an empty VALUES list.
    if ((is_array($ids) || $ids instanceof Countable) && count($ids) > 0) {
        // One "(suggestion id, linked id, rand())" tuple per scraped id.
        $valueTuples = array();
        foreach ($ids as $id) {
            $valueTuples[] = "({$sid},{$id},rand())";
        }
        // Join the tuples explicitly with ", " rather than rewriting every space
        // in the concatenated string, which would also rewrite spaces that
        // happen to occur inside an id.
        $sql = implode(", ", $valueTuples);
        // NOTE(review): $sid and $id are interpolated unquoted into the SQL —
        // confirm both are always integers.
        $dbw->query("insert into suggested_links values {$sql};");
    }
    $count++;
    //+=sizeof($ids);;
    if ($count % 500 == 0 && $count > 0) {
        $yy = time() - $xx;
        echo date("r") . " - processed {$count} suggestions, took {$yy} seconds, sleeping\n";
    #print_r($oAnalytics->getVisitors());
    // print out pageviews for given period
    #print_r($oAnalytics->getPageviews());
    // use dimensions and metrics for output
    // see: http://code.google.com/intl/nl/apis/analytics/docs/gdata/gdataReferenceDimensionsMetrics.html
    // Request the ga:visits metric per ga:keyword dimension, using the
    // '-ga:visits' sort key.
    $results = $oAnalytics->getData(array(
        'dimensions' => 'ga:keyword',
        'metrics' => 'ga:visits',
        'sort' => '-ga:visits',
    ));

    // Keywords that are never looked up.
    $skip = array("(other)", "(not set)", "wikihow", "wiki how");

    // Drop the first entry of the report before iterating.
    array_shift($results);

    // The array keys are the keywords; the values ($c) are not used in this loop.
    foreach ($results as $r => $c) {
        // Strip "how to" at the start of any line (case-insensitive), then trim
        // the surrounding whitespace.
        $r = preg_replace("@^how to@im", "", $r);
        $r = trim($r);
        if (in_array($r, $skip)) {
            continue;
        }

        // Instantiated on every iteration; nothing in the visible code reads it.
        $l = new LSearch();

        $result = scrapeGoogle($r);
        if (empty($result)) {
            echo "{$r}\t(no matches)\n";
            continue;
        }

        // Remove the site prefix from the scraped value and resolve what is
        // left to a Title object.
        $top = Title::newFromURL(str_replace("http://www.wikihow.com/", "", $result));
        if ($top) {
            echo "{$r}\t{$top->getText()}\n";
        } else {
            echo "Can't build title out of {$result} - query: {$r}\n";
        }
    }
} catch (Exception $e) {
    echo 'Caught exception: ' . $e->getMessage();
}