// Remove stopwords and apply stemming
$stemsUnique = applyPorterStemming($searchExploded);
$search = implode(" ", $stemsUnique);

// Query each engine with the stemmed search string. Any falsy return value
// is treated as "nothing found" and the engine's result text is replaced by
// an apology naming the query.
$bing = searchBing($search);
if (!$bing) {
    $bingresults = "Sorry, no results matching {$search}";
}

$entireWeb = searchEntireWeb($search);
if (!$entireWeb) {
    $entireWebresults = "Sorry, no results matching {$search}";
}

$blekko = searchBlekko($search);
if (!$blekko) {
    $blekkoresults = "Sorry, no results matching {$search}";
}

// Unlike the other engines, the Google scraper is handed a URL-encoded query.
$google = scrapeGoogle(urlencode($search));
if (!$google) {
    $googleresults = "Sorry, no results matching {$search}";
}
}
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/**
 * For evaluation only: to get MAP!
 */

/*
// Read queries from file
$queries = file ( 'lists/queries.txt' );

// Send a REST request to the search engines for each query
*/
// Fetch every unused group-5 suggested title that has no row in
// suggested_links yet, in random order.
$res = $dbw->query('select st_title, st_id from suggested_titles left join suggested_links on st_id=sl_sugg where st_used=0 and st_group=5 and sl_sugg is null order by rand()');

// Make sure the title => id map exists even when the query returns no rows;
// otherwise sizeof() and foreach below would operate on an undefined variable.
// An already-populated map is left untouched.
if (!isset($sugg)) {
    $sugg = array();
}
while ($row = $dbw->fetchObject($res)) {
    $sugg[$row->st_title] = $row->st_id;
}
echo date("r") . " - got " . sizeof($sugg) . " suggestions\n";

$count = 0;
$xx = time(); // start of the current timing window, reported in the progress line
foreach ($sugg as $s => $sid) {
    $t = Title::newFromText($s);
    if (!$t) {
        continue;
    }

    $ids = scrapeGoogle($t->getText());
    // scrapeGoogle() is defined elsewhere; only proceed when it handed back a
    // non-empty array, so a failure value such as false never reaches
    // sizeof()/foreach or produces an empty "values ;" statement.
    if (is_array($ids) && sizeof($ids) > 0) {
        // One "(suggestion, id, rand())" tuple per scraped id. Both ids were
        // already interpolated unquoted, so they are assumed to be integer
        // keys; intval() keeps scraped content from injecting SQL.
        $tuples = array();
        foreach ($ids as $id) {
            $tuples[] = "(" . intval($sid) . "," . intval($id) . ",rand())";
        }
        $sql = implode(", ", $tuples);
        $dbw->query("insert into suggested_links values {$sql};");
    } else {
        #echo "no results for $s\n";
    }
    $count++; //+=sizeof($ids);;
    // Emit a progress line after every 500 processed suggestions.
    if ($count % 500 == 0 && $count > 0) {
        $yy = time() - $xx;
        echo date("r") . " - processed {$count} suggestions, took {$yy} seconds, sleeping\n";
#print_r($oAnalytics->getVisitors());
// print out pageviews for given period
#print_r($oAnalytics->getPageviews());
// use dimensions and metrics for output
// see: http://code.google.com/intl/nl/apis/analytics/docs/gdata/gdataReferenceDimensionsMetrics.html
$results = $oAnalytics->getData(array(
    'dimensions' => 'ga:keyword',
    'metrics' => 'ga:visits',
    'sort' => '-ga:visits',
));

// Keywords that are never looked up.
$skip = array("(other)", "(not set)", "wikihow", "wiki how");

// Drop the first row, as before. array_slice() with preserve_keys is used
// instead of array_shift() because the array is keyed by keyword and
// array_shift() renumbers integer keys: a purely numeric keyword (stored by
// PHP as an integer key) would otherwise be replaced by 0, 1, 2, ...
$results = array_slice($results, 1, null, true);

// For each remaining keyword, print "<keyword>\t<title of the scraped hit>".
foreach ($results as $r => $c) {
    // Strip a leading "how to" (case-insensitive, at the start of any line).
    $r = trim(preg_replace("@^how to@im", "", $r));
    if (in_array($r, $skip, true)) {
        continue;
    }

    // NOTE(review): $l is not used by the active code below (only by the
    // commented-out alternatives); kept because the constructor's side
    // effects are not visible here — confirm and remove if safe.
    $l = new LSearch();
    #$results = $l->googleSearchResultTitles($r);
    #$results = gSearch::query($r);
    $result = scrapeGoogle($r);
    if (empty($result)) {
        echo "{$r}\t(no matches)\n";
        continue;
    }

    // Turn the hit's URL into a title by removing the site prefix.
    $top = Title::newFromURL(str_replace("http://www.wikihow.com/", "", $result));
    if (!$top) {
        echo "Can't build title out of {$result} - query: {$r}\n";
        continue;
    }
    echo "{$r}\t{$top->getText()}\n";
}
} catch (Exception $e) {
    echo 'Caught exception: ' . $e->getMessage();
}