/**
 * Counts how many of the given titles are found in their own Google results.
 *
 * For every title, checkGoogle() is called with the title text. The title
 * counts as a hit when one of the returned results has exactly the same DB
 * key; at most one hit is counted per title.
 *
 * @param array $titles Title objects exposing getText() and getDBKey().
 * @return array Counters keyed 'google', 'mini' and 'ajax'. Only 'google' is
 *               incremented here; 'mini' and 'ajax' are kept at 0 so the shape
 *               of the returned array stays the same (the checks that used to
 *               feed them are disabled).
 */
function checkTitles($titles) {
    $ret = array('google' => 0, 'mini' => 0, 'ajax' => 0);

    foreach ($titles as $t) {
        $results = checkGoogle($t->getText());
        if (!$results) {
            // Nothing came back for this title: it cannot be a hit.
            continue;
        }

        $wanted = (string) $t->getDBKey();
        foreach ($results as $r) {
            // Strict string comparison: with a loose "==" two different
            // numeric-looking keys (e.g. "1e3" and "1000") compare as equal.
            if ((string) $r->getDBKey() === $wanted) {
                $ret['google']++;
                break;
            }
        }
    }

    return $ret;
}
/*
 * For every title in $titles: look up the article's position in Google,
 * store the outcome in google_indexed and print a one-line report.
 *
 * Uses $titles, $dbr, $dbw and $gTotalAPIRequests, none of which are defined
 * in this part of the script.
 */
foreach ($titles as $title) {
    if ($title == null) {
        echo "error title is null " . print_r($title, true);
        continue;
    }

    // Fetch the id once; the cast guarantees a plain integer in the SQL below.
    $pageId = (int) $title->getArticleID();
    $url = "http://www.wikihow.com/" . $title->getPartialURL();

    // get age
    $findAge = true;
    $age = 0;
    if ($findAge) {
        // Timestamp of the oldest revision of the page.
        $min = $dbr->selectField('revision', 'min(rev_timestamp)', array('rev_page=' . $pageId));
        // NOTE(review): format_data() is defined elsewhere; presumably it turns
        // the revision timestamp into a Unix timestamp -- confirm.
        $d = format_data($min);
        $diff = time() - $d;
        $age = ceil($diff / 60 / 60 / 24);
        $age = " age {$age} days";
    }

    // A falsy or non-numeric result becomes 0 ("not indexed") instead of
    // leaving an empty value in the INSERT statement.
    $position = (int) checkGoogle(
        wfMsg('howto', $title->getFullText()),
        "http://www.wikihow.com/" . $title->getPrefixedURL()
    );
    $isIndexed = $position > 0 ? 1 : 0;

    $sql = "INSERT INTO google_indexed (gi_page, gi_is_indexed, gi_position) VALUES "
        . "({$pageId}, {$isIndexed}, {$position});";
    $dbw->query($sql);

    if ($position > 0) {
        print "indexed: {$url} {$age} position {$position} \n";
    } else {
        print "not indexed: {$url} {$age}\n";
    }
    //throttle();
}
echo "total api requests {$gTotalAPIRequests}\n";
?>
/*
 * Fragment: the code that opens the three blocks closed by the leading braces
 * is not visible here, and the final foreach is not closed in this part.
 *
 * First part: when $matches is empty, the raw $contents is dumped for
 * debugging and `continue` is executed (which suggests an enclosing loop --
 * cannot be confirmed from here). Otherwise one row per entry of $results is
 * inserted into serps.chris_serps (page id, query, position, domain, url).
 * NOTE(review): $query and $r['url'] go through $dbw->addQuotes(), but
 * $r['position'] and $r['domain'] are interpolated as-is -- confirm they are
 * sanitised where $results is built.
 *
 * Script part: checkGoogle() is called for every entry of getSuggestions();
 * then the distinct first-10-character prefixes of gs_timestamp are read from
 * serps.chris_serps into $dates, the header row of an HTML table is printed
 * with one column per date, $results is reset and a loop over $dates begins.
 */
} } } if (sizeof($matches) == 0) { echo "size of matches is 0:-------------\n\n " . $contents . "\n-------------\n\n "; continue; } foreach ($results as $r) { $sql = "INSERT INTO serps.chris_serps(gs_page, gs_query, gs_position, gs_domain, gs_url) \n\t\t\t\tVALUES ({$page_id},\n\t\t\t\t\t\t{$dbw->addQuotes($query)},\n\t\t\t\t\t\t{$r['position']},\n\t\t\t\t\t\t'{$r['domain']}',\n\t\t\t\t\t\t{$dbw->addQuotes($r['url'])}\n\t\t\t\t\t);"; $dbw->query($sql); } #echo "adding " . sizeof($results) . " for $query " . print_r($results, true) . "\n"; } $suggestions = getSuggestions(); foreach ($suggestions as $s) { checkGoogle($s, 0, $domains, $dbw); } // get dates $dbr = wfGetDB(DB_SLAVE); $res = $dbr->query('select distinct(substr(gs_timestamp, 1,10)) as date from serps.chris_serps'); $dates = array(); while ($row = $dbr->fetchObject($res)) { $dates[] = $row->date; } echo "<table><tr><td>Query</td>"; foreach ($dates as $d) { echo "<td>{$d}</td>"; } echo "</tr>"; $results = array(); foreach ($dates as $d) {
/*
 * Fragment: tail of a function whose header is not visible; $matches, $count,
 * $page_id and $dbw come from the part that is not shown.
 *
 * Walks the strings in $matches[0]. Each one loses its first 6 characters and
 * its last character (presumably a leading href=" and the closing quote --
 * confirm against the pattern that fills $matches). Google cache links and
 * google.com links are skipped; every other result increments $count. The
 * first result whose host contains "wikihow.com" is written to
 * google_monitor_results with position $count and the loop stops. If no such
 * result exists, a row with position 0 is written instead.
 * NOTE(review): the host is whatever precedes the first '/' once "http://"
 * has been removed. A URL without a path (strpos() returns false) or with an
 * https scheme therefore yields no usable host and is never matched.
 *
 * Script part: for every title from getTitles(), checkGoogle() is called with
 * the title text, its article id and $dbw, followed by throttle().
 */
$found = false; foreach ($matches[0] as $url) { $url = substr($url, 6, strlen($url) - 7); // check for cache article if (strpos($url, "/search?q=cache") !== false || strpos($url, "google.com/") !== false) { continue; } $count++; $domain = str_replace("http://", "", $url); $domain = substr($domain, 0, strpos($domain, '/')); if (strpos($domain, "wikihow.com") !== false) { $sql = "INSERT INTO google_monitor_results (gmr_page, gmr_position) VALUES ({$page_id}, {$count});"; $dbw->query($sql); $found = true; break; } } if (!$found) { $sql = "INSERT INTO google_monitor_results (gmr_page, gmr_position) VALUES ({$page_id}, 0);"; $dbw->query($sql); } } // load queries from the database $titles = getTitles(); foreach ($titles as $title) { checkGoogle($title->getText(), $title->getArticleID(), $dbw); throttle(); } ?>