Example #1
0
require_once "batch_lib.inc";
require_once "bootstrap.inc";
// Sanity-check today's crawls: for every crawl name, verify that URLs were
// queued in its status table and that the crawl record agrees with that count.
// Problems are echoed per crawl; nothing is printed for a healthy crawl.

// Label for today's crawl: three-letter month, day of month, year.
$date = getdate();
$label = substr($date['month'], 0, 3) . " " . $date['mday'] . " " . $date['year'];
$aCrawlnames = array("dev", "mobile", "android", "chrome");
// Minimum number of queued URLs expected for each crawl; fewer is reported.
$aMinQueued = array("dev" => 490000, "chrome" => 490000, "mobile" => 4900, "android" => 4900);
foreach ($aCrawlnames as $crawlname) {
    $sProblems = "";
    // we fill the status table before creating the crawl
    // Normalize to int: doSimpleQuery() is defined elsewhere and may hand back
    // the count as a string, in which case a strict "0 ===" test never matches.
    $numStatus = (int) doSimpleQuery("select count(*) from status{$crawlname} where label = '{$label}';");
    if (0 === $numStatus) {
        $sProblems .= "    No URLs have been queued up in the status{$crawlname} table.\n";
    } elseif ($numStatus < $aMinQueued[$crawlname]) {
        $sProblems .= "    Only {$numStatus} URLs have been queued up in the status{$crawlname} table for crawl \"{$label}\".\n";
    }
    // check that the crawl exists and has the right number of URLs
    $device = curDevice($crawlname);
    $crawl = getCrawl($label, null, $device);
    if (!$crawl) {
        $sProblems .= "    Could not find the crawl for \"{$label}\".\n";
    } else {
        // Compare as integers so the strict check does not depend on whether
        // either side arrived as a string or an int.
        $numUrls = (int) $crawl['numUrls'];
        if ($numStatus !== $numUrls) {
            $sProblems .= "    Only {$numUrls} URLs (instead of {$numStatus}) have been set for the {$crawlname} crawl \"{$label}\".\n";
        }
    }
    if ($sProblems) {
        echo "Problems with the {$crawlname} crawl:\n{$sProblems}\n";
    }
}
Example #2
0
require_once "../utils.inc";
require_once "../dbapi.inc";
require_once "../requests.inc";
require_once "../crawls.inc";
require_once "../stats.inc";
require_once "batch_lib.inc";
// Even though we run this from DEV we want to take action on the production tables.
if ($gbDev) {
    $gbDev = false;
    $gRequestsTable = "requests";
    $gPagesTable = "pages";
    $gStatsTable = "stats";
}
$pagesTable = $gPagesTable;
$requestsTable = $gRequestsTable;
$device = curDevice();
// The crawl label is the first command-line argument.
$label = isset($argv[1]) ? $argv[1] : null;
if (!$label) {
    tprint("ERROR: you must specify the label, eg, \"Nov 15 2012\".");
    exit;
}
$crawl = getCrawl($label, "All", $device);
if (!$crawl) {
    // Stop here: without a crawl record the page-id range below cannot be
    // built, and continuing would index into a non-array.
    tprint("ERROR: Crawl \"{$label}\" for archive \"All\" and location \"{$device}\" wasn't found.");
    exit;
}
// Restrict later queries to the page ids that belong to this crawl.
$minPageid = $crawl['minPageid'];
$maxPageid = $crawl['maxPageid'];
$pageidCond = "pageid >= {$minPageid} and pageid <= {$maxPageid}";
Example #3
0
<td id=rightarrow class=arrow></td>
</table>
<div>
<a href="about.php#bigquery">Write your own custom queries!</a>
</div>
</center>

<script type="text/javascript">
// HTML strings for each image
var gaSnippets = new Array();

<?php 
// Load the stats once for the label returned by latestLabel() (second
// argument "All", current device); every snippet below is rendered from $hStats.
$gLabel = latestLabel();
require_once "stats.inc";
require_once "charts.inc";
$hStats = getStats($gLabel, "All", curDevice());
// Each push() below embeds one chart helper's output inside a JavaScript
// double-quoted string literal.
// NOTE(review): nothing visible here escapes that output for JavaScript, so
// the helpers must not return raw double quotes or line breaks - confirm.
?>
gaSnippets.push("<?php 
echo bytesContentTypeChart($hStats);
?>
");
gaSnippets.push("<?php 
echo responseSizes($hStats);
?>
");
gaSnippets.push("<?php 
echo percentGoogleLibrariesAPI($hStats);
?>
");
gaSnippets.push("<?php 
echo percentFlash($hStats);
Example #4
0
</form>
</div>

<form>
	<label>Choose URLs:</label>
<?php 
// Emit the slice picker. The onchange handler passed to selectSlice() reloads
// the page with a=<archive>, l=<label> and s=<the selected option's value,
// run through JavaScript escape()>.
// NOTE(review): $gArchive and $gLabel are interpolated into the attribute
// unescaped - confirm they are sanitized before this point.
echo selectSlice($gSlice, "onchange='document.location=\"?a={$gArchive}&l={$gLabel}&s=\"+escape(this.options[this.selectedIndex].value)'");
?>
</form>

<div id=interesting style="margin-top: 40px;">
<?php 
// Render the "interesting stats" section for the selected label and slice:
// summary charts come from $hStats, histograms from the CDF data in $hCdf.
// Each chart is written out followed by a newline.
require_once "stats.inc";
require_once "charts.inc";
$hStats = getStats($gLabel, $gSlice, curDevice());
$hCdf = getCdfData($gLabel, $gSlice, curDevice());
echo bytesContentTypeChart($hStats), "\n";
echo responseSizes($hStats), "\n";
echo histogram($hCdf, "bytesHtmlDoc", "HTML Document Transfer Size", "bytesHtmlDoc", 5 * 1024), "\n";
echo histogram($hCdf, "numDomElements", "# of DOM Elements per Page", "numDomElements", 400, 2), "\n";
echo percentGoogleLibrariesAPI($hStats), "\n";
echo percentFlash($hStats), "\n";
echo percentFonts($hStats), "\n";
echo popularImageFormats($hStats), "\n";
echo maxage($hStats), "\n";
echo histogram($hCdf, "numRedirects", "Redirects per Page", "redirects"), "\n";
echo histogram($hCdf, "_connections", "Connections per Page", "connections", 10), "\n";
echo histogram($hCdf, "avg_dom_depth", "Avg DOM Depth", "avgdomdepth"), "\n";
echo histogram($hCdf, "document_height", "Document Height (pixels)", "docheight", 1000), "\n";
echo histogram($hCdf, "localstorage_size", "Size of localStorage (chars)", "localstorage", 50), "\n";
echo histogram($hCdf, "sessionstorage_size", "Size of sessionStorage (chars)", "sessionstorage", 50), "\n";
Example #5
0
?>
.<?php 
echo $wptrun;
?>
.0">watch video</a>
</ul>


<h2 id=sitestats>Stats</h2>

<?php 
// Per-URL stats: set the slice to "url", remember the URL in $gUrl, then
// load the stats for that one URL and write out its charts back to back.
$gUrl = $url;
$gSlice = "url";
require_once "stats.inc";
require_once "charts.inc";
$hStats = getStats($gLabel, $gSlice, curDevice(), $url);
echo bytesContentTypeChart($hStats),
    responseSizes($hStats),
    popularImageFormats($hStats),
    maxage($hStats),
    percentByProtocol($hStats);
?>

<h2 id=trends>Trends</h2>

<?php 
// trends.inc is really slow, so send everything rendered so far to the
// client before including it.
// ob_flush() raises an E_NOTICE when no output buffer is active, so only
// call it when there is a buffer to flush.
if (ob_get_level() > 0) {
    ob_flush();
}
flush();
require_once 'trends.inc';
?>
Example #6
0
<option value=200 <?php 
echo 200 == $gW ? "selected" : "";
?>
> medium
<option value=400 <?php 
echo 400 == $gW ? "selected" : "";
?>
> large
</select>
</form>
<a href="javascript:gotoLink()" style="margin-left: 1em; font-size: 0.8em;" class=txt>link</a>
</div>

<?php 
// Figure out which crawl is earliest: of the two labels, take the one whose
// crawl row for the current device has the lowest minPageid.
// NOTE(review): $gLabel1 and $gLabel2 are interpolated directly into SQL here
// and in $query below - confirm they are validated or escaped upstream.
$earliestLabel = doSimpleQuery("select label from crawls where (label = '{$gLabel1}' or label = '{$gLabel2}') and location = '" . curDevice() . "' order by minPageid asc limit 1;");
// True when $gLabel1 is the earlier of the two crawls.
$bChrono = $earliestLabel == $gLabel1;
// Find the topmost URLs in both crawls:
// There are 10+ sites that all look the same from Google intl sites, so keep
// http://www.google.com/ itself and drop every other www.google.* URL.
$limitgoogle = "(url = 'http://www.google.com/' OR url not like '%://www.google.%')";
// we get back MORE results than needed so we can filter out adult content
$maxRank = 5 * $gNumUrls;
// Ranked URLs (0 < rank < $maxRank) with exactly two matching page rows across
// the two labels ("having num=2"), ordered by rank; minid/maxid are the lowest
// and highest pageid among those rows.
$query = "select url, min(pageid) as minid, max(pageid) as maxid, count(*) as num, _adult_site from {$gPagesTable}, {$gUrlsTable} as u where (label = '{$gLabel1}' or label = '{$gLabel2}') and url=urlOrig and u.rank > 0 and u.rank < {$maxRank} and {$limitgoogle} group by url having num=2 order by u.rank asc;";
$result = doQuery($query);
// Counter and HTML accumulators used by the result loop that follows.
$i = 0;
$imgs1 = "";
$imgs2 = "";
while ($row = mysql_fetch_assoc($result)) {
    $url = $row['url'];
    $minid = $row['minid'];
    $maxid = $row['maxid'];