require_once "batch_lib.inc";
require_once "bootstrap.inc";

// Sanity check for today's crawls: for every crawl type, verify that enough
// URLs were queued in its status table and that the crawl record exists with
// a matching URL count. Any problems found are echoed per crawl type.

// Crawl labels look like "Nov 15 2012": 3-letter month, day of month without
// a leading zero, 4-digit year.
$label = date("M j Y");

$aCrawlnames = array("dev", "mobile", "android", "chrome");

// Minimum number of queued URLs expected for each crawl type.
$hMinUrls = array(
    "dev"     => 490000,
    "chrome"  => 490000,
    "mobile"  => 4900,
    "android" => 4900,
);

foreach ($aCrawlnames as $crawlname) {
    $sProblems = "";

    // we fill the status table before creating the crawl
    // NOTE(review): doSimpleQuery() is defined elsewhere; if it hands back the
    // raw column value (typically a string) a strict `0 === ...` test can never
    // match, so the count is normalized to an int before comparing.
    $numStatus = (int) doSimpleQuery("select count(*) from status{$crawlname} where label = '{$label}';");
    if (0 === $numStatus) {
        $sProblems .= " No URLs have been queued up in the status{$crawlname} table.\n";
    } elseif ($numStatus < $hMinUrls[$crawlname]) {
        $sProblems .= " Only {$numStatus} URLs have been queued up in the status{$crawlname} table for crawl \"{$label}\".\n";
    }

    // check that the crawl exists and has the right number of URLs
    $device = curDevice($crawlname);
    $crawl = getCrawl($label, null, $device);
    if (!$crawl) {
        $sProblems .= " Could not find the crawl for \"{$label}\".\n";
    } else {
        // Compare as ints so a string/int type mismatch is not reported as a
        // count mismatch. The same check applies to every crawl type.
        $numUrls = (int) $crawl['numUrls'];
        if ($numStatus !== $numUrls) {
            $sProblems .= " Only {$numUrls} URLs (instead of {$numStatus}) have been set for the {$crawlname} crawl \"{$label}\".\n";
        }
    }

    if ($sProblems) {
        echo "Problems with the {$crawlname} crawl:\n{$sProblems}\n";
    }
}
require_once "../utils.inc";
require_once "../dbapi.inc";
require_once "../requests.inc";
require_once "../crawls.inc";
require_once "../stats.inc";
require_once "batch_lib.inc";

// Script prologue: resolve the tables to operate on, read the crawl label
// from the command line, look up the crawl and derive the pageid range
// condition used by the rest of the script.

// Even tho we run this from DEV we want to take action on the production tables.
if ($gbDev) {
    $gbDev = false;
    $gRequestsTable = "requests";
    $gPagesTable = "pages";
    $gStatsTable = "stats";
}

$pagesTable = $gPagesTable;
$requestsTable = $gRequestsTable;
$device = curDevice();

// The crawl label is the first command-line argument.
$label = null;
if (array_key_exists(1, $argv)) {
    $label = $argv[1];
}
if (!$label) {
    tprint("ERROR: you must specify the label, eg, \"Nov 15 2012\".");
    exit;
}

$crawl = getCrawl($label, "All", $device);
// getCrawl()'s failure value is not visible here, so any falsy result is
// treated as "not found". Stop instead of continuing: without a crawl row the
// pageid bounds below would be empty and produce a bogus range condition.
if (!$crawl) {
    tprint("ERROR: Crawl \"{$label}\" for archive \"All\" and location \"{$device}\" wasn't found.");
    exit;
}

$minPageid = $crawl['minPageid'];
$maxPageid = $crawl['maxPageid'];
// SQL fragment restricting queries to the pages of this crawl.
$pageidCond = "pageid >= {$minPageid} and pageid <= {$maxPageid}";
<td id=rightarrow class=arrow></td> </table> <div> <a href="about.php#bigquery">Write your own custom queries!</a> </div> </center> <script type="text/javascript"> // HTML strings for each image var gaSnippets = new Array(); <?php $gLabel = latestLabel(); require_once "stats.inc"; require_once "charts.inc"; $hStats = getStats($gLabel, "All", curDevice()); ?> gaSnippets.push("<?php echo bytesContentTypeChart($hStats); ?> "); gaSnippets.push("<?php echo responseSizes($hStats); ?> "); gaSnippets.push("<?php echo percentGoogleLibrariesAPI($hStats); ?> "); gaSnippets.push("<?php echo percentFlash($hStats);
</form> </div> <form> <label>Choose URLs:</label> <?php echo selectSlice($gSlice, "onchange='document.location=\"?a={$gArchive}&l={$gLabel}&s=\"+escape(this.options[this.selectedIndex].value)'"); ?> </form> <div id=interesting style="margin-top: 40px;"> <?php
// Emit the charts for the currently selected label ($gLabel) and slice
// ($gSlice) on the current device, one chart per echo, each followed by a
// newline.
require_once "stats.inc";
require_once "charts.inc";

// Summary stats and CDF data are fetched for the same label/slice/device;
// the $hStats charts and the $hCdf histograms below read from these.
$hStats = getStats($gLabel, $gSlice, curDevice());
$hCdf = getCdfData($gLabel, $gSlice, curDevice());

// NOTE(review): the chart helpers are defined elsewhere. The statement order
// here is the order in which the charts are emitted.
echo bytesContentTypeChart($hStats) . "\n";
echo responseSizes($hStats) . "\n";
// NOTE(review): histogram()'s trailing numeric arguments are not defined in
// this view (presumably bucket size and a further tuning value) - confirm
// against its definition before changing them.
echo histogram($hCdf, "bytesHtmlDoc", "HTML Document Transfer Size", "bytesHtmlDoc", 5 * 1024) . "\n";
echo histogram($hCdf, "numDomElements", "# of DOM Elements per Page", "numDomElements", 400, 2) . "\n";
echo percentGoogleLibrariesAPI($hStats) . "\n";
echo percentFlash($hStats) . "\n";
echo percentFonts($hStats) . "\n";
echo popularImageFormats($hStats) . "\n";
echo maxage($hStats) . "\n";
echo histogram($hCdf, "numRedirects", "Redirects per Page", "redirects") . "\n";
echo histogram($hCdf, "_connections", "Connections per Page", "connections", 10) . "\n";
echo histogram($hCdf, "avg_dom_depth", "Avg DOM Depth", "avgdomdepth") . "\n";
echo histogram($hCdf, "document_height", "Document Height (pixels)", "docheight", 1000) . "\n";
echo histogram($hCdf, "localstorage_size", "Size of localStorage (chars)", "localstorage", 50) . "\n";
echo histogram($hCdf, "sessionstorage_size", "Size of sessionStorage (chars)", "sessionstorage", 50) . "\n";
?> .<?php echo $wptrun; ?> .0">watch video</a> </ul> <h2 id=sitestats>Stats</h2> <?php
// "Stats" section for a single URL: fetch the stats for $url under the
// "url" slice and emit the charts in the order below.
// NOTE(review): $gSlice and $gUrl are presumably read as globals by the
// included files - confirm before renaming or removing them.
$gSlice = "url";
$gUrl = $url;
require_once "stats.inc";
require_once "charts.inc";
$hStats = getStats($gLabel, $gSlice, curDevice(), $url);
echo bytesContentTypeChart($hStats);
echo responseSizes($hStats);
echo popularImageFormats($hStats);
echo maxage($hStats);
echo percentByProtocol($hStats);
?> <h2 id=trends>Trends</h2> <?php
// trends.inc is REALLY SLOW so we flush the buffer first.
// Both PHP's output buffer (ob_flush) and the system write buffer (flush)
// are flushed so the content above is sent before the include runs.
ob_flush();
flush();
require_once 'trends.inc';
?>
<option value=200 <?php echo 200 == $gW ? "selected" : ""; ?> > medium <option value=400 <?php echo 400 == $gW ? "selected" : ""; ?> > large </select> </form> <a href="javascript:gotoLink()" style="margin-left: 1em; font-size: 0.8em;" class=txt>link</a> </div> <?php // Figure out which crawl is earliest: $earliestLabel = doSimpleQuery("select label from crawls where (label = '{$gLabel1}' or label = '{$gLabel2}') and location = '" . curDevice() . "' order by minPageid asc limit 1;"); $bChrono = $earliestLabel == $gLabel1; // Find the topmost URLs in both crawls: $limitgoogle = "(url = 'http://www.google.com/' OR url not like '%://www.google.%')"; // There are 10+ sites that all look the same from Google intl sites $maxRank = 5 * $gNumUrls; // we get back MORE results than needed so we can filter out adult content $query = "select url, min(pageid) as minid, max(pageid) as maxid, count(*) as num, _adult_site from {$gPagesTable}, {$gUrlsTable} as u where (label = '{$gLabel1}' or label = '{$gLabel2}') and url=urlOrig and u.rank > 0 and u.rank < {$maxRank} and {$limitgoogle} group by url having num=2 order by u.rank asc;"; $result = doQuery($query); $i = 0; $imgs1 = ""; $imgs2 = ""; while ($row = mysql_fetch_assoc($result)) { $url = $row['url']; $minid = $row['minid']; $maxid = $row['maxid'];