Example #1
// Build the HTML list items for the download files, one <li> per crawl,
// newest first. $hFiles maps a crawl's epoch timestamp to an array of
// per-device file descriptions (keys: 'IE', 'iPhone').
function listFiles($hFiles)
{
    $sHtml = "";
    $aKeys = array_keys($hFiles);
    rsort($aKeys, SORT_NUMERIC); // newest crawl first
    foreach ($aKeys as $epoch) {
        $label = date("M j Y", $epoch);
        $crawl = getCrawl($label);
        // Only list files for crawls in the "All" archive.
        if ("All" === $crawl['archive']) {
            $sHtml .= "  <li> {$label}: "
                . (array_key_exists('IE', $hFiles[$epoch]) ? "<br>&nbsp;&nbsp;" . $hFiles[$epoch]['IE'] : "")
                . (array_key_exists('iPhone', $hFiles[$epoch]) ? "<br>&nbsp;&nbsp;" . $hFiles[$epoch]['iPhone'] : "")
                . "\n";
        }
    }
    return $sHtml;
}
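A minimal usage sketch for listFiles(). The shape of $hFiles (epoch => per-device file strings, keys 'IE' and 'iPhone') is inferred from the code above; the filenames are hypothetical, and getCrawl() is assumed to resolve the label against the crawls table:
$hFiles = array(
    strtotime("Nov 15 2012") => array(
        'IE'     => "httparchive_Nov_15_2012.gz",        // hypothetical filename
        'iPhone' => "httparchive_mobile_Nov_15_2012.gz", // hypothetical filename
    ),
);
echo "<ul>\n" . listFiles($hFiles) . "</ul>\n";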
Example #2
If the crawl IS finished, the "pages" property is an array of per-page
records, each containing pageid, wptid, & medianrun. With those you can
construct the HA URL (from which you can find the crawl):
  http://httparchive.org/viewsite.php?pageid=[pageid]
As well as the HAR URL:
  http://httparchive.webpagetest.org/export.php?test=[wptid]&run=[medianrun]&cached=0&pretty=1
The most typical usage is simply:
  http://dev.httparchive.org/crawl-data.php
*/
require_once "ui.inc";
require_once "utils.inc";
if (getParam("crawlid")) {
    $crawl = getCrawlFromId(getParam("crawlid"));
} elseif (getParam("label")) {
    $crawl = getCrawl(getParam("label"));
} else {
    // Fall back to the latest crawl, regardless of whether it's finished.
    $crawl = latestCrawl(null, null, false);
}
if ($crawl["finishedDateTime"]) {
    // Add all the info about the pages crawled.
    $crawl["pages"] = array();
    // wptrun holds the median run, i.e. the [medianrun] value in the comment above.
    $query = "select pageid, wptid, wptrun from {$gPagesTable} where crawlid = {$crawl['crawlid']} order by pageid asc;";
    $result = doQuery($query);
    while ($row = mysql_fetch_row($result)) { // legacy mysql_* API (removed in PHP 7)
        array_push($crawl["pages"], $row); // $row = array(pageid, wptid, wptrun)
    }
    mysql_free_result($result);
}
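A sketch of turning one row of $crawl["pages"] into the two URLs described in the comment block. mysql_fetch_row() returns numerically indexed rows, so the order matches the select list (pageid, wptid, wptrun), with wptrun supplying the [medianrun] value:
if ($crawl["finishedDateTime"] && count($crawl["pages"])) {
    list($pageid, $wptid, $medianrun) = $crawl["pages"][0];
    $haUrl  = "http://httparchive.org/viewsite.php?pageid={$pageid}";
    $harUrl = "http://httparchive.webpagetest.org/export.php?test={$wptid}&run={$medianrun}&cached=0&pretty=1";
}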
Example #3
require_once "batch_lib.inc";
require_once "bootstrap.inc";
$date = getdate();
$label = substr($date['month'], 0, 3) . " " . $date['mday'] . " " . $date['year'];
$aCrawlnames = array("dev", "mobile", "android", "chrome");
foreach ($aCrawlnames as $crawlname) {
    $sProblems = "";
    // The status table is filled before the crawl is created.
    $numStatus = doSimpleQuery("select count(*) from status{$crawlname} where label = '{$label}';");
    if (0 == $numStatus) { // loose compare: doSimpleQuery may return the count as a string
        $sProblems .= "    No URLs have been queued up in the status{$crawlname} table.\n";
    } elseif ((("dev" === $crawlname || "chrome" === $crawlname) && 490000 > $numStatus)
        || (("mobile" === $crawlname || "android" === $crawlname) && 4900 > $numStatus)) {
        $sProblems .= "    Only {$numStatus} URLs have been queued up in the status{$crawlname} table for crawl \"{$label}\".\n";
    }
    // check that the crawl exists and has the right number of URLs
    $device = curDevice($crawlname);
    $crawl = getCrawl($label, null, $device);
    if (!$crawl) {
        $sProblems .= "    Could not find the crawl for \"{$label}\".\n";
    } else {
        $numUrls = $crawl['numUrls'];
        // Flag any mismatch between the queued URL count and the crawl's URL count
        // (loose compare: both counts may come back from the DB as strings).
        if ($numStatus != $numUrls) {
            $sProblems .= "    Only {$numUrls} URLs (instead of {$numStatus}) have been set for the {$crawlname} crawl \"{$label}\".\n";
        }
    }
    if ($sProblems) {
        echo "Problems with the {$crawlname} crawl:\n{$sProblems}\n";
    }
}
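For reference, the getdate()-based label built above is equivalent to a single date() call, matching the "M j Y" format Example #1 uses to derive labels from epochs:
$label = date("M j Y"); // e.g. "Nov 15 2012": short month, day without leading zero, 4-digit year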
Example #4
    $gRequestsTable = "requests";
    $gPagesTable = "pages";
    $gStatsTable = "stats";
}
$pagesTable = $gPagesTable;
$requestsTable = $gRequestsTable;
$device = curDevice();
$label = null;
if (array_key_exists(1, $argv)) {
    $label = $argv[1];
}
if (!$label) {
    tprint("ERROR: you must specify the label, eg, \"Nov 15 2012\".");
    exit;
}
$crawl = getCrawl($label, "All", $device);
if (FALSE === $crawl) {
    tprint("ERROR: Crawl \"{$label}\" for archive \"All\" and location \"{$device}\" wasn't found.");
    exit; // without this the pageid lookups below would run against FALSE
}
$minPageid = $crawl['minPageid'];
$maxPageid = $crawl['maxPageid'];
$pageidCond = "pageid >= {$minPageid} and pageid <= {$maxPageid}";
tprint("{$label}: {$pageidCond}");
echo doSimpleQuery("select count(*) from pages where {$pageidCond};") . " = count(*) in pages: " . "\n";
echo doSimpleQuery("select count(*) from pages where {$pageidCond} and maxDomainReqs != 0;") . " = count(*) in pages with maxDomainReqs != 0: " . "\n";
echo doSimpleQuery("select count(*) from pagestmp where {$pageidCond};") . " = count(*) in pagestmp: " . "\n";
echo doSimpleQuery("select count(*) from pagestmp where {$pageidCond} and maxDomainReqs != 0;") . " = count(*) in pagestmp with maxDomainReqs != 0: " . "\n";
echo doSimpleQuery("select count(*) from pagesdev where {$pageidCond};") . " = count(*) in pagesdev: " . "\n";
echo doSimpleQuery("select count(*) from pagesdev where {$pageidCond} and maxDomainReqs != 0;") . " = count(*) in pagesdev with maxDomainReqs != 0: " . "\n";
// 1. RESTORE DUMP FILE?
tprint("\n1. Checking if dumpfile needs to be restored...");
Example #5
        doSimpleCommand($cmd);
        lprint("done.");
        cprint("done.");
    }
}
// Empty the status table
lprint("Clear status table...");
cprint("Clear status table...");
removeAllStatusData();
// START THE CRAWL
// create a partial crawl record - we'll fill out the missing fields as we get them
// WARNING: Two runs submitted on the same day will have the same label.
$date = getdate();
$label = substr($date['month'], 0, 3) . " " . $date['mday'] . " " . $date['year'] . $gSublabel;
createCrawl(array("label" => $label, "archive" => $gArchive, "location" => $locations[0], "video" => $video, "docComplete" => $docComplete, "fvonly" => $fvonly, "runs" => $runs, "startedDateTime" => $startedDateTime, "passes" => 0));
$crawl = getCrawl($label, $gArchive, $locations[0]);
$crawlid = $crawl['crawlid'];
lprint("Created crawl {$crawlid}.");
cprint("Created crawl {$crawlid}.");
lprint("Load URLs...");
cprint("Load URLs...");
// $gUrlsFile is set in importurls.php, so a separate boolean indicates
// whether a URLs file was explicitly specified.
if ($gUrlsFile && $gbUrlsFileSpecified) {
    loadUrlsFromFile($crawlid, $label, $gUrlsFile);
} elseif ($gNumUrls) {
    loadUrlsFromDb($crawlid, $label, $gNumUrls, false);
} elseif ($gbMobile) {
    loadUrlsFromDb($crawlid, $label, 5000, false); // mobile crawls load 5K URLs
} else {
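getCrawl() can return FALSE (Example #4 checks for this explicitly), so a defensive variant of the lookup above would guard the crawlid dereference; a sketch using the same helpers:
$crawl = getCrawl($label, $gArchive, $locations[0]);
if (FALSE === $crawl) {
    lprint("ERROR: crawl \"{$label}\" was not found after createCrawl().");
    cprint("ERROR: crawl \"{$label}\" was not found after createCrawl().");
    exit;
}
$crawlid = $crawl['crawlid'];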
Example #6
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
require_once "../settings.inc";
require_once "../utils.inc";
// $gParamLabel is a hack that lets another script require() copy.php with a preset label.
$gLabel = isset($gParamLabel) ? $gParamLabel : $argv[1];
if (!$gLabel) {
    lprint("You must specify a label.");
    exit;
}
// find min & max pageid of the specified run
$crawl = getCrawl($gLabel);
if (FALSE === $crawl) {
    lprint("ERROR: Crawl \"{$gLabel}\" wasn't found.");
    exit;
}
$minid = $crawl['minPageid'];
$maxid = $crawl['maxPageid'];
lprint("Run \"{$gLabel}\": min pageid = {$minid}, max pageid = {$maxid}");
// copy the rows to production
$pageidCond = "pageid >= {$minid} and pageid <= {$maxid}";
if ($gbDev && $gPagesTableDesktop != $gPagesTableDev) {
    $count = doSimpleQuery("select count(*) from {$gPagesTableDesktop} where {$pageidCond};");
    if ($count) {
        lprint("Rows already copied.");
    } else {
        lprint("Copy 'requests' rows to production...");
        doSimpleCommand("insert into {$gRequestsTableDesktop} select * from {$gRequestsTableDev} where {$pageidCond};");
        lprint("Copy 'pages' rows to production...");
        doSimpleCommand("insert into {$gPagesTableDesktop} select * from {$gPagesTableDev} where {$pageidCond};");
        lprint("...DONE.");