/**
 * Prints an A/B (split) test confidence report for a control and a treatment group.
 *
 * Writes a fixed-width table to standard output containing, per group, the
 * visitor count, conversion count and conversion rate, plus the z-score and
 * confidence for the treatment row. A final line states whether the confidence
 * reached the 95% threshold.
 *
 * Depends on zscore() and cumnormdist(), which are defined outside this block.
 *
 * @param int|float $control_number_visitors       Visitors in the control group; used as a divisor, so it must be non-zero.
 * @param int|float $control_number_conversions    Conversions in the control group.
 * @param int|float $treatment_number_visitors     Visitors in the treatment group; used as a divisor, so it must be non-zero.
 * @param int|float $treatment_number_conversions  Conversions in the treatment group.
 *
 * @return void
 */
function calculate($control_number_visitors, $control_number_conversions, $treatment_number_visitors, $treatment_number_conversions)
{
    // zscore() receives each group as a (visitors, conversions) pair.
    $c = array($control_number_visitors, $control_number_conversions);
    $tA = array($treatment_number_visitors, $treatment_number_conversions);

    // Conversion rates as numeric percentages. They stay numeric here so both
    // rows can be formatted the same way below.
    $c_conversion_rate = $control_number_conversions / $control_number_visitors * 100;
    $tA_conversion_rate = $treatment_number_conversions / $treatment_number_visitors * 100;

    // z-score of the treatment against the control, as computed by zscore().
    $zscore = zscore($c, $tA);

    // Cumulative normal distribution of the z-score; the code treats it as a
    // ratio and scales it by 100 to obtain a percentage.
    $confidence = cumnormdist($zscore);
    $confidence_as_percentage = $confidence * 100;

    // Centre every cell to the width of its column header.
    $cV = str_pad((string) $control_number_visitors, 16, ' ', STR_PAD_BOTH);
    $cC = str_pad((string) $control_number_conversions, 11, ' ', STR_PAD_BOTH);
    $tV = str_pad((string) $treatment_number_visitors, 16, ' ', STR_PAD_BOTH);
    $tC = str_pad((string) $treatment_number_conversions, 11, ' ', STR_PAD_BOTH);
    $cr_c = str_pad(sprintf('%0.2f%%', $c_conversion_rate), 15, ' ', STR_PAD_BOTH);
    $cr_t = str_pad(sprintf('%0.2f%%', $tA_conversion_rate), 15, ' ', STR_PAD_BOTH);
    $zs = str_pad((string) $zscore, 15, ' ', STR_PAD_BOTH);
    $cratio = str_pad(sprintf('%0.2f%%', $confidence_as_percentage), 10, ' ', STR_PAD_BOTH);

    print "Split and AB Testing Confidence Calculator\n\n";
    print "------------------------------------------\n\n";
    print "Treatment | Visitors Treated | Conversions | Conversion Rate |     Z-Score     | Confidence\n";
    print "-------------------------------------------------------------------------------------------\n";
    print "Control   | {$cV} | {$cC} | {$cr_c} |                 |        \n";
    print "Treatment | {$tV} | {$tC} | {$cr_t} | {$zs} | {$cratio}        \n";
    print "-------------------------------------------------------------------------------------------\n";

    // Compare the numeric percentage, not the padded display string: a string
    // such as "   95%    " does not compare numerically against 95, and the
    // displayed value is rounded.
    if ($confidence_as_percentage >= 95) {
        print "STATISTICAL SIGNIFICANCE ACHIEVED!\n\n";
    } else {
        print "Warning: Less than 95% statistical significance.\n\n";
    }
}
Esempio n. 2
0
/**
 * Returns the hourly page observations with the highest-ranked z-scores for a date range.
 *
 * Two analytics queries are issued over the range given by the 'start' and
 * 'end' request values:
 *   1. pageviews per hostname/path/date, used to compute a mean and standard
 *      deviation for every page (dates without a row count as 0 pageviews);
 *   2. pageviews per date/hour/hostname/path/title, each row of which is
 *      scored with zscore() against the statistics from step 1.
 * The scored rows are sorted with the "zsort" comparator and the first
 * $count of them are returned.
 *
 * NOTE(review): the statistics are built from per-date totals while the scored
 * values are per-hour counts — confirm that comparing the two is intended.
 *
 * Depends on tryGET(), getSettings(), getDays(), GoogleDate(), getAnalytics(),
 * runQuery(), DoNotCache(), mean(), stdev(), zscore() and zsort(), all defined
 * outside this block.
 *
 * @param mixed $account Analytics account; defaults to the "Account" entry of getSettings().
 * @param mixed $count   Maximum number of results; falls back to the 'count'
 *                       request value, then to 10. Cast to int and clamped to 0..100.
 *
 * @return array|null|mixed List of result rows (keys: path, title, mean, stdev,
 *                          pageviews, z, timestamp, time, values); null when
 *                          'start' or 'end' is missing; the value of
 *                          DoNotCache() when a query reports ga_error.
 */
function getTopDeviations($account = null, $count = null)
{
    // Resolve the result limit: explicit argument, then request value, then 10.
    $qcount = tryGET('count');
    $vcount = 10;
    if (isset($count)) {
        $vcount = $count;
    } elseif (isset($qcount)) {
        $vcount = $qcount;
    }
    // The limit may come from the request, so force it to an integer in
    // 0..100 before it is used in arithmetic and as an array_splice() length
    // (a negative length would drop rows from the end instead of limiting).
    $vcount = max(0, min(100, (int) $vcount));

    $settings = getSettings();
    if (!isset($account)) {
        $account = $settings["Account"];
    }

    $start = tryGET('start');
    $end = tryGET('end');
    if (!isset($start) || !isset($end)) {
        return null;
    }
    $ndays = getDays($start, $end);
    $start = GoogleDate($start);
    $end = GoogleDate($end);
    $analytics = getAnalytics();

    // Pass 1: pageviews per hostname/path/date, unfiltered, at most 10000 rows.
    $dims = "ga:hostname,ga:pagePath,ga:date";
    $metric = "ga:pageviews";
    $sort = "-ga:pageviews";
    $data = runQuery($analytics, $account, $start, $end, $metric, $dims, $sort, 10000, "");
    if (isset($data->ga_error)) {
        return DoNotCache();
    }
    // NOTE(review): getRows() presumably yields null when nothing matched —
    // confirm against the client library. An empty result is iterated as [].
    $rows = $data->getRows();
    if (empty($rows)) {
        $rows = array();
    }

    // Collect the pageview counts of every page, keyed by hostname . path.
    // Assumes each row lists the requested dimensions in order, followed by
    // the metric, i.e. index 3 holds the pageviews.
    $values = array();
    foreach ($rows as $row) {
        $page = $row[0] . $row[1];
        if (!isset($values[$page])) {
            $values[$page] = array();
        }
        $values[$page][] = floatval($row[3]);
    }

    // Per-page mean and standard deviation over the whole range.
    $stdevs = array();
    foreach ($values as $page => $val) {
        // Dates with no row contribute 0 pageviews.
        $missing = $ndays - count($val);
        for ($i = 0; $i < $missing; $i++) {
            $val[] = 0;
        }
        $mean = mean($val);
        // Avoid pages with a very low average.
        if ($mean < 1) {
            continue;
        }
        $sd = stdev($mean, $val);
        // A zero deviation cannot be used to score a value; skip the page.
        if ($sd == 0) {
            continue;
        }
        $stdevs[$page] = array('mean' => $mean, 'stdev' => $sd, 'values' => $val);
    }

    // Pass 2: hourly pageviews, fetching 100 rows per requested result.
    $dims = "ga:date,ga:hour,ga:hostname,ga:pagePath,ga:pageTitle";
    $data = runQuery($analytics, $account, $start, $end, $metric, $dims, $sort, 100 * $vcount, "");
    if (isset($data->ga_error)) {
        return DoNotCache();
    }
    $rows = $data->getRows();
    if (empty($rows)) {
        $rows = array();
    }

    $result = array();
    foreach ($rows as $row) {
        $path = $row[2] . $row[3];
        // Only pages that produced usable statistics in pass 1 are scored.
        if (!isset($stdevs[$path])) {
            continue;
        }
        $sd = $stdevs[$path];
        $z = zscore($sd['stdev'], $sd['mean'], $row[5]);
        // $row[0] is sliced as an 8-character YYYYMMDD date; $row[1] is the hour.
        $y = substr($row[0], 0, 4);
        $m = substr($row[0], 4, 2);
        $d = substr($row[0], 6, 2);
        $time = "{$y}-{$m}-{$d} " . $row[1] . ":00";
        $ts = strtotime($time);
        $result[] = array('path' => $path, 'title' => $row[4], 'mean' => $sd['mean'], 'stdev' => $sd['stdev'], 'pageviews' => $row[5], 'z' => $z, 'timestamp' => $ts, "time" => $time, "values" => $sd['values']);
    }

    usort($result, "zsort");
    return array_splice($result, 0, $vcount);
}