/**
 * Parses one tab-separated log line and derives the partner ID and entry ID
 * from the path stored in field 26 (zero-based) of that line.
 *
 * The field is expected to look like "[/]p/<partnerId>/..." followed somewhere
 * by "/flavorId/<id>" or "/entry_id/<id>", where <id> starts with "0_" or "1_".
 * An ID written as "0_,xyz" (comma right after the prefix) is normalised to
 * "0_xyz". For "flavorId" paths the entry ID is looked up through
 * DWHInspector::getEntryIDByFlavorID(); when that lookup returns nothing the
 * entry ID stays at -1.
 *
 * NOTE(review): $partnerID and $entryID are received by value in this
 * signature, so the values assigned here are not visible to the caller.
 * They look like intended out-parameters (by reference) - confirm against
 * the callers and the parent declaration before changing the signature.
 *
 * @param string $line      Raw tab-separated log line.
 * @param mixed  $partnerID Overwritten locally; -1 when no partner is found.
 * @param mixed  $entryID   Overwritten locally; -1 when no entry is found.
 *
 * @return bool True when both the partner and the object ID patterns match,
 *              false otherwise.
 */
public function getLinePartnerAndEntry($line, $partnerID, $entryID = null)
{
    $partnerID = -1;
    $entryID = -1;
    $xSnameIndex = 26;

    $fmsFields = explode("\t", $line);
    if (count($fmsFields) <= $xSnameIndex) {
        return false;
    }

    preg_match('/^\\/?p\\/([0-9]+)\\/(.*)/', $fmsFields[$xSnameIndex], $matches);
    if (count($matches) <= 2) {
        return false;
    }
    $partnerID = $matches[1];

    preg_match('/.*\\/(flavorId|entry_id)\\/([01]_,?[^\\.,\\/]+)?(.*\\/)?([01]_,?[^\\.,\\/]+).*/', $matches[2], $objectsMatches);
    if (count($objectsMatches) <= 4) {
        return false;
    }

    $objectType = $objectsMatches[1];
    // Prefer the ID that directly follows the keyword; fall back to the last ID in the path.
    $objectID = $objectsMatches[2] ? $objectsMatches[2] : $objectsMatches[4];

    // Drop the comma in "0_,xyz". The pieces must be joined with '.', because
    // '+' is arithmetic in PHP and would not produce the concatenated ID.
    if (substr($objectID, 2, 1) == ',') {
        $objectID = substr($objectID, 0, 2) . substr($objectID, 3);
    }

    if ($objectType == "flavorId") {
        $entryIDResult = DWHInspector::getEntryIDByFlavorID($objectID);
        if ($entryIDResult != null) {
            $entryID = $entryIDResult;
        }
    } else {
        $entryID = $objectID;
    }

    return true;
}
/**
 * Checks that every group found in the fact tables agrees with the matching
 * group in the aggregation tables.
 *
 * Groups are obtained from DWHInspector::groupBy() for both sides. For each
 * fact group:
 *  - if the group is missing from the aggregation, the fact measure must be 0;
 *  - if the aggregated measure is 0, the fact measure must be 0 as well;
 *  - otherwise the relative difference, in percent, must not exceed
 *    $maxDiffInPercent.
 * Groups that exist only on the aggregation side are not inspected.
 *
 * @param array  $factTables       Tables passed to groupBy() for the fact side.
 * @param array  $aggrTables       Tables passed to groupBy() for the aggregation side.
 * @param int    $maxDiffInPercent Largest tolerated difference, in percent.
 * @param string $factFilter       Filter passed to groupBy() for the fact side.
 * @param string $aggrFilter       Filter passed to groupBy() for the aggregation side.
 */
public function compareAggregation($factTables, $aggrTables, $maxDiffInPercent = 0, $factFilter = '1=1', $aggrFilter = '1=1')
{
    $aggrGroups = DWHInspector::groupBy($aggrTables, $aggrFilter);
    $factGroups = DWHInspector::groupBy($factTables, $factFilter);

    foreach ($factGroups as $groupKey => $factValue) {
        if (!array_key_exists($groupKey, $aggrGroups)) {
            $this->assertEquals(0, $factValue, "For the following group (larger than 0 in fact, doesn't exist in aggregation): {$groupKey}. Fact = {$factValue}");
            continue;
        }

        $aggrValue = $aggrGroups[$groupKey];
        $details = "For the following group:{$groupKey}.Aggr = {$aggrValue}. Fact = {$factValue}";

        if ($aggrValue == 0) {
            // Nothing to divide by: both sides simply have to be zero.
            $this->assertEquals(0, $factValue, $details);
            continue;
        }

        $diffInPercent = abs(100 - $factValue / $aggrValue * 100);
        $this->assertLessThanOrEqual($maxDiffInPercent, $diffInPercent, $details);
    }
}
/**
 * Derives the entry ID and partner ID from field 26 (zero-based) of a
 * tab-separated log line.
 *
 * The field must have the form "<entryId>_<digits>@<digits>"; the entry ID
 * is everything before the last such suffix, and the partner ID is resolved
 * through DWHInspector::getPartnerIDByEntryID().
 *
 * NOTE(review): $partnerID and $entryID are received by value in this
 * signature, so the values assigned here never reach the caller - confirm
 * whether they were meant to be by-reference out-parameters.
 *
 * @param string $line      Raw tab-separated log line.
 * @param mixed  $partnerID Overwritten locally; 0 when nothing matched.
 * @param mixed  $entryID   Overwritten locally; "" when nothing matched.
 *
 * @return bool True when the field exists and matches the pattern.
 */
public function getLinePartnerAndEntry($line, $partnerID, $entryID = null)
{
    $partnerID = 0;
    $entryID = "";
    $streamNameColumn = 26;

    $columns = explode("\t", $line);
    if (count($columns) <= $streamNameColumn) {
        return false;
    }

    preg_match('/^(.*)_\\d+@\\d+$/', $columns[$streamNameColumn], $captured);
    if (count($captured) <= 1) {
        return false;
    }

    $entryID = $captured[1];
    $partnerID = DWHInspector::getPartnerIDByEntryID($entryID);

    return true;
}
/**
 * Seeds the warehouse tables with usage rows for the test partner, one
 * scenario per month constant: bandwidth overage, storage overage, combined
 * storage and bandwidth overage, plays overage and entries overage.
 *
 * Rows are written with MySQLRunner::execute(); entries are created through
 * DWHInspector::createNewEntry().
 */
public function simulateUsage()
{
    $partner = $this->partnerId;

    // Month with overage bandwidth: one row for each of bandwidth sources 8 and 9.
    $bandwidthSource8Sql = "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n\t\t\tdate_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n\t\t\tVALUES(?, ?,0,8,250,0)";
    $bandwidthSource9Sql = "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n\t\t\tdate_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n\t\t\tVALUES(?, ?,0,9,250,0)";
    MySQLRunner::execute($bandwidthSource8Sql, array($partner, self::BW_MONTH));
    MySQLRunner::execute($bandwidthSource9Sql, array($partner, self::BW_MONTH));

    // Month with overage storage: 28 consecutive date IDs.
    $storageSql = "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n date_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n VALUES(?, ?,0,8,0,500)";
    foreach (range(0, 27) as $dayOffset) {
        MySQLRunner::execute($storageSql, array($partner, self::ST_MONTH + $dayOffset));
    }

    // Month with overage storage + bandwidth: 31 consecutive date IDs.
    $storageAndBandwidthSql = "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n date_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n VALUES(?, ?,0,8,290*1024/31,290)";
    foreach (range(0, 30) as $dayOffset) {
        MySQLRunner::execute($storageAndBandwidthSql, array($partner, self::BW_ST_MONTH + $dayOffset));
    }

    // Month with overage plays.
    $playsSql = "INSERT INTO kalturadw.dwh_hourly_partner (partner_id, \n\t\t\tdate_id, hour_id, count_plays)\n\t\t\tVALUES(?, ?,0,500)";
    MySQLRunner::execute($playsSql, array($partner, self::PLAY_MONTH));

    // Month with overage entries: ten entries numbered 0..9.
    foreach (range(0, 9) as $entryNumber) {
        DWHInspector::createNewEntry($this->partnerId, $entryNumber, self::ENTRY_MONTH);
    }
}
/**
 * Runs the parent process test, then cross-checks every file of the LOADED
 * cycle against kalturadw_ds.ds_bandwidth_usage: row counts, rows carrying
 * this test's bandwidth source, total bytes, per-partner bytes and the
 * number of invalid lines.
 */
public function testProcess()
{
    parent::testProcess();
    global $CONF;

    $usageTable = 'kalturadw_ds.ds_bandwidth_usage';
    $isValidLine = array($this, 'validBWLine');

    $cycleID = DWHInspector::getCycle('LOADED');
    $files = DWHInspector::getFiles($cycleID);

    foreach ($files as $fileID) {
        $filename = implode('/', array($CONF->ProcessPath, $cycleID, DWHInspector::getFileName($fileID)));

        // Row count in ds_bandwidth_usage vs. valid lines in the file.
        $rowsInDb = DWHInspector::countRows($usageTable, $fileID);
        $this->assertEquals($rowsInDb, $this->countRows($filename, $isValidLine));

        // Same count, restricted to this test's bandwidth source.
        $sourceFilter = 'and bandwidth_source_id = ' . $this->getBandwidthSourceID();
        $rowsForSource = DWHInspector::countRows($usageTable, $fileID, $sourceFilter);
        $this->assertEquals($rowsForSource, $this->countRows($filename, $isValidLine));

        // bandwidth_bytes in ds_bandwidth_usage vs. bytes consumed in the file.
        $bytesInDb = DWHInspector::sumRows($usageTable, $fileID, "bandwidth_bytes");
        $this->assertEquals($bytesInDb, $this->sumBytes($filename, $isValidLine, $this->getBWRegex()));

        // Bandwidth consumption per partner.
        $bytesPerPartner = $this->countBWEventsPerPartner($filename);
        $this->assertEquals(count($bytesPerPartner), DWHInspector::countDistinct($usageTable, $fileID, 'partner_id'));
        foreach ($bytesPerPartner as $partner => $bytesInFile) {
            $partnerBytesInDb = DWHInspector::sumRows($usageTable, $fileID, 'bandwidth_bytes', " and partner_id='{$partner}'");
            $this->assertEquals($partnerBytesInDb, $bytesInFile);
        }

        // Invalid lines in the file vs. rows recorded in invalid_ds_lines.
        $invalidInFile = $this->countInvalidLines($filename, $isValidLine, array($this, 'ignoredInvalidBWLine'));
        $this->assertEquals($invalidInFile, DWHInspector::countRows('kalturadw_ds.invalid_ds_lines', $fileID));
    }
}
/**
 * Runs the transfer job for the LOADED cycle and verifies its outcome:
 *  - the cycle is reported as DONE afterwards;
 *  - every fact table holds as many rows per file as its DS table held
 *    before the transfer, and the DS table has no rows left for that file;
 *  - for every date/hour touched by the cycle, a matching row exists in
 *    kalturadw.aggr_managment exactly when the date is at or after the
 *    minimum reset date for that table.
 */
public function testTransfer()
{
    $cycleID = DWHInspector::getCycle('LOADED');

    // Snapshot the DS row counts per file and table before anything is moved.
    $rowsBeforeTransfer = array();
    $files = DWHInspector::getFiles($cycleID);
    $dsTablesToFactTables = $this->getDSTablesToFactTables();
    foreach ($files as $fileID) {
        foreach ($dsTablesToFactTables as $dsTable => $unusedFactTable) {
            $rowsBeforeTransfer[$fileID][$dsTable] = DWHInspector::countRows('kalturadw_ds.' . $dsTable, $fileID);
        }
    }

    DWHInspector::markAllAsAggregated();
    $transferParams = $this->getTransferParams();
    KettleRunner::execute($this->getTransferJob(), $transferParams);

    $this->assertEquals($cycleID, DWHInspector::getCycle('DONE'));
    $this->isCycleDirExists($cycleID, false);

    $files = DWHInspector::getFiles($cycleID);
    foreach ($files as $fileID) {
        foreach ($dsTablesToFactTables as $dsTable => $factTable) {
            // Fact table received exactly the rows the DS table held.
            $rowsInFact = DWHInspector::countRows('kalturadw.' . $factTable, $fileID);
            $this->assertEquals($rowsBeforeTransfer[$fileID][$dsTable], $rowsInFact);

            // DS table was emptied for this file.
            $rowsLeftInDs = DWHInspector::countRows('kalturadw_ds.' . $dsTable, $fileID);
            $this->assertEquals(0, $rowsLeftInDs);
        }
    }

    $processID = $transferParams[self::TRANSFER_PARAM_PROCESS_ID];
    foreach (DWHInspector::getAggrDatesAndHours($cycleID) as $table => $hoursByDate) {
        $minDateID = DWHInspector::getResetAggregationsMinDateID($cycleID, $table);
        $aggregationTypes = DWHInspector::getPostTransferAggregationTypes($processID, $table);

        foreach ($hoursByDate as $dateID => $hours) {
            // A reset row is expected only from the minimum reset date onwards.
            $resetExpected = $dateID >= $minDateID;

            foreach ($hours as $hourID) {
                foreach ($aggregationTypes as $aggrType) {
                    $filter = "aggr_name = '{$aggrType}' and date_id = {$dateID} and hour_id = {$hourID} and ifnull(start_time,date(19700101)) < data_insert_time";
                    $rowExists = DWHInspector::rowExists('kalturadw.aggr_managment', $filter);
                    $this->assertEquals($resetExpected, $rowExists, "Row Date: {$dateID} {$hourID} {$aggrType} . Min Date: {$minDateID}");
                }
            }
        }
    }
}
/**
 * Compares per-entity counts computed from a file with the rows stored in
 * kalturadw_ds.ds_fms_session_events for that file.
 *
 * First the number of distinct entities must match; then, for each entity,
 * the number of rows in the table must equal the count taken from the file.
 *
 * @param array  $countPerEntityCallBack Callable invoked with $filename; it
 *                                       must return a map of entity ID to
 *                                       count. Its element [1] is used as
 *                                       the failure message of the first
 *                                       assertion.
 * @param string $filename               File to count entities in.
 * @param mixed  $fileID                 File identifier used to filter the table.
 * @param string $tableEntityName        Column holding the entity ID.
 */
private function AssertFMSEntity($countPerEntityCallBack, $filename, $fileID, $tableEntityName)
{
    $sessionEventsTable = 'kalturadw_ds.ds_fms_session_events';

    $countsFromFile = call_user_func($countPerEntityCallBack, $filename);
    $this->assertEquals(
        count($countsFromFile),
        DWHInspector::countDistinct($sessionEventsTable, $fileID, $tableEntityName),
        $countPerEntityCallBack[1]
    );

    foreach ($countsFromFile as $objectID => $val) {
        $entityFilter = ' and ' . $tableEntityName . " = '" . $objectID . "'";
        $res = DWHInspector::countRows($sessionEventsTable, $fileID, $entityFilter);
        $this->assertEquals($res, $val, "Expected(db): {$res}, Actual(file): {$val} {$tableEntityName}: {$objectID}");
    }
}
/**
 * Verifies the aggregation tables against the fact tables for the DONE cycle.
 *
 * For every date at or after the minimum reset date:
 *  - per hour, play counts (event_type_id = 3) in dwh_fact_events are
 *    compared, with no tolerance, to count_plays in each hourly table listed
 *    in $playDimensions;
 *  - per date, bandwidth in KB from dwh_fact_bandwidth_usage plus
 *    dwh_fact_fms_sessions is compared, within 1 percent, to
 *    count_bandwidth_kb in the tables listed in $bandwidthDimensions.
 * Finally, plays and loads in dwh_hourly_events_entry are compared to plays
 * and views in dwh_entry_plays_views.
 */
public function testAggregation()
{
    // Create entries for aggregation according to fact.
    DWHInspector::createEntriesFromFact();
    parent::testAggregation();

    $cycleID = DWHInspector::getCycle('DONE');
    $factsToHours = DWHInspector::getAggrDatesAndHours($cycleID);
    $factTable = 'kalturadw.dwh_fact_events';
    $minDateID = DWHInspector::getResetAggregationsMinDateID($cycleID, $factTable);

    // Dimension column => hourly table, in the order the checks are run.
    $playDimensions = array(
        array('partner_id', 'kalturadw.dwh_hourly_partner'),
        array('entry_id', 'kalturadw.dwh_hourly_events_entry'),
        array('domain_id', 'kalturadw.dwh_hourly_events_domain'),
        array('referrer_id', 'kalturadw.dwh_hourly_events_domain_referrer'),
        array('location_id', 'kalturadw.dwh_hourly_events_country'),
        array('country_id', 'kalturadw.dwh_hourly_events_country'),
        array('widget_id', 'kalturadw.dwh_hourly_events_widget'),
        array('os_id', 'kalturadw.dwh_hourly_events_devices'),
        array('browser_id', 'kalturadw.dwh_hourly_events_devices'),
    );

    // Bandwidth-fact column, FMS-sessions column, aggregation column,
    // aggregation table, aggregation measure - in the order the checks are run.
    $bandwidthDimensions = array(
        array('partner_id', 'session_partner_id', 'partner_id', 'kalturadw.dwh_hourly_partner_usage', 'ifnull(count_bandwidth_kb, 0)'),
        array('bandwidth_source_id', 'bandwidth_source_id', 'bandwidth_source_id', 'kalturadw.dwh_hourly_partner_usage', 'ifnull(count_bandwidth_kb,0)'),
        array('location_id', 'location_id', 'location_id', 'kalturadw.dwh_hourly_events_devices', 'ifnull(count_bandwidth_kb, 0)'),
        array('country_id', 'country_id', 'country_id', 'kalturadw.dwh_hourly_events_devices', 'ifnull(count_bandwidth_kb, 0)'),
    );

    foreach ($factsToHours[$factTable] as $dateID => $hours) {
        if ($dateID < $minDateID) {
            continue;
        }

        foreach ($hours as $hourID) {
            $hourlyFactFilter = 'event_date_id = ' . $dateID . ' and event_hour_id = ' . $hourID;
            $hourlyAggrFilter = 'date_id = ' . $dateID . ' and hour_id = ' . $hourID;

            foreach ($playDimensions as $playDimension) {
                list($dimension, $hourlyTable) = $playDimension;
                $this->compareAggregation(
                    array(new ComparedTable($dimension, 'kalturadw.dwh_fact_events', 'if(event_type_id=3,1,0)')),
                    array(new ComparedTable($dimension, $hourlyTable, 'ifnull(count_plays, 0)')),
                    0,
                    $hourlyFactFilter,
                    $hourlyAggrFilter
                );
            }
        }

        $dailyFactFilter = 'activity_date_id = ' . $dateID;
        $dailyAggrFilter = 'date_id = ' . $dateID;

        foreach ($bandwidthDimensions as $bandwidthDimension) {
            list($bandwidthColumn, $fmsColumn, $aggrColumn, $aggrTable, $aggrMeasure) = $bandwidthDimension;
            $this->compareAggregation(
                array(
                    new ComparedTable($bandwidthColumn, 'kalturadw.dwh_fact_bandwidth_usage', '(bandwidth_bytes/1024)'),
                    new ComparedTable($fmsColumn, 'kalturadw.dwh_fact_fms_sessions', '(total_bytes/1024)'),
                ),
                array(new ComparedTable($aggrColumn, $aggrTable, $aggrMeasure)),
                1,
                $dailyFactFilter,
                $dailyAggrFilter
            );
        }
    }

    // Hourly entry totals vs. the running plays/views table, over all dates.
    $this->compareAggregation(
        array(new ComparedTable('entry_id', 'kalturadw.dwh_hourly_events_entry', 'ifnull(count_plays,0)')),
        array(new ComparedTable('entry_id', 'kalturadw.dwh_entry_plays_views', 'ifnull(plays,0)'))
    );
    $this->compareAggregation(
        array(new ComparedTable('entry_id', 'kalturadw.dwh_hourly_events_entry', 'ifnull(count_loads,0)')),
        array(new ComparedTable('entry_id', 'kalturadw.dwh_entry_plays_views', 'ifnull(views,0)'))
    );
}
/**
 * Class-level teardown: delegates to DWHInspector::cleanEtlServers() once
 * all tests of the class have run.
 */
public static function tearDownAfterClass()
{
    DWHInspector::cleanEtlServers();
}