/**
 * Parses the partner ID and entry ID out of one tab-separated log line.
 *
 * The field at index 26 is expected to look like
 * "/p/<partnerId>/.../(flavorId|entry_id)/<objectId>...". An object ID whose
 * third character is a comma ("0_,xxxx" / "1_,xxxx") has that comma removed.
 * When the object is a flavorId it is translated through
 * DWHInspector::getEntryIDByFlavorID(); if that lookup returns a value loosely
 * equal to null the entry ID stays at -1.
 *
 * NOTE(review): $partnerID and $entryID are declared by value in this
 * signature, so the values assigned below are not visible to the caller.
 * Confirm whether they were meant to be passed by reference.
 *
 * @param string $line      Raw log line, fields separated by tabs.
 * @param mixed  $partnerID Ignored on input; reset to -1 and then set to the parsed partner ID.
 * @param mixed  $entryID   Ignored on input; reset to -1 and then set to the resolved entry ID.
 *
 * @return bool true when the line has enough fields and both patterns matched, false otherwise.
 */
public function getLinePartnerAndEntry($line, $partnerID, $entryID = null)
 {
     $partnerID = -1;
     $entryID = -1;
     $xSnameIndex = 26;
     $fmsFields = explode("\t", $line);
     if (count($fmsFields) <= $xSnameIndex) {
         return false;
     }

     // Capture 1 is the partner ID, capture 2 is the remainder of the path.
     preg_match('/^\\/?p\\/([0-9]+)\\/(.*)/', $fmsFields[$xSnameIndex], $matches);
     if (count($matches) <= 2) {
         return false;
     }
     $partnerID = $matches[1];

     // Capture 1 is the object type; captures 2 and 4 are candidate object IDs.
     preg_match('/.*\\/(flavorId|entry_id)\\/([01]_,?[^\\.,\\/]+)?(.*\\/)?([01]_,?[^\\.,\\/]+).*/', $matches[2], $objectsMatches);
     if (count($objectsMatches) <= 4) {
         return false;
     }

     $objectType = $objectsMatches[1];
     // Prefer the ID that directly follows the type keyword; fall back to the later one.
     $objectID = $objectsMatches[2] ? $objectsMatches[2] : $objectsMatches[4];
     if (substr($objectID, 2, 1) == ',') {
         // Drop the comma after the two-character prefix. This must be string
         // concatenation ("."); "+" would perform arithmetic on the two halves.
         $objectID = substr($objectID, 0, 2) . substr($objectID, 3);
     }

     if ($objectType == "flavorId") {
         $entryIDResult = DWHInspector::getEntryIDByFlavorID($objectID);
         if ($entryIDResult != null) {
             $entryID = $entryIDResult;
         }
     } else {
         $entryID = $objectID;
     }
     return true;
 }
 /**
  * Asserts that the grouped measures of the fact tables agree with those of
  * the aggregation tables.
  *
  * Both sides are grouped through DWHInspector::groupBy(). For every group
  * found on the fact side:
  *  - if the group is missing from the aggregation, the fact measure must be 0;
  *  - if the aggregation measure is (loosely) 0, the fact measure must be 0;
  *  - otherwise the absolute deviation, in percent of the aggregation
  *    measure, must not exceed $maxDiffInPercent.
  *
  * Groups that exist only on the aggregation side are not inspected.
  *
  * @param array  $factTables       Tables handed to DWHInspector::groupBy() for the fact side.
  * @param array  $aggrTables       Tables handed to DWHInspector::groupBy() for the aggregation side.
  * @param mixed  $maxDiffInPercent Largest tolerated deviation in percent.
  * @param string $factFilter       Filter passed to groupBy() for the fact side.
  * @param string $aggrFilter       Filter passed to groupBy() for the aggregation side.
  */
 public function compareAggregation($factTables, $aggrTables, $maxDiffInPercent = 0, $factFilter = '1=1', $aggrFilter = '1=1')
 {
     $aggrGroups = DWHInspector::groupBy($aggrTables, $aggrFilter);
     $factGroups = DWHInspector::groupBy($factTables, $factFilter);

     foreach ($factGroups as $id => $measure) {
         // The group never reached the aggregation: only acceptable for an empty measure.
         if (!array_key_exists($id, $aggrGroups)) {
             $this->assertEquals(0, $measure, "For the following group (larger than 0 in fact, doesn't exist in aggregation): " . $id . ". Fact = " . $measure);
             continue;
         }

         $aggrMeasure = $aggrGroups[$id];

         // A percentage cannot be computed against zero, so require an exact match.
         if ($aggrMeasure == 0) {
             $this->assertEquals(0, $measure, "For the following group:" . $id . ".Aggr = " . $aggrMeasure . ". Fact = " . $measure);
             continue;
         }

         $deviationInPercent = abs(100 - $measure / $aggrMeasure * 100);
         $this->assertLessThanOrEqual($maxDiffInPercent, $deviationInPercent, "For the following group:" . $id . ".Aggr = " . $aggrMeasure . ". Fact = " . $measure);
     }
 }
 /**
  * Parses the entry ID out of one tab-separated log line and looks up its partner.
  *
  * The field at index 26 must match "<entryId>_<digits>@<digits>"; the part
  * before the last such suffix is taken as the entry ID, and the partner ID is
  * obtained from DWHInspector::getPartnerIDByEntryID().
  *
  * NOTE(review): $partnerID and $entryID are declared by value in this
  * signature, so the values assigned below are not visible to the caller.
  * Confirm whether they were meant to be passed by reference.
  *
  * @param string $line      Raw log line, fields separated by tabs.
  * @param mixed  $partnerID Ignored on input; reset to 0 and then set to the looked-up partner ID.
  * @param mixed  $entryID   Ignored on input; reset to "" and then set to the parsed entry ID.
  *
  * @return bool true when the line has enough fields and the pattern matched, false otherwise.
  */
 public function getLinePartnerAndEntry($line, $partnerID, $entryID = null)
 {
     $partnerID = 0;
     $entryID = "";
     $streamNameIndex = 26;
     $columns = explode("\t", $line);

     // Too few columns: the stream-name field is absent.
     if (count($columns) <= $streamNameIndex) {
         return false;
     }

     preg_match('/^(.*)_\\d+@\\d+$/', $columns[$streamNameIndex], $nameParts);
     if (count($nameParts) <= 1) {
         return false;
     }

     $entryID = $nameParts[1];
     $partnerID = DWHInspector::getPartnerIDByEntryID($entryID);
     return true;
 }
 /**
  * Seeds the warehouse with synthetic usage rows for $this->partnerId.
  *
  * Rows are inserted through MySQLRunner::execute() into
  * kalturadw.dwh_hourly_partner_usage and kalturadw.dwh_hourly_partner, and
  * ten entries are created through DWHInspector::createNewEntry(). Each
  * scenario targets its own class constant (BW_MONTH, ST_MONTH, BW_ST_MONTH,
  * PLAY_MONTH, ENTRY_MONTH).
  */
 public function simulateUsage()
 {
     $partnerId = $this->partnerId;

     // Month with overage bandwidth: one row per bandwidth source (8 and 9).
     $bandwidthStatements = array(
         "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n\t\t\tdate_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n\t\t\tVALUES(?, ?,0,8,250,0)",
         "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n\t\t\tdate_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n\t\t\tVALUES(?, ?,0,9,250,0)",
     );
     foreach ($bandwidthStatements as $statement) {
         MySQLRunner::execute($statement, array($partnerId, self::BW_MONTH));
     }

     // Month with overage storage: 28 consecutive date IDs starting at ST_MONTH.
     $storageStatement = "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n                date_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n                VALUES(?, ?,0,8,0,500)";
     foreach (range(0, 27) as $dayOffset) {
         MySQLRunner::execute($storageStatement, array($partnerId, self::ST_MONTH + $dayOffset));
     }

     // Month with overage storage + bandwidth: 31 consecutive date IDs starting at BW_ST_MONTH.
     $combinedStatement = "INSERT INTO kalturadw.dwh_hourly_partner_usage (partner_id, \n                date_id, hour_id, bandwidth_source_id, count_bandwidth_kb , aggr_storage_mb)\n                VALUES(?, ?,0,8,290*1024/31,290)";
     foreach (range(0, 30) as $dayOffset) {
         MySQLRunner::execute($combinedStatement, array($partnerId, self::BW_ST_MONTH + $dayOffset));
     }

     // Month with overage plays.
     $playsStatement = "INSERT INTO kalturadw.dwh_hourly_partner (partner_id, \n\t\t\tdate_id, hour_id, count_plays)\n\t\t\tVALUES(?, ?,0,500)";
     MySQLRunner::execute($playsStatement, array($partnerId, self::PLAY_MONTH));

     // Month with overage entries: ten entries numbered 0..9.
     foreach (range(0, 9) as $entryNumber) {
         DWHInspector::createNewEntry($partnerId, $entryNumber, self::ENTRY_MONTH);
     }
 }
 /**
  * Runs the parent process test, then checks every file of the cycle returned
  * by DWHInspector::getCycle('LOADED') against kalturadw_ds.ds_bandwidth_usage.
  *
  * Per file it compares: total row count, row count for this test's bandwidth
  * source, summed bandwidth bytes, the number of distinct partners, the bytes
  * per partner, and the number of rows in kalturadw_ds.invalid_ds_lines
  * against the invalid lines counted in the file.
  */
 public function testProcess()
 {
     parent::testProcess();
     global $CONF;

     $bandwidthTable = 'kalturadw_ds.ds_bandwidth_usage';
     $validLineCallback = array($this, 'validBWLine');

     $cycleID = DWHInspector::getCycle('LOADED');
     foreach (DWHInspector::getFiles($cycleID) as $fileID) {
         $filename = $CONF->ProcessPath . "/" . $cycleID . '/' . DWHInspector::getFileName($fileID);

         // Row count: table vs. valid lines in the file.
         $rowsInTable = DWHInspector::countRows($bandwidthTable, $fileID);
         $rowsInFile = $this->countRows($filename, $validLineCallback);
         $this->assertEquals($rowsInTable, $rowsInFile);

         // Same comparison, restricted to this test's bandwidth source.
         $sourceFilter = 'and bandwidth_source_id = ' . $this->getBandwidthSourceID();
         $rowsInTableForSource = DWHInspector::countRows($bandwidthTable, $fileID, $sourceFilter);
         $rowsInFile = $this->countRows($filename, $validLineCallback);
         $this->assertEquals($rowsInTableForSource, $rowsInFile);

         // Total bandwidth bytes: table vs. bytes consumed in the file.
         $bytesInTable = DWHInspector::sumRows($bandwidthTable, $fileID, "bandwidth_bytes");
         $bytesInFile = $this->sumBytes($filename, $validLineCallback, $this->getBWRegex());
         $this->assertEquals($bytesInTable, $bytesInFile);

         // Bandwidth consumption per partner.
         $bwPartners = $this->countBWEventsPerPartner($filename);
         $partnersInTable = DWHInspector::countDistinct($bandwidthTable, $fileID, 'partner_id');
         $this->assertEquals(count($bwPartners), $partnersInTable);
         foreach ($bwPartners as $partner => $bytesFromFile) {
             $partnerFilter = ' and partner_id=\'' . $partner . '\'';
             $bytesFromTable = DWHInspector::sumRows($bandwidthTable, $fileID, 'bandwidth_bytes', $partnerFilter);
             $this->assertEquals($bytesFromTable, $bytesFromFile);
         }

         // Invalid lines found in the file must equal the rows recorded as invalid.
         $invalidInFile = $this->countInvalidLines($filename, $validLineCallback, array($this, 'ignoredInvalidBWLine'));
         $invalidInTable = DWHInspector::countRows('kalturadw_ds.invalid_ds_lines', $fileID);
         $this->assertEquals($invalidInFile, $invalidInTable);
     }
 }
 /**
  * Runs the transfer job on the cycle returned by DWHInspector::getCycle('LOADED')
  * and verifies its effects.
  *
  * Steps:
  *  1. Record the per-file row count of every DS table.
  *  2. Mark everything as aggregated and execute the transfer job via KettleRunner.
  *  3. Assert the cycle is now the 'DONE' cycle and check its directory via
  *     isCycleDirExists($cycleID, false).
  *  4. Assert each fact table holds the recorded row count and each DS table is empty.
  *  5. For every aggregated table/date/hour/aggregation type, assert that a
  *     matching row exists in kalturadw.aggr_managment exactly when the date is
  *     not earlier than the reset-aggregations minimum date.
  */
 public function testTransfer()
 {
     $cycleID = DWHInspector::getCycle('LOADED');
     $rowsBeforeTransfer = array();
     $files = DWHInspector::getFiles($cycleID);
     $dsTablesToFactTables = $this->getDSTablesToFactTables();

     // Snapshot the DS tables before they are emptied by the transfer.
     foreach ($files as $fileID) {
         foreach ($dsTablesToFactTables as $dsTable => $unusedFactTable) {
             $rowsBeforeTransfer[$fileID][$dsTable] = DWHInspector::countRows('kalturadw_ds.' . $dsTable, $fileID);
         }
     }

     DWHInspector::markAllAsAggregated();
     $transferParams = $this->getTransferParams();
     $transferJob = $this->getTransferJob();
     KettleRunner::execute($transferJob, $transferParams);

     $doneCycle = DWHInspector::getCycle('DONE');
     $this->assertEquals($cycleID, $doneCycle);
     $this->isCycleDirExists($cycleID, false);

     $files = DWHInspector::getFiles($cycleID);
     foreach ($files as $fileID) {
         foreach ($dsTablesToFactTables as $dsTable => $factTable) {
             // Rows moved to the fact table must equal what the DS table held.
             $expectedRows = $rowsBeforeTransfer[$fileID][$dsTable];
             $rowsInFact = DWHInspector::countRows('kalturadw.' . $factTable, $fileID);
             $this->assertEquals($expectedRows, $rowsInFact);

             // The DS table must have been emptied for this file.
             $rowsLeftInDs = DWHInspector::countRows('kalturadw_ds.' . $dsTable, $fileID);
             $this->assertEquals(0, $rowsLeftInDs);
         }
     }

     $processID = $transferParams[self::TRANSFER_PARAM_PROCESS_ID];
     $aggrDatesAndHours = DWHInspector::getAggrDatesAndHours($cycleID);
     foreach ($aggrDatesAndHours as $table => $hoursByDate) {
         $minDateID = DWHInspector::getResetAggregationsMinDateID($cycleID, $table);
         $postTransferAggregationTypes = DWHInspector::getPostTransferAggregationTypes($processID, $table);

         foreach ($hoursByDate as $dateID => $hours) {
             // A reset row is expected only from the minimum date onwards.
             $shouldExist = $dateID >= $minDateID;

             foreach ($hours as $hourID) {
                 foreach ($postTransferAggregationTypes as $aggrType) {
                     $filter = 'aggr_name = \'' . $aggrType . '\' and date_id = ' . $dateID . ' and hour_id = ' . $hourID . ' and ifnull(start_time,date(19700101)) < data_insert_time';
                     $rowExists = DWHInspector::rowExists('kalturadw.aggr_managment', $filter);
                     $this->assertEquals($shouldExist, $rowExists, "Row Date: {$dateID} {$hourID} {$aggrType} . Min Date: {$minDateID}");
                 }
             }
         }
     }
 }
 /**
  * Checks the per-entity counts obtained from a file against
  * kalturadw_ds.ds_fms_session_events.
  *
  * The callback is invoked with $filename and must return a map of
  * entity ID => count. The number of entities must equal the number of
  * distinct $tableEntityName values stored for the file, and each entity's
  * count must equal the number of rows stored for it.
  *
  * @param array  $countPerEntityCallBack Callable given as an array; element 1 is used as the assertion message.
  * @param string $filename               File handed to the callback.
  * @param mixed  $fileID                 File identifier used to query the table.
  * @param string $tableEntityName        Column that identifies the entity in the table.
  */
 private function AssertFMSEntity($countPerEntityCallBack, $filename, $fileID, $tableEntityName)
 {
     $sessionEventsTable = 'kalturadw_ds.ds_fms_session_events';

     $countsFromFile = call_user_func($countPerEntityCallBack, $filename);
     $entitiesInFile = count($countsFromFile);
     $entitiesInTable = DWHInspector::countDistinct($sessionEventsTable, $fileID, $tableEntityName);
     $this->assertEquals($entitiesInFile, $entitiesInTable, $countPerEntityCallBack[1]);

     foreach ($countsFromFile as $objectID => $val) {
         $entityFilter = " and {$tableEntityName} = '{$objectID}'";
         $res = DWHInspector::countRows($sessionEventsTable, $fileID, $entityFilter);
         $this->assertEquals($res, $val, "Expected(db): {$res}, Actual(file): {$val} {$tableEntityName}: {$objectID}");
     }
 }
 /**
  * Runs the parent aggregation test and then cross-checks fact tables against
  * their aggregation tables through compareAggregation().
  *
  * For every date (not earlier than the reset-aggregations minimum date) of
  * kalturadw.dwh_fact_events in the 'DONE' cycle:
  *  - per hour, plays (event_type_id = 3) are compared per dimension with the
  *    matching hourly table, with no tolerance;
  *  - per date, bandwidth in KB from the bandwidth-usage and FMS-session fact
  *    tables is compared with the aggregation tables, with 1 percent tolerance.
  * Finally the hourly per-entry plays and loads are compared with
  * kalturadw.dwh_entry_plays_views.
  */
 public function testAggregation()
 {
     // Create the entries the aggregation needs, derived from the fact data.
     DWHInspector::createEntriesFromFact();
     parent::testAggregation();

     $cycleID = DWHInspector::getCycle('DONE');
     $factsToHours = DWHInspector::getAggrDatesAndHours($cycleID);
     $factTable = 'kalturadw.dwh_fact_events';
     $minDateID = DWHInspector::getResetAggregationsMinDateID($cycleID, $factTable);

     // Dimension column => hourly aggregation table holding count_plays for it.
     $playsAggrTables = array(
         'partner_id' => 'kalturadw.dwh_hourly_partner',
         'entry_id' => 'kalturadw.dwh_hourly_events_entry',
         'domain_id' => 'kalturadw.dwh_hourly_events_domain',
         'referrer_id' => 'kalturadw.dwh_hourly_events_domain_referrer',
         'location_id' => 'kalturadw.dwh_hourly_events_country',
         'country_id' => 'kalturadw.dwh_hourly_events_country',
         'widget_id' => 'kalturadw.dwh_hourly_events_widget',
         'os_id' => 'kalturadw.dwh_hourly_events_devices',
         'browser_id' => 'kalturadw.dwh_hourly_events_devices',
     );

     // Each row: bandwidth-fact column, FMS-fact column, aggregation column,
     // aggregation table, aggregation measure expression.
     $bandwidthChecks = array(
         array('partner_id', 'session_partner_id', 'partner_id', 'kalturadw.dwh_hourly_partner_usage', 'ifnull(count_bandwidth_kb, 0)'),
         array('bandwidth_source_id', 'bandwidth_source_id', 'bandwidth_source_id', 'kalturadw.dwh_hourly_partner_usage', 'ifnull(count_bandwidth_kb,0)'),
         array('location_id', 'location_id', 'location_id', 'kalturadw.dwh_hourly_events_devices', 'ifnull(count_bandwidth_kb, 0)'),
         array('country_id', 'country_id', 'country_id', 'kalturadw.dwh_hourly_events_devices', 'ifnull(count_bandwidth_kb, 0)'),
     );

     foreach ($factsToHours[$factTable] as $dateID => $hours) {
         if ($dateID < $minDateID) {
             continue;
         }

         foreach ($hours as $hourID) {
             $factHourFilter = 'event_date_id = ' . $dateID . ' and event_hour_id = ' . $hourID;
             $aggrHourFilter = 'date_id = ' . $dateID . ' and hour_id = ' . $hourID;

             foreach ($playsAggrTables as $dimension => $aggrTable) {
                 $factSide = array(new ComparedTable($dimension, $factTable, 'if(event_type_id=3,1,0)'));
                 $aggrSide = array(new ComparedTable($dimension, $aggrTable, 'ifnull(count_plays, 0)'));
                 $this->compareAggregation($factSide, $aggrSide, 0, $factHourFilter, $aggrHourFilter);
             }
         }

         $factDateFilter = 'activity_date_id = ' . $dateID;
         $aggrDateFilter = 'date_id = ' . $dateID;

         foreach ($bandwidthChecks as $check) {
             list($bandwidthColumn, $fmsColumn, $aggrColumn, $aggrTable, $aggrMeasure) = $check;
             $factSide = array(
                 new ComparedTable($bandwidthColumn, 'kalturadw.dwh_fact_bandwidth_usage', '(bandwidth_bytes/1024)'),
                 new ComparedTable($fmsColumn, 'kalturadw.dwh_fact_fms_sessions', '(total_bytes/1024)'),
             );
             $aggrSide = array(new ComparedTable($aggrColumn, $aggrTable, $aggrMeasure));
             $this->compareAggregation($factSide, $aggrSide, 1, $factDateFilter, $aggrDateFilter);
         }
     }

     // Hourly per-entry measure => matching measure in the plays/views table.
     $entryMeasures = array(
         'ifnull(count_plays,0)' => 'ifnull(plays,0)',
         'ifnull(count_loads,0)' => 'ifnull(views,0)',
     );
     foreach ($entryMeasures as $hourlyMeasure => $totalMeasure) {
         $hourlySide = array(new ComparedTable('entry_id', 'kalturadw.dwh_hourly_events_entry', $hourlyMeasure));
         $totalSide = array(new ComparedTable('entry_id', 'kalturadw.dwh_entry_plays_views', $totalMeasure));
         $this->compareAggregation($hourlySide, $totalSide);
     }
 }
 /**
  * Class-level cleanup: delegates to DWHInspector::cleanEtlServers().
  *
  * NOTE(review): the name matches PHPUnit's class-level teardown hook, so this
  * presumably runs once after the last test of the class. Confirm against the
  * test base class.
  */
 public static function tearDownAfterClass()
 {
     DWHInspector::cleanEtlServers();
 }