/**
 * Runs the re-attribution command.
 *
 * Resolves the date range from the input, creates the geolocator, reads the percent step,
 * counts the visits in the range, prints a summary line and then processes the visits in
 * chunks. The elapsed time reported by the timer is printed once processing has finished.
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 * @return void|int Always 0 in this implementation.
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    list($from, $to) = $this->getDateRangeToAttribute($input);

    $this->visitorGeolocator = $this->createGeolocator($output, $input);
    $this->percentStep = $this->getPercentStep($input);
    $this->amountOfVisits = $this->dao->countVisitsWithDatesLimit($from, $to);

    $providerId = $this->visitorGeolocator->getProvider()->getId();
    $summary = sprintf('Re-attribution for date range: %s to %s. %d visits to process with provider "%s".', $from, $to, $this->amountOfVisits, $providerId);
    $output->writeln($summary);

    // The timer is created right before the chunked processing starts.
    $this->timer = new Timer();

    $segmentLimit = $input->getOption(self::SEGMENT_LIMIT_OPTION);
    $this->processSpecifiedLogsInChunks($output, $from, $to, $segmentLimit);

    $elapsed = $this->timer->__toString();
    $output->writeln("Completed. <comment>" . $elapsed . "</comment>");

    return 0;
}
/**
 * Purges old data from the following tables:
 * - log_visit
 * - log_link_visit_action
 * - log_conversion
 * - log_conversion_item
 * - log_action
 *
 * Visits are deleted through the log deleter with no lower date bound. Unused rows in
 * log_action are only removed when the LOCK TABLES privilege is granted; otherwise a
 * warning is logged and that step is skipped. The tables returned by
 * `getDeleteTableLogTables()` are optimized afterwards in either case.
 *
 * @param int $deleteLogsOlderThan The number of days after which log entries are considered old.
 *                                 Visits and related data whose age is greater than this number
 *                                 will be purged.
 */
public function purgeData($deleteLogsOlderThan)
{
    $cutoffDate = Date::factory("today")->subDay($deleteLogsOlderThan);

    // null start => no lower bound, everything before the cutoff is deleted
    $this->logDeleter->deleteVisitsFor(null, $cutoffDate->getDatetime());

    $tablesToOptimize = self::getDeleteTableLogTables();

    if (!Db::isLockPrivilegeGranted()) {
        // unused actions can only be deleted from log_action if we can lock tables
        Log::warning(get_class($this) . ": LOCK TABLES privilege not granted; skipping unused actions purge");
    } else {
        $this->rawLogDao->deleteUnusedLogActions();
    }

    // optimize table overhead after deletion
    Db::optimizeTables($tablesToOptimize);
}
/**
 * Deletes visits within the specified date range and belonging to the specified site (if any). Visits are
 * fetched and deleted in chunks of `$iterationStep` rows.
 *
 * @param string|null $startDatetime A datetime string. Visits whose last action time is at or after this value
 *                                   are deleted. If empty, no lower bound is applied.
 * @param string|null $endDatetime A datetime string. Visits whose last action time is before this value are
 *                                 deleted. If empty, no upper bound is applied.
 * @param int|null $idSite The site to delete visits from. If empty, visits are not restricted to a site.
 * @param int $iterationStep The number of visits to delete at a single time.
 * @param callable|null $afterChunkDeleted Callback executed after every chunk of visits is deleted. It receives
 *                                         the running total of deleted visits.
 * @return int The number of visits deleted.
 */
public function deleteVisitsFor($startDatetime, $endDatetime, $idSite = null, $iterationStep = 1000, $afterChunkDeleted = null)
{
    // Each candidate is only turned into a condition when its value is non-empty.
    $candidates = array(
        array('visit_last_action_time', '>=', $startDatetime),
        array('visit_last_action_time', '<', $endDatetime),
        array('idsite', '=', $idSite),
    );

    $conditions = array();
    foreach ($candidates as $candidate) {
        if (!empty($candidate[2])) {
            $conditions[] = $candidate;
        }
    }

    $totalDeleted = 0;
    $self = $this;

    // Only idvisit is selected, so the first column of every row is the visit ID.
    $firstColumn = function ($row) {
        return reset($row);
    };

    $deleteChunk = function ($logs) use ($self, &$totalDeleted, $afterChunkDeleted, $firstColumn) {
        $idVisits = array_map($firstColumn, $logs);
        $totalDeleted += $self->deleteVisits($idVisits);

        if ($afterChunkDeleted) {
            $afterChunkDeleted($totalDeleted);
        }
    };

    $this->rawLogDao->forAllLogs('log_visit', array('idvisit'), $conditions, $iterationStep, $deleteChunk);

    return $totalDeleted;
}
/**
 * Fetches the visits between `$from` and `$to` in chunks of `$segmentLimit` rows and
 * re-attributes every non-empty chunk.
 *
 * The idvisit of the last row of a chunk is passed to the DAO when fetching the next chunk.
 * Processing stops as soon as a chunk contains a different number of rows than `$segmentLimit`.
 *
 * @param OutputInterface $output
 * @param string $from
 * @param string $to
 * @param int|string $segmentLimit Number of visits to fetch per chunk. Compared loosely, so a
 *                                 numeric string is accepted.
 */
protected function processSpecifiedLogsInChunks(OutputInterface $output, $from, $to, $segmentLimit)
{
    $geoFields = array_keys(VisitorGeolocator::$logVisitFieldsToUpdate);
    $visitFieldsToSelect = array_merge(array('idvisit', 'location_ip'), $geoFields);

    $lastId = 0;

    for (;;) {
        $logs = $this->dao->getVisitsWithDatesLimit($from, $to, $visitFieldsToSelect, $lastId, $segmentLimit);

        if (!empty($logs)) {
            $lastRow = $logs[count($logs) - 1];
            $lastId = $lastRow['idvisit'];

            $this->reattributeVisitLogs($output, $logs);
        }

        // a chunk that is not completely filled means there is nothing left to fetch
        if (count($logs) != $segmentLimit) {
            break;
        }
    }
}
/**
 * Override that delegates to the parent with an iteration step of 2 and then runs an
 * optional callback, so code can be executed after the data is inserted (for concurrency
 * testing purposes).
 *
 * The "older than" callback runs when `$olderThan` is truthy and that callback is set. In
 * every other case the "newer than" callback runs if it is set.
 * NOTE(review): this means the "newer than" callback also runs for `$olderThan = true` when
 * no "older than" callback is set — confirm this is intended.
 *
 * @param mixed $maxIds Passed through to the parent implementation.
 * @param bool $olderThan Passed through to the parent implementation; also selects the callback.
 * @param int $insertIntoTempIterationStep Ignored; the parent is always called with 2.
 */
protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
{
    // we use 2 to force iterations during tests
    parent::insertActionsToKeep($maxIds, $olderThan, 2);

    $afterInsert = null;
    if ($olderThan && $this->insertActionsOlderThanCallback) {
        $afterInsert = $this->insertActionsOlderThanCallback;
    } elseif ($this->insertActionsNewerThanCallback) {
        $afterInsert = $this->insertActionsNewerThanCallback;
    }

    if ($afterInsert !== null) {
        $afterInsert();
    }
}
/**
 * Re-geolocates visits within a date range, optionally restricted to a single site.
 *
 * Visits are read from log_visit in chunks of `$iterationStep` rows and every row is passed
 * to `attributeExistingVisit()`.
 *
 * @param string $from A datetime string to treat as the lower bound. Visits whose last action time is at or
 *                     after this value are processed.
 * @param string $to A datetime string to treat as the upper bound. Visits whose last action time is before
 *                   this value are processed.
 * @param int|null $idSite If supplied, only visits for this site are re-attributed.
 * @param int $iterationStep The number of visits to re-attribute at the same time.
 * @param callable|null $onLogProcessed If supplied, this callback is called after every row is processed.
 *                                      The processed visit and the updated values are passed to the callback.
 */
public function reattributeVisitLogs($from, $to, $idSite = null, $iterationStep = 1000, $onLogProcessed = null)
{
    $baseFields = array('idvisit', 'location_ip');
    $geoFields = array_keys(VisitorGeolocator::$logVisitFieldsToUpdate);
    $visitFieldsToSelect = array_merge($baseFields, $geoFields);

    $conditions = array();
    $conditions[] = array('visit_last_action_time', '>=', $from);
    $conditions[] = array('visit_last_action_time', '<', $to);
    if (!empty($idSite)) {
        $conditions[] = array('idsite', '=', $idSite);
    }

    $geolocator = $this;
    $processChunk = function ($logs) use ($geolocator, $onLogProcessed) {
        $shouldNotify = !empty($onLogProcessed);

        foreach ($logs as $visitRow) {
            $updatedValues = $geolocator->attributeExistingVisit($visitRow);

            if ($shouldNotify) {
                $onLogProcessed($visitRow, $updatedValues);
            }
        }
    };

    $this->dao->forAllLogs('log_visit', $visitFieldsToSelect, $conditions, $iterationStep, $processChunk);
}
/**
 * Geolocates an existing visit and then updates it if its current attributes are different than
 * what was geolocated. Also updates all conversions of a visit.
 *
 * **This method should NOT be used from within the tracker.**
 *
 * @param array $visit The visit information. Must contain an `"idvisit"` element and `"location_ip"` element.
 * @param bool $useClassCache Passed through to `getLocation()`.
 * @return array|null The visit properties that were updated in the DB mapped to the updated values (the
 *                    result of `getVisitFieldsToUpdate()`, which is empty when nothing changed). If null,
 *                    required information was missing from `$visit`.
 */
public function attributeExistingVisit($visit, $useClassCache = true)
{
    if (empty($visit['idvisit'])) {
        $this->logger->debug('Empty idvisit field. Skipping re-attribution..');
        return null;
    }

    $idVisit = $visit['idvisit'];

    if (empty($visit['location_ip'])) {
        // Named placeholder + matching context key, same style as the other messages in this method.
        $this->logger->debug('Empty location_ip field for idvisit = {idVisit}. Skipping re-attribution.', array('idVisit' => $idVisit));
        return null;
    }

    // location_ip is converted from its binary form to a string IP before the lookup
    $ip = IPUtils::binaryToStringIP($visit['location_ip']);
    $location = $this->getLocation(array('ip' => $ip), $useClassCache);

    $valuesToUpdate = $this->getVisitFieldsToUpdate($visit, $location);

    if (!empty($valuesToUpdate)) {
        $this->logger->debug('Updating visit with idvisit = {idVisit} (IP = {ip}). Changes: {changes}', array('idVisit' => $idVisit, 'ip' => $ip, 'changes' => $valuesToUpdate));

        // the same values are written to the visit and to its conversions
        $this->dao->updateVisits($valuesToUpdate, $idVisit);
        $this->dao->updateConversions($valuesToUpdate, $idVisit);
    } else {
        $this->logger->debug('Nothing to update for idvisit = {idVisit} (IP = {ip}). Existing location info is same as geolocated.', array('idVisit' => $idVisit, 'ip' => $ip));
    }

    return $valuesToUpdate;
}
/**
 * Detects whether a site had visits since midnight in the website's timezone, or since the
 * last archiving if that happened more recently than midnight.
 *
 * The checked time frame ends one hour after the current time. The outcome is also written
 * to the logger at info level.
 *
 * @param int $idSite
 * @return bool
 */
private function hadWebsiteTrafficSinceMidnightInTimezone($idSite)
{
    $siteTimezone = Site::getTimezoneFor($idSite);

    $localNow = Date::factory('now', $siteTimezone);
    $localMidnight = $localNow->setTime('00:00:00');

    $sinceMidnight = $localNow->getTimestamp() - $localMidnight->getTimestamp();
    $sinceLastArchive = $this->getSecondsSinceLastArchive();

    // look back to whichever of the two moments is closer to now
    $lookBackSeconds = ($sinceLastArchive < $sinceMidnight) ? $sinceLastArchive : $sinceMidnight;

    $from = Date::now()->subSeconds($lookBackSeconds)->getDatetime();
    $to = Date::now()->addHour(1)->getDatetime();

    $rawLogDao = new RawLogDao();
    $hasVisits = $rawLogDao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite);

    $message = $hasVisits
        ? "{$idSite} has visits between {$from} and {$to}"
        : "{$idSite} has no visits between {$from} and {$to}";
    $this->logger->info($message);

    return $hasVisits;
}
/**
 * Tracks a single page view for `$this->idSite` at 2015-01-25 05:35:27 and checks that the
 * DAO reports the expected result for the time frame and site given by the data provider.
 *
 * @dataProvider getVisitsInTimeFrameData
 */
public function test_hasSiteVisitsInTimeframe_shouldDetectWhetherThereAreVisitsInCertainTimeframe($from, $to, $idSite, $expectedHasVisits)
{
    // The visit is always tracked for $this->idSite; the queried $idSite comes from the data set.
    $tracker = Fixture::getTracker($this->idSite, '2015-01-25 05:35:27');
    $tracker->doTrackPageView('/test');

    $this->assertSame(
        $expectedHasVisits,
        $this->dao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite)
    );
}
/**
 * Detects whether a site had visits since midnight in the website's timezone, or since the
 * last archiving if that happened more recently than midnight.
 *
 * The checked time frame ends one hour after the current time. The outcome, including which
 * of the two starting points was used, is written to the logger at info level.
 *
 * @param int $idSite
 * @return bool
 */
private function hadWebsiteTrafficSinceMidnightInTimezone($idSite)
{
    $siteTimezone = Site::getTimezoneFor($idSite);

    $localNow = Date::factory('now', $siteTimezone);
    $localMidnight = $localNow->setTime('00:00:00');

    $secondsSinceMidnight = $localNow->getTimestamp() - $localMidnight->getTimestamp();
    $secondsSinceLastArchive = $this->getSecondsSinceLastArchive();

    // Start from midnight by default; switch to the last archiving when it is more recent.
    $lookBackSeconds = $secondsSinceMidnight;
    $sinceInfo = "(since midnight)";
    if ($secondsSinceLastArchive < $secondsSinceMidnight) {
        $lookBackSeconds = $secondsSinceLastArchive;
        $sinceInfo = "(since the last successful archiving)";
    }

    $from = Date::now()->subSeconds($lookBackSeconds)->getDatetime();
    $to = Date::now()->addHour(1)->getDatetime();

    $rawLogDao = new RawLogDao();
    $hasVisits = $rawLogDao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite);

    if (!$hasVisits) {
        $this->logger->info("- no new tracking data for website id {$idSite} since {$from} UTC {$sinceInfo}");
        return $hasVisits;
    }

    $this->logger->info("- tracking data found for website id {$idSite} since {$from} UTC {$sinceInfo}");
    return $hasVisits;
}