/**
  * Re-attributes the visits of the requested date range and reports the progress on `$output`.
  *
  * The date range, the geolocator and the percent step are all derived from `$input`. The
  * visits are then processed by `processSpecifiedLogsInChunks()`, using the value of the
  * `SEGMENT_LIMIT_OPTION` option as chunk size.
  *
  * @param InputInterface $input
  * @param OutputInterface $output
  * @return int Always 0.
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     list($from, $to) = $this->getDateRangeToAttribute($input);

     $this->visitorGeolocator = $this->createGeolocator($output, $input);
     $this->percentStep = $this->getPercentStep($input);
     $this->amountOfVisits = $this->dao->countVisitsWithDatesLimit($from, $to);

     // announce what is about to be processed before any work is done
     $providerId = $this->visitorGeolocator->getProvider()->getId();
     $summary = sprintf('Re-attribution for date range: %s to %s. %d visits to process with provider "%s".', $from, $to, $this->amountOfVisits, $providerId);
     $output->writeln($summary);

     // the timer is created right before the chunked processing; its string form is printed afterwards
     $this->timer = new Timer();
     $chunkSize = $input->getOption(self::SEGMENT_LIMIT_OPTION);
     $this->processSpecifiedLogsInChunks($output, $from, $to, $chunkSize);

     $elapsed = $this->timer->__toString();
     $output->writeln("Completed. <comment>" . $elapsed . "</comment>");

     return 0;
 }
Exemple #2
0
 /**
  * Purges old data from the following tables:
  * - log_visit
  * - log_link_visit_action
  * - log_conversion
  * - log_conversion_item
  * - log_action
  *
  * Visits are deleted first, then unused actions are removed (only when the LOCK TABLES
  * privilege is granted, otherwise a warning is logged), and finally the log tables are
  * optimized.
  *
  * @param int $deleteLogsOlderThan The number of days after which log entries are considered old.
  *                                 Visits and related data whose age is greater than this number
  *                                 will be purged.
  */
 public function purgeData($deleteLogsOlderThan)
 {
     $cutoffDatetime = Date::factory("today")->subDay($deleteLogsOlderThan)->getDatetime();

     // no lower bound is given: every visit before the cutoff is deleted
     $this->logDeleter->deleteVisitsFor(null, $cutoffDatetime);

     $tablesToOptimize = self::getDeleteTableLogTables();

     // delete unused actions from the log_action table (but only if we can lock tables)
     if (!Db::isLockPrivilegeGranted()) {
         Log::warning(get_class($this) . ": LOCK TABLES privilege not granted; skipping unused actions purge");
     } else {
         $this->rawLogDao->deleteUnusedLogActions();
     }

     // optimize table overhead after deletion
     Db::optimizeTables($tablesToOptimize);
 }
 /**
  * Deletes visits within the specified date range and belonging to the specified site (if any). Visits are
  * deleted in chunks, so only `$iterationStep` visits are deleted at a time.
  *
  * A filter is only applied when the corresponding argument is non-empty.
  *
  * @param string|null $startDatetime A datetime string. Visits that occur at this time or after are deleted. If not supplied,
  *                                   visits from the beginning of time are deleted.
  * @param string|null $endDatetime A datetime string. Visits that occur before this time are deleted. If not supplied,
  *                                 visits from the end of time are deleted.
  * @param int|null $idSite The site to delete visits from.
  * @param int $iterationStep The number of visits to delete at a single time.
  * @param callable|null $afterChunkDeleted Callback executed after every chunk of visits is deleted. It receives
  *                                         the total number of visits deleted so far.
  * @return int The number of visits deleted.
  */
 public function deleteVisitsFor($startDatetime, $endDatetime, $idSite = null, $iterationStep = 1000, $afterChunkDeleted = null)
 {
     $conditions = array();
     if (!empty($startDatetime)) {
         $conditions[] = array('visit_last_action_time', '>=', $startDatetime);
     }
     if (!empty($endDatetime)) {
         $conditions[] = array('visit_last_action_time', '<', $endDatetime);
     }
     if (!empty($idSite)) {
         $conditions[] = array('idsite', '=', $idSite);
     }

     $deletedTotal = 0;
     $purger = $this;

     // only 'idvisit' is selected, so the first column of every row is the visit id
     $firstColumn = function ($row) {
         return reset($row);
     };

     $deleteChunk = function ($logs) use ($purger, &$deletedTotal, $afterChunkDeleted, $firstColumn) {
         $deletedTotal += $purger->deleteVisits(array_map($firstColumn, $logs));

         if (empty($afterChunkDeleted)) {
             return;
         }
         $afterChunkDeleted($deletedTotal);
     };

     $this->rawLogDao->forAllLogs('log_visit', array('idvisit'), $conditions, $iterationStep, $deleteChunk);

     return $deletedTotal;
 }
 /**
  * Fetches the visits of the given date range chunk by chunk and re-attributes every chunk.
  *
  * Processing stops as soon as a chunk comes back empty or with a row count that differs
  * from `$segmentLimit`.
  *
  * @param OutputInterface $output Passed through to `reattributeVisitLogs()`.
  * @param string $from Lower bound of the date range, forwarded to the DAO.
  * @param string $to Upper bound of the date range, forwarded to the DAO.
  * @param int|string $segmentLimit Chunk size forwarded to the DAO and compared (loosely) with the row count.
  */
 protected function processSpecifiedLogsInChunks(OutputInterface $output, $from, $to, $segmentLimit)
 {
     $visitFieldsToSelect = array_merge(array('idvisit', 'location_ip'), array_keys(VisitorGeolocator::$logVisitFieldsToUpdate));
     // idvisit of the last row of the previous chunk; handed to the DAO together with the chunk size
     // (presumably used as a cursor so the next chunk starts after it — confirm against the DAO)
     $lastId = 0;
     do {
         $logs = $this->dao->getVisitsWithDatesLimit($from, $to, $visitFieldsToSelect, $lastId, $segmentLimit);
         if (empty($logs)) {
             // An empty chunk always ends the processing. Without this guard a falsy $segmentLimit
             // (0, '0', null) would make `count($logs) == $segmentLimit` true for an empty result
             // and the loop would never terminate.
             break;
         }
         $lastId = $logs[count($logs) - 1]['idvisit'];
         $this->reattributeVisitLogs($output, $logs);
     } while (count($logs) == $segmentLimit);
 }
 /**
  * Override that delegates to the parent implementation with a fixed iteration step of 2
  * (to force iterations during tests) and afterwards runs an optional callback, so code can
  * be executed after the data is inserted, for concurrency testing purposes.
  *
  * @param mixed $maxIds Forwarded unchanged to the parent implementation.
  * @param bool $olderThan Forwarded to the parent; also decides which callback is considered first.
  * @param int $insertIntoTempIterationStep Ignored here: the parent is always called with 2.
  */
 protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
 {
     // we use 2 to force iterations during tests
     parent::insertActionsToKeep($maxIds, $olderThan, 2);

     // The "older than" callback is used when $olderThan is truthy and that callback is set;
     // in every other case the "newer than" callback is the candidate.
     // NOTE(review): the "newer than" callback therefore also runs for $olderThan = true when no
     // "older than" callback is configured — confirm this is intended.
     $callback = ($olderThan && $this->insertActionsOlderThanCallback)
         ? $this->insertActionsOlderThanCallback
         : $this->insertActionsNewerThanCallback;

     if ($callback) {
         $callback();
     }
 }
 /**
  * Re-geolocate visits within a date range for a specified site (if any).
  *
  * Visits are read from `log_visit` in chunks of `$iterationStep` rows and every row is passed to
  * `attributeExistingVisit()`.
  *
  * @param string $from A datetime string to treat as the lower bound. Visits whose last action time is
  *                     at or after this value are processed.
  * @param string $to A datetime string to treat as the upper bound. Visits whose last action time is
  *                   before this value are processed.
  * @param int|null $idSite If supplied, only visits for this site are re-attributed.
  * @param int $iterationStep The number of visits to re-attribute at the same time.
  * @param callable|null $onLogProcessed If supplied, this callback is called after every row is processed.
  *                                      The processed visit and the updated values are passed to the callback.
  */
 public function reattributeVisitLogs($from, $to, $idSite = null, $iterationStep = 1000, $onLogProcessed = null)
 {
     $requiredFields = array('idvisit', 'location_ip');
     $locationFields = array_keys(VisitorGeolocator::$logVisitFieldsToUpdate);
     $selectedFields = array_merge($requiredFields, $locationFields);

     $conditions = array();
     $conditions[] = array('visit_last_action_time', '>=', $from);
     $conditions[] = array('visit_last_action_time', '<', $to);
     if (!empty($idSite)) {
         $conditions[] = array('idsite', '=', $idSite);
     }

     $geolocator = $this;
     // the callback never changes while iterating, so its presence is checked once
     $notify = !empty($onLogProcessed);

     $processChunk = function ($logs) use ($geolocator, $onLogProcessed, $notify) {
         foreach ($logs as $visitRow) {
             $updatedValues = $geolocator->attributeExistingVisit($visitRow);
             if ($notify) {
                 $onLogProcessed($visitRow, $updatedValues);
             }
         }
     };

     $this->dao->forAllLogs('log_visit', $selectedFields, $conditions, $iterationStep, $processChunk);
 }
 /**
  * Geolocates an existing visit and then updates it if its current attributes are different than
  * what was geolocated. Also updates all conversions of a visit.
  *
  * **This method should NOT be used from within the tracker.**
  *
  * All debug messages use `{placeholder}` tokens whose names match the keys of the context array.
  *
  * @param array $visit The visit information. Must contain an `"idvisit"` element and `"location_ip"` element.
  * @param bool $useClassCache Forwarded to `getLocation()`.
  * @return array|null The visit properties that were updated in the DB mapped to the updated values (empty
  *                    when nothing had to be updated). If null, required information was missing from `$visit`.
  */
 public function attributeExistingVisit($visit, $useClassCache = true)
 {
     if (empty($visit['idvisit'])) {
         $this->logger->debug('Empty idvisit field. Skipping re-attribution..');
         return null;
     }
     $idVisit = $visit['idvisit'];
     if (empty($visit['location_ip'])) {
         // placeholder name and context key must match for the id to show up in the message
         $this->logger->debug('Empty location_ip field for idvisit = {idVisit}. Skipping re-attribution.', array('idVisit' => $idVisit));
         return null;
     }
     // location_ip is converted from its binary form to a string IP before the lookup
     $ip = IPUtils::binaryToStringIP($visit['location_ip']);
     $location = $this->getLocation(array('ip' => $ip), $useClassCache);
     $valuesToUpdate = $this->getVisitFieldsToUpdate($visit, $location);
     if (!empty($valuesToUpdate)) {
         $this->logger->debug('Updating visit with idvisit = {idVisit} (IP = {ip}). Changes: {changes}', array('idVisit' => $idVisit, 'ip' => $ip, 'changes' => $valuesToUpdate));
         // the same values are written to the visit and to its conversions
         $this->dao->updateVisits($valuesToUpdate, $idVisit);
         $this->dao->updateConversions($valuesToUpdate, $idVisit);
     } else {
         $this->logger->debug('Nothing to update for idvisit = {idVisit} (IP = {ip}). Existing location info is same as geolocated.', array('idVisit' => $idVisit, 'ip' => $ip));
     }
     return $valuesToUpdate;
 }
Exemple #8
0
 /**
  * Detects whether a site had visits since midnight in the websites timezone
  *
  * The look-back window is the shorter of "seconds since midnight in the site's timezone" and
  * "seconds since the last archive"; the upper bound of the checked timeframe is one hour from now.
  * The outcome is written to the logger at info level.
  *
  * @param int $idSite
  * @return bool
  */
 private function hadWebsiteTrafficSinceMidnightInTimezone($idSite)
 {
     $siteTimezone = Site::getTimezoneFor($idSite);
     $localNow = Date::factory('now', $siteTimezone);
     $localMidnight = $localNow->setTime('00:00:00');
     $secondsSinceMidnight = $localNow->getTimestamp() - $localMidnight->getTimestamp();

     // never look further back than the last archive
     $secondsSinceLastArchive = $this->getSecondsSinceLastArchive();
     $lookBackSeconds = $secondsSinceLastArchive < $secondsSinceMidnight
         ? $secondsSinceLastArchive
         : $secondsSinceMidnight;

     $from = Date::now()->subSeconds($lookBackSeconds)->getDatetime();
     $to = Date::now()->addHour(1)->getDatetime();

     $rawLogDao = new RawLogDao();
     $hasVisits = $rawLogDao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite);

     $message = $hasVisits
         ? "{$idSite} has visits between {$from} and {$to}"
         : "{$idSite} has no visits between {$from} and {$to}";
     $this->logger->info($message);

     return $hasVisits;
 }
 /**
  * Tracks one page view at a fixed time and checks that the DAO reports the expected result
  * for the timeframe and site supplied by the data provider.
  *
  * @dataProvider getVisitsInTimeFrameData
  */
 public function test_hasSiteVisitsInTimeframe_shouldDetectWhetherThereAreVisitsInCertainTimeframe($from, $to, $idSite, $expectedHasVisits)
 {
     // the single visit every data set is evaluated against
     $tracker = Fixture::getTracker($this->idSite, '2015-01-25 05:35:27');
     $tracker->doTrackPageView('/test');

     $this->assertSame($expectedHasVisits, $this->dao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite));
 }
Exemple #10
0
 /**
  * Detects whether a site had visits since midnight in the websites timezone
  *
  * Visits are looked up from either midnight in the site's timezone or the last archive, whichever
  * is more recent, up to one hour from now. The log message states which of the two bounds was used.
  *
  * @param int $idSite
  * @return bool
  */
 private function hadWebsiteTrafficSinceMidnightInTimezone($idSite)
 {
     $siteTimezone = Site::getTimezoneFor($idSite);
     $localNow = Date::factory('now', $siteTimezone);
     $localMidnight = $localNow->setTime('00:00:00');
     $secondsSinceMidnight = $localNow->getTimestamp() - $localMidnight->getTimestamp();
     $secondsSinceLastArchive = $this->getSecondsSinceLastArchive();

     // default to midnight, and narrow the window when the last archive is more recent
     $secondsBackToLookForVisits = $secondsSinceMidnight;
     $sinceInfo = "(since midnight)";
     if ($secondsSinceLastArchive < $secondsSinceMidnight) {
         $secondsBackToLookForVisits = $secondsSinceLastArchive;
         $sinceInfo = "(since the last successful archiving)";
     }

     $from = Date::now()->subSeconds($secondsBackToLookForVisits)->getDatetime();
     $to = Date::now()->addHour(1)->getDatetime();

     $rawLogDao = new RawLogDao();
     $hasVisits = $rawLogDao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite);

     $message = $hasVisits
         ? "- tracking data found for website id {$idSite} since {$from} UTC {$sinceInfo}"
         : "- no new tracking data for website id {$idSite} since {$from} UTC {$sinceInfo}";
     $this->logger->info($message);

     return $hasVisits;
 }