/**
 * Archives one website: first the "day" period, then (when due) the longer periods.
 *
 * Day archiving is skipped while the previous day archive is still within
 * $this->todayArchiveTimeToLive, unless the site has invalidated old reports or was
 * explicitly requested. Period archiving runs when the last periods run is older than
 * $this->processPeriodsMaximumEverySeconds, when it never ran, when the site's day has
 * ended since the last run, when old reports were invalidated, or when the site is forced.
 *
 * @param mixed $idSite         Id of the website to archive.
 * @param int   $requestsBefore Value of $this->requests before this site was started;
 *                              used to report how many API requests this site needed.
 * @return bool True once period archiving has been attempted for the site (even when it
 *              reported failure); false when the site was skipped or day archiving
 *              said not to proceed.
 */
private function archiveSingleSite($idSite, $requestsBefore)
{
    $timerWebsite = new Timer();

    $lastTimestampWebsiteProcessedPeriods = $lastTimestampWebsiteProcessedDay = false;

    if ($this->archiveAndRespectTTL) {
        // Clear the cached option before each read; presumably the getters below
        // read the same "last run" options and would otherwise see stale values.
        Option::clearCachedOption($this->lastRunKey($idSite, "periods"));
        $lastTimestampWebsiteProcessedPeriods = $this->getPeriodLastProcessedTimestamp($idSite);

        Option::clearCachedOption($this->lastRunKey($idSite, "day"));
        $lastTimestampWebsiteProcessedDay = $this->getDayLastProcessedTimestamp($idSite);
    }

    $this->updateIdSitesInvalidatedOldReports();

    // For periods other than days, we only re-process the reports at most
    // 1) every $processPeriodsMaximumEverySeconds
    // Keep the real age of the last periods run; it is reported in the log below.
    $elapsedSinceLastPeriodsArchiving = time() - $lastTimestampWebsiteProcessedPeriods;
    $secondsSinceLastExecution = $elapsedSinceLastPeriodsArchiving;

    // If the interval is more than 10 min, add a 5 min allowance for processing time,
    // which effectively lets the periods run trigger up to 5 min earlier.
    if ($this->processPeriodsMaximumEverySeconds > 10 * 60) {
        $secondsSinceLastExecution += 5 * 60;
    }

    $shouldArchivePeriods = $secondsSinceLastExecution > $this->processPeriodsMaximumEverySeconds;
    if (empty($lastTimestampWebsiteProcessedPeriods)) {
        // 2) OR always if the script never executed for this website before
        $shouldArchivePeriods = true;
    }

    // (*) If the website is archived because it is a new day in its timezone
    // we make sure all periods are archived, even if there is 0 visit today
    $dayHasEndedMustReprocess = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
    if ($dayHasEndedMustReprocess) {
        $shouldArchivePeriods = true;
    }

    // (*) If some old reports were invalidated for this website
    // we make sure all these old reports are triggered at least once
    $websiteInvalidatedShouldReprocess = $this->isOldReportInvalidatedForWebsite($idSite);
    if ($websiteInvalidatedShouldReprocess) {
        $shouldArchivePeriods = true;
    }

    // Explicitly requested sites always get their periods archived.
    $websiteIdIsForced = in_array($idSite, $this->shouldArchiveSpecifiedSites);
    if ($websiteIdIsForced) {
        $shouldArchivePeriods = true;
    }

    // Test if we should process this website at all
    $elapsedSinceLastArchiving = time() - $lastTimestampWebsiteProcessedDay;

    // Skip the day archive while the last one is still younger than the TTL
    $existingArchiveIsValid = $elapsedSinceLastArchiving < $this->todayArchiveTimeToLive;
    $skipDayArchive = $existingArchiveIsValid;

    // Invalidated old reports force the archiving for this site
    $skipDayArchive = $skipDayArchive && !$websiteInvalidatedShouldReprocess;

    // Also reprocess when the day has ended since the last run.
    // NOTE(review): this branch requires !$existingArchiveIsValid, in which case
    // $skipDayArchive is already false, so the assignment cannot change the outcome.
    // The call is kept as-is in case hasBeenProcessedSinceMidnight() has side effects.
    if ($dayHasEndedMustReprocess
        && !$this->hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)
        && !$existingArchiveIsValid
    ) {
        $skipDayArchive = false;
    }

    if ($websiteIdIsForced) {
        $skipDayArchive = false;
    }

    if ($skipDayArchive) {
        $this->logger->info("Skipped website id {$idSite}, already done "
            . $this->formatter->getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true)
            . " ago, " . $timerWebsite->__toString());
        $this->skippedDayArchivesWebsites++;
        $this->skipped++;
        return false;
    }

    try {
        $shouldProceed = $this->processArchiveDays(
            $idSite,
            $lastTimestampWebsiteProcessedDay,
            $shouldArchivePeriods,
            $timerWebsite
        );
    } catch (UnexpectedWebsiteFoundException $e) {
        // this website was deleted in the meantime
        $shouldProceed = false;
        $this->logger->info("Skipped website id {$idSite}, got: UnexpectedWebsiteFoundException, "
            . $timerWebsite->__toString());
    }

    if (!$shouldProceed) {
        return false;
    }

    if (!$shouldArchivePeriods) {
        // Report how long ago the *periods* were last processed (not the day archive,
        // whose age is unrelated to this message).
        $this->logger->info("Skipped website id {$idSite} periods processing, already done "
            . $this->formatter->getPrettyTimeFromSeconds($elapsedSinceLastPeriodsArchiving, true)
            . " ago, " . $timerWebsite->__toString());
        // NOTE(review): this increments the day-skip counter although it is the periods
        // that were skipped — confirm whether a dedicated periods counter should be used.
        $this->skippedDayArchivesWebsites++;
        $this->skipped++;
        return false;
    }

    $success = $this->processArchiveForPeriods($idSite, $lastTimestampWebsiteProcessedPeriods);

    if ($success) {
        // Record successful run of this website's periods archiving
        Option::set($this->lastRunKey($idSite, "periods"), time());
    } elseif ($websiteInvalidatedShouldReprocess) {
        // Periods failed: cancel marking the site as reprocessed by putting it back
        // on the list of sites to reprocess.
        $store = new SitesToReprocessDistributedList();
        $store->add($idSite);
    }

    $this->archivedPeriodsArchivesWebsite++;

    $requestsWebsite = $this->requests - $requestsBefore;
    $this->logger->info("Archived website id = {$idSite}, "
        . $requestsWebsite . " API requests, "
        . $timerWebsite->__toString()
        . " [" . $this->websites->getNumProcessedWebsites() . "/"
        . $this->websites->getNumSites()
        . " done]");

    return true;
}
/**
 * Walks through the fixed list of site ids and checks that the processed counter
 * grows by one for every id handed out, and stops growing once the list is exhausted.
 */
public function test_getNumProcessedWebsites_getNextSiteId()
{
    // Nothing has been handed out yet.
    $this->assertEquals(0, $this->fixedSiteIds->getNumProcessedWebsites());

    // Ids are expected in this exact order; after the n-th id the counter must read n.
    $expectedSiteIds = array(1, 2, 5, 9);
    $handedOut = 0;
    foreach ($expectedSiteIds as $expectedSiteId) {
        $this->assertEquals($expectedSiteId, $this->fixedSiteIds->getNextSiteId());
        $handedOut++;
        $this->assertEquals($handedOut, $this->fixedSiteIds->getNumProcessedWebsites());
    }

    // Once exhausted, no further id is returned and the counter stays at 4.
    $this->assertNull($this->fixedSiteIds->getNextSiteId());
    $this->assertEquals(4, $this->fixedSiteIds->getNumProcessedWebsites());
}
/**
 * Archives one website: requests the "day" period, then (when due) week, month and year.
 *
 * Day archiving is skipped while the previous day archive is still within
 * $this->todayArchiveTimeToLive, unless the site has invalidated old reports or was
 * explicitly requested. Before the day request is issued, the "day" last-run option is
 * set to the current time so that concurrently running archivers skip this site; it is
 * reset to 0 when the API response turns out to be empty or invalid.
 *
 * @param mixed $idSite         Id of the website to archive.
 * @param int   $requestsBefore Value of $this->requests before this site was started;
 *                              used to report how many API requests this site needed.
 * @return bool True once week/month/year archiving has been attempted (even when one of
 *              them reported failure); false when the site was skipped or the day
 *              request returned an unusable response.
 */
private function archiveSingleSite($idSite, $requestsBefore)
{
    $timerWebsite = new Timer();

    $lastTimestampWebsiteProcessedPeriods = $lastTimestampWebsiteProcessedDay = false;

    if ($this->archiveAndRespectTTL) {
        // Clear the cached value before each read so Option::get() does not return a
        // value cached earlier in this process.
        Option::clearCachedOption($this->lastRunKey($idSite, "periods"));
        $lastTimestampWebsiteProcessedPeriods = Option::get($this->lastRunKey($idSite, "periods"));

        Option::clearCachedOption($this->lastRunKey($idSite, "day"));
        $lastTimestampWebsiteProcessedDay = Option::get($this->lastRunKey($idSite, "day"));
    }

    $this->updateIdSitesInvalidatedOldReports();

    // For periods other than days, we only re-process the reports at most
    // 1) every $processPeriodsMaximumEverySeconds
    // Keep the real age of the last periods run; it is reported in the log below.
    $elapsedSinceLastPeriodsArchiving = time() - $lastTimestampWebsiteProcessedPeriods;
    $secondsSinceLastExecution = $elapsedSinceLastPeriodsArchiving;

    // If the interval is more than 10 min, add a 5 min allowance for processing time,
    // which effectively lets the periods run trigger up to 5 min earlier.
    if ($this->processPeriodsMaximumEverySeconds > 10 * 60) {
        $secondsSinceLastExecution += 5 * 60;
    }

    $shouldArchivePeriods = $secondsSinceLastExecution > $this->processPeriodsMaximumEverySeconds;
    if (empty($lastTimestampWebsiteProcessedPeriods)) {
        // 2) OR always if the script never executed for this website before
        $shouldArchivePeriods = true;
    }

    // (*) If the website is archived because it is a new day in its timezone
    // we make sure all periods are archived, even if there is 0 visit today
    $dayHasEndedMustReprocess = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
    if ($dayHasEndedMustReprocess) {
        $shouldArchivePeriods = true;
    }

    // (*) If some old reports were invalidated for this website
    // we make sure all these old reports are triggered at least once
    $websiteIsOldDataInvalidate = in_array($idSite, $this->idSitesInvalidatedOldReports);
    if ($websiteIsOldDataInvalidate) {
        $shouldArchivePeriods = true;
    }

    // Explicitly requested sites always get their periods archived.
    $websiteIdIsForced = in_array($idSite, $this->shouldArchiveSpecifiedSites);
    if ($websiteIdIsForced) {
        $shouldArchivePeriods = true;
    }

    // Test if we should process this website at all
    $elapsedSinceLastArchiving = time() - $lastTimestampWebsiteProcessedDay;

    // Skip the day archive while the last one is still younger than the TTL
    $existingArchiveIsValid = $elapsedSinceLastArchiving < $this->todayArchiveTimeToLive;
    $skipDayArchive = $existingArchiveIsValid;

    // Invalidated old reports force the archiving for this site
    $skipDayArchive = $skipDayArchive && !$websiteIsOldDataInvalidate;

    // Also reprocess when the day has ended since the last run.
    // NOTE(review): this branch requires !$existingArchiveIsValid, in which case
    // $skipDayArchive is already false, so the assignment cannot change the outcome.
    // The call is kept as-is in case hasBeenProcessedSinceMidnight() has side effects.
    if ($dayHasEndedMustReprocess
        && !$this->hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)
        && !$existingArchiveIsValid
    ) {
        $skipDayArchive = false;
    }

    if ($websiteIdIsForced) {
        $skipDayArchive = false;
    }

    if ($skipDayArchive) {
        // Third argument (false) is the formatter's $isHtml flag.
        $this->log("Skipped website id {$idSite}, already done "
            . \Piwik\MetricsFormatter::getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true, false)
            . " ago, " . $timerWebsite->__toString());
        $this->skippedDayArchivesWebsites++;
        $this->skipped++;
        return false;
    }

    // Fake that the request is already done, so that other core:archive commands
    // running do not grab the same website from the queue
    Option::set($this->lastRunKey($idSite, "day"), time());

    // Remove this website from the list of websites to be invalidated
    // since it is just about to be re-processed; this makes sure another running
    // cron archiving process does not archive the same idSite.
    // NOTE(review): if the day request below fails, only the "day" flag is reset;
    // the site is not put back on the invalidated list here — confirm this is intended.
    if ($websiteIsOldDataInvalidate) {
        $this->setSiteIsArchived($idSite);
    }

    // When some data was purged from this website (or all sites are being archived)
    // we make sure we query all previous days/weeks/months
    $processDaysSince = $lastTimestampWebsiteProcessedDay;
    if ($websiteIsOldDataInvalidate || $this->shouldArchiveAllSites) {
        $processDaysSince = false;
    }

    $timer = new Timer();

    $dateLast = $this->getApiDateLastParameter($idSite, "day", $processDaysSince);
    $url = $this->getVisitsRequestUrl($idSite, "day", $dateLast);
    $content = $this->request($url);

    // Warnings from malformed payloads are suppressed on purpose: a failed unserialize
    // yields a non-array, which is rejected by the validity check below.
    // NOTE(review): unserialize() on response content is only safe if the endpoint
    // is trusted — confirm.
    $response = @unserialize($content);
    $visitsToday = $this->getVisitsLastPeriodFromApiResponse($response);
    $visitsLastDays = $this->getVisitsFromApiResponse($response);

    if (empty($content)
        || !is_array($response)
        || count($response) == 0
    ) {
        // cancel the successful run flag
        Option::set($this->lastRunKey($idSite, "day"), 0);

        $this->log("WARNING: Empty or invalid response '{$content}' for website id {$idSite}, "
            . $timerWebsite->__toString() . ", skipping");
        $this->skipped++;
        return false;
    }

    $this->requests++;
    $this->processed++;

    // If there is no visit today and we don't need to process this website,
    // we can skip the remaining archives
    if ($visitsToday == 0 && !$shouldArchivePeriods) {
        $this->log("Skipped website id {$idSite}, no visit today, " . $timerWebsite->__toString());
        $this->skipped++;
        return false;
    }

    if ($visitsLastDays == 0 && !$shouldArchivePeriods && $this->shouldArchiveAllSites) {
        $this->log("Skipped website id {$idSite}, no visits in the last " . $dateLast . " days, "
            . $timerWebsite->__toString());
        $this->skipped++;
        return false;
    }

    $this->visitsToday += $visitsToday;
    $this->websitesWithVisitsSinceLastRun++;

    $this->archiveVisitsAndSegments($idSite, "day", $lastTimestampWebsiteProcessedDay);
    $this->logArchivedWebsite($idSite, "day", $dateLast, $visitsLastDays, $visitsToday, $timer);

    if (!$shouldArchivePeriods) {
        // Report how long ago the *periods* were last processed (not the day archive,
        // whose age is unrelated to this message).
        $this->log("Skipped website id {$idSite} periods processing, already done "
            . \Piwik\MetricsFormatter::getPrettyTimeFromSeconds($elapsedSinceLastPeriodsArchiving, true, false)
            . " ago, " . $timerWebsite->__toString());
        // NOTE(review): this increments the day-skip counter although it is the periods
        // that were skipped — confirm whether a dedicated periods counter should be used.
        $this->skippedDayArchivesWebsites++;
        $this->skipped++;
        return false;
    }

    // Every period is always attempted; $success only remembers whether all succeeded.
    $success = true;
    foreach (array('week', 'month', 'year') as $period) {
        $success = $this->archiveVisitsAndSegments($idSite, $period, $lastTimestampWebsiteProcessedPeriods)
            && $success;
    }

    // Record successful run of this website's periods archiving
    if ($success) {
        Option::set($this->lastRunKey($idSite, "periods"), time());
    }

    $this->archivedPeriodsArchivesWebsite++;

    $requestsWebsite = $this->requests - $requestsBefore;
    // NOTE(review): this final line goes through Log::info() while every other message
    // in this method uses $this->log() — confirm whether that difference is intended.
    Log::info("Archived website id = {$idSite}, "
        . $requestsWebsite . " API requests, "
        . $timerWebsite->__toString()
        . " [" . $this->websites->getNumProcessedWebsites() . "/"
        . $this->websites->getNumSites()
        . " done]");

    return true;
}