/**
 * Purges outdated and range archives from every installed archive table month.
 *
 * Nothing is purged when willPurgingCausePotentialProblemInUI() reports that purging
 * is not safe right now. Each table month is purged at most once, even though it shows
 * up once per numeric table and once per blob table.
 *
 * @return bool `true` if the purge was executed, `false` if it was skipped.
 * @throws \Exception
 */
public function purgeOutdatedArchives()
{
    if ($this->willPurgingCausePotentialProblemInUI()) {
        $this->logger->info("Purging temporary archives: skipped (browser triggered archiving not enabled & not running after core:archive)");

        return false;
    }

    $installedTables = ArchiveTableCreator::getTablesArchivesInstalled();
    $this->logger->info(
        "Purging archives in {tableCount} archive tables.",
        array('tableCount' => count($installedTables))
    );

    // getTablesArchivesInstalled() lists numeric & blob tables, so every month appears twice;
    // remember the months already handled so each one is purged only once.
    $handledMonths = array();

    foreach ($installedTables as $tableName) {
        $tableMonth = ArchiveTableCreator::getDateFromTableName($tableName);
        list($tableYear, $monthOfYear) = explode('_', $tableMonth);

        // Archive tables with very old dates may exist; skip them so no exception is thrown.
        if (!($tableYear > 1990)) {
            $this->logger->info(
                "Skipping purging of archive tables *_{year}_{month}, year <= 1990.",
                array('year' => $tableYear, 'month' => $monthOfYear)
            );
            continue;
        }

        if (!empty($handledMonths[$tableMonth])) {
            $this->logger->debug("Date {date} already purged.", array('date' => $tableMonth));
            continue;
        }

        $midMonth = Date::factory($tableYear . '-' . $monthOfYear . '-15');
        $this->archivePurger->purgeOutdatedArchives($midMonth);
        $this->archivePurger->purgeArchivesWithPeriodRange($midMonth);

        $handledMonths[$tableMonth] = true;
    }

    return true;
}
/**
 * Deletes invalidated archives that have since been re-processed.
 *
 * For every installed archive table, the archives flagged DONE_INVALIDATED that have a
 * newer DONE_OK / DONE_OK_TEMPORARY sibling for the same
 * { name (includes segment hash), idsite, date1, date2, period } tuple are looked up
 * and handed to deleteArchiveIds().
 */
public static function purgeInvalidatedArchives()
{
    foreach (ArchiveTableCreator::getTablesArchivesInstalled() as $archiveTable) {
        // Self-join: t1 is the invalidated archive, t2 the more recent valid one.
        $staleArchivesSql = ' SELECT t1.idarchive FROM `' . $archiveTable . '` t1'
            . ' INNER JOIN `' . $archiveTable . '` t2'
            . ' ON t1.name = t2.name AND t1.idsite=t2.idsite'
            . ' AND t1.date1=t2.date1 AND t1.date2=t2.date2 AND t1.period=t2.period'
            . ' WHERE t1.value = ' . ArchiveWriter::DONE_INVALIDATED
            . ' AND t2.value IN(' . ArchiveWriter::DONE_OK . ', ' . ArchiveWriter::DONE_OK_TEMPORARY . ')'
            . ' AND t1.ts_archived < t2.ts_archived'
            . " AND t1.name LIKE 'done%'";

        $rows = Db::fetchAll($staleArchivesSql);
        if (!(count($rows) > 0)) {
            continue;
        }

        $staleArchiveIds = array();
        foreach ($rows as $rowKey => $row) {
            $staleArchiveIds[$rowKey] = $row['idarchive'];
        }

        // Table suffix is "Y_m"; turn it into the first day of that month.
        $tableMonth = ArchiveTableCreator::getDateFromTableName($archiveTable);
        $firstOfMonth = Date::factory(str_replace('_', '-', $tableMonth) . '-01');

        self::deleteArchiveIds($firstOfMonth, $staleArchiveIds);
    }
}
/**
 * Asks ArchivePurger to purge outdated archives for every installed archive table.
 *
 * Tables whose year is not greater than 1990 are ignored.
 */
public function purgeOutdatedArchives()
{
    foreach (ArchiveTableCreator::getTablesArchivesInstalled() as $tableName) {
        $tableMonth = ArchiveTableCreator::getDateFromTableName($tableName);
        list($tableYear, $monthOfYear) = explode('_', $tableMonth);

        // Archive tables with very old dates may exist; skip them so no exception is thrown.
        if (!($tableYear > 1990)) {
            continue;
        }

        $midMonth = Date::factory($tableYear . '-' . $monthOfYear . '-15');
        ArchivePurger::purgeOutdatedArchives($midMonth);
    }
}
/**
 * Deletes, table by table, the archive ids returned by the model's
 * purgeInvalidatedArchiveTable() lookup.
 *
 * Per the original inline note, that lookup selects the archives that have already been
 * invalidated and have since been re-processed, for each distinct
 * { archive name (includes segment hash), idsite, date, period } tuple.
 */
public static function purgeInvalidatedArchives()
{
    foreach (ArchiveTableCreator::getTablesArchivesInstalled() as $archiveTable) {
        $rows = self::getModel()->purgeInvalidatedArchiveTable($archiveTable);
        if (!(count($rows) > 0)) {
            continue;
        }

        $staleArchiveIds = array();
        foreach ($rows as $rowKey => $row) {
            $staleArchiveIds[$rowKey] = $row['idarchive'];
        }

        // Table suffix is "Y_m"; turn it into the first day of that month.
        $tableMonth = ArchiveTableCreator::getDateFromTableName($archiveTable);
        $firstOfMonth = Date::factory(str_replace('_', '-', $tableMonth) . '-01');

        self::deleteArchiveIds($firstOfMonth, $staleArchiveIds);
    }
}
/**
 * When tracking data in the past (using Tracking API), this function
 * can be used to invalidate reports for the idSites and dates where new data
 * was added.
 * DEV: If you call this API, the UI should display the data correctly, but will process
 *      in real time, which could be very slow after large data imports.
 *      After calling this function via REST, you can manually force all data
 *      to be reprocessed by visiting the script as the Super User:
 *      http://example.net/piwik/misc/cron/archive.php?token_auth=$SUPER_USER_TOKEN_AUTH_HERE
 * REQUIREMENTS: On large piwik setups, you will need in PHP configuration: max_execution_time = 0
 *    We recommend to use an hourly schedule of the script.
 *    More information: http://piwik.org/setup-auto-archiving/
 *
 * Dates that are earlier than the "Delete logs older than N days" limit are NOT invalidated:
 * their archives are left untouched and the dates are listed in a warning message instead.
 *
 * @param string $idSites Comma separated list of idSite that have had data imported for the specified dates
 * @param string $dates Comma separated list of dates to invalidate for all these websites
 * @throws Exception If no site id is given, or if none of the given dates is a valid date
 * @return array List of human readable messages (optional warning, then the success line)
 */
public function invalidateArchivedReports($idSites, $dates)
{
    $idSites = Site::getIdSitesFromIdSitesString($idSites);
    if (empty($idSites)) {
        throw new Exception("Specify a value for &idSites= as a comma separated list of website IDs, for which your token_auth has 'admin' permission");
    }
    Piwik::checkUserHasAdminAccess($idSites);

    // Ensure the specified dates are valid. Trim before de-duplicating so that
    // "2012-01-01, 2012-01-01" is treated as a single date.
    $toInvalidate = array();
    $dates = array_unique(array_map('trim', explode(',', trim($dates))));
    foreach ($dates as $theDate) {
        try {
            $date = Date::factory($theDate);
        } catch (Exception $e) {
            continue;
        }

        // Only accept dates that round-trip unchanged (rejects keywords and partial dates)
        if ($date->toString() == $theDate) {
            $toInvalidate[] = $date;
        }
    }

    if (empty($toInvalidate)) {
        throw new Exception("Check the 'dates' parameter is a valid date.");
    }

    // If using the feature "Delete logs older than N days"...
    $purgeDataSettings = PrivacyManager::getPurgeDataSettings();
    $logsAreDeletedBeforeThisDate = $purgeDataSettings['delete_logs_schedule_lowest_interval'];
    $logsDeleteEnabled = $purgeDataSettings['delete_logs_enable'];
    $minimumDateWithLogs = false;
    if ($logsDeleteEnabled && $logsAreDeletedBeforeThisDate) {
        $minimumDateWithLogs = Date::factory('today')->subDay($logsAreDeletedBeforeThisDate);
    }

    // Given the list of dates, process which tables they should be deleted from
    $minDate = false;
    $warningDates = $processedDates = $datesByMonth = array();
    /* @var $date Date */
    foreach ($toInvalidate as $date) {
        $dateString = $date->toString();

        // We must only delete reports for dates that still have logs: archives older than
        // the log deletion limit cannot be re-processed, so deleting them would lose data.
        if ($minimumDateWithLogs && $date->isEarlier($minimumDateWithLogs)) {
            $warningDates[] = $dateString;
            continue;
        }
        $processedDates[] = $dateString;

        // For a given date, we must invalidate in the monthly archive table
        $datesByMonth[$date->toString('Y_m')][] = $dateString;

        // But also the year stored in January
        $datesByMonth[$date->toString('Y_01')][] = $dateString;

        // but also weeks overlapping several months stored in the month where the week is starting
        /* @var $week Week */
        $week = Period\Factory::build('week', $date);
        $datesByMonth[$week->getDateStart()->toString('Y_m')][] = $dateString;

        // Keep track of the minimum invalidated date
        if ($minDate === false || $date->isEarlier($minDate)) {
            $minDate = $date;
        }
    }

    // Site ids are concatenated into the SQL below, so force them to integers.
    $idSitesSql = implode(",", array_map('intval', $idSites));

    // In each table, invalidate day/week/month/year containing this date
    $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled();
    foreach ($archiveTables as $table) {
        // Extract Y_m from table name
        $suffix = ArchiveTableCreator::getDateFromTableName($table);
        if (!isset($datesByMonth[$suffix])) {
            continue;
        }

        // Build one statement to delete all dates from the given table
        $conditions = $bind = array();
        foreach (array_unique($datesByMonth[$suffix]) as $dateToDelete) {
            $conditions[] = '(date1 <= ? AND ? <= date2)';
            $bind[] = $dateToDelete;
            $bind[] = $dateToDelete;
        }
        $sql = implode(" OR ", $conditions);

        $query = "DELETE FROM {$table} " .
            " WHERE ( {$sql} ) " .
            " AND idsite IN (" . $idSitesSql . ")";
        Db::query($query, $bind);
    }

    // Only touch site metadata when at least one date was actually invalidated
    if ($minDate !== false) {
        \Piwik\Plugins\SitesManager\API::getInstance()->updateSiteCreatedTime($idSites, $minDate);

        // Force to re-process data for these websites in the next cron core:archive command run
        $invalidatedIdSites = self::getWebsiteIdsToInvalidate();
        $invalidatedIdSites = array_merge($invalidatedIdSites, $idSites);
        $invalidatedIdSites = array_unique($invalidatedIdSites);
        $invalidatedIdSites = array_values($invalidatedIdSites);
        Option::set(self::OPTION_INVALIDATED_IDSITES, serialize($invalidatedIdSites));

        Site::clearCache();
    }

    $output = array();
    // output logs
    if ($warningDates) {
        $output[] = 'Warning: the following Dates have not been invalidated, because they are earlier than your Log Deletion limit: ' .
            implode(", ", $warningDates) .
            "\n The last day with logs is " . $minimumDateWithLogs . ". " .
            "\n Please disable 'Delete old Logs' or set it to a higher deletion threshold (eg. 180 days or 365 years).'.";
    }
    $output[] = "Success. The following dates were invalidated successfully: " . implode(", ", $processedDates);

    return $output;
}
/**
 * Collects, per numeric archive table, the archives whose done flag is DONE_INVALIDATED.
 *
 * @return array Map of table date (as returned by getDateFromTableName()) to a list of
 *               "idsite.date1.date2.period.name" strings, one per invalidated done flag.
 */
private function getInvalidatedArchives()
{
    $result = array();
    foreach (ArchiveTableCreator::getTablesArchivesInstalled(ArchiveTableCreator::NUMERIC_TABLE) as $table) {
        $date = ArchiveTableCreator::getDateFromTableName($table);

        $sql = "SELECT CONCAT(idsite, '.', date1, '.', date2, '.', period, '.', name) FROM {$table} WHERE name LIKE 'done%' AND value = ?";
        $rows = Db::fetchAll($sql, array(ArchiveWriter::DONE_INVALIDATED));

        // Each row holds a single column; keep only its value. A closure is used instead of
        // array_map('reset', ...) because reset() takes its argument by reference and PHP
        // raises a warning when a by-reference internal function is used as a callback.
        $result[$date] = array_map(function ($row) {
            return reset($row);
        }, $rows);
    }
    return $result;
}
/**
 * Flags archives as invalidated, for one period type, in every numeric archive table
 * that has an entry in $datesByMonth.
 *
 * @param $idSites
 * @param $period string Period label, translated to an id through getPeriodId()
 * @param $datesByMonth array Dates to invalidate, keyed by archive table date suffix
 * @throws \Exception
 */
private function markArchivesInvalidatedFor($idSites, $period, $datesByMonth)
{
    $periodId = $this->getPeriodId($period);

    // Only numeric tables are updated; blob tables are filtered out up front.
    $isNumericTable = function ($tableName) {
        return ArchiveTableCreator::getTypeFromTableName($tableName) == ArchiveTableCreator::NUMERIC_TABLE;
    };
    $numericTables = array_filter(ArchiveTableCreator::getTablesArchivesInstalled(), $isNumericTable);

    foreach ($numericTables as $numericTable) {
        // The table name suffix is the key used in $datesByMonth
        $tableSuffix = ArchiveTableCreator::getDateFromTableName($numericTable);

        if (isset($datesByMonth[$tableSuffix])) {
            self::getModel()->updateArchiveAsInvalidated(
                $numericTable,
                $idSites,
                $periodId,
                $datesByMonth[$tableSuffix]
            );
        }
    }
}
/**
 * Returns the date part of every installed archive table name, in table order.
 *
 * @return array One entry per installed table (no de-duplication is performed).
 */
private function getAllArchiveTableMonths()
{
    $toTableDate = function ($tableName) {
        return ArchiveTableCreator::getDateFromTableName($tableName);
    };

    // array_values() keeps the result a plain 0..n-1 list whatever keys the table list has
    return array_values(array_map($toTableDate, ArchiveTableCreator::getTablesArchivesInstalled()));
}
/**
 * Fills $this->segmentArchiveIds with the ids of segment archives, per table date.
 *
 * When segmented reports are kept, we have to know which archives belong to segments.
 * That information only lives in the numeric tables, so they are queried here.
 * Does nothing if the ids were already collected.
 */
private function findSegmentArchives($numericTables)
{
    if ($this->segmentArchiveIds !== null) {
        return;
    }

    foreach ($numericTables as $numericTable) {
        $dateKey = ArchiveTableCreator::getDateFromTableName($numericTable);

        // Upper bound for the id ranges scanned below
        $highestIdArchive = Db::fetchOne('SELECT MAX(idarchive) FROM ' . $numericTable);

        $segmentDoneFlagsSql = "SELECT idarchive\n\t\t\t\t\t FROM " . $numericTable
            . "\n\t\t\t\t\t WHERE name != 'done'"
            . "\n\t\t\t\t\t AND name LIKE 'done_%.%'"
            . "\n\t\t\t\t\t AND idarchive >= ?"
            . "\n\t\t\t\t\t AND idarchive < ?";

        $this->segmentArchiveIds[$dateKey] = array();

        // The table is read in slices of self::$selectSegmentSize ids
        $rows = Db::segmentedFetchAll($segmentDoneFlagsSql, 0, $highestIdArchive, self::$selectSegmentSize);
        foreach ($rows as $archiveRow) {
            $this->segmentArchiveIds[$dateKey][] = $archiveRow['idarchive'];
        }
    }
}
/**
 * Asks the model to flag archives as invalidated in every numeric archive table
 * for which $dates holds a non-empty entry.
 *
 * @param int[] $idSites
 * @param string[][][] $dates Indexed by archive table date (as returned by getDateFromTableName())
 * @param Segment $segment Passed through unchanged to the model (may be null)
 * @throws \Exception
 */
private function markArchivesInvalidated($idSites, $dates, Segment $segment = null)
{
    $numericTables = ArchiveTableCreator::getTablesArchivesInstalled(ArchiveTableCreator::NUMERIC_TABLE);

    foreach ($numericTables as $numericTable) {
        $dateKey = ArchiveTableCreator::getDateFromTableName($numericTable);

        if (!empty($dates[$dateKey])) {
            $this->model->updateArchiveAsInvalidated($numericTable, $idSites, $dates[$dateKey], $segment);
        }
    }
}
/**
 * Fills $this->segmentArchiveIds with the ids of segment archives, per table date.
 *
 * When segmented reports are kept, we have to know which archives belong to segments.
 * That information only lives in the numeric tables, so they are queried here.
 * Does nothing if the ids were already collected or if no table is given.
 */
private function findSegmentArchives($numericTables)
{
    if ($this->segmentArchiveIds !== null) {
        return;
    }
    if (empty($numericTables)) {
        return;
    }

    $genericDao = Factory::getGeneric();

    foreach ($numericTables as $numericTable) {
        $dateKey = ArchiveTableCreator::getDateFromTableName($numericTable);

        // Upper bound for the id ranges scanned below
        $highestIdArchive = $genericDao->getMax($numericTable, 'idarchive');

        $segmentDoneFlagsSql = "SELECT idarchive\n FROM " . $numericTable
            . "\n WHERE name != 'done'"
            . "\n AND name LIKE 'done_%.%'"
            . "\n AND idarchive >= ?"
            . "\n AND idarchive < ?";

        if ($this->segmentArchiveIds === null) {
            $this->segmentArchiveIds = array();
        }
        $this->segmentArchiveIds[$dateKey] = array();

        // The table is read in slices of self::$selectSegmentSize ids
        $rows = $genericDao->segmentedFetchAll($segmentDoneFlagsSql, 0, $highestIdArchive, self::$selectSegmentSize);
        foreach ($rows as $archiveRow) {
            $this->segmentArchiveIds[$dateKey][] = $archiveRow['idarchive'];
        }
    }
}
/**
 * Resolves the 'dates' command argument into the list of dates to purge archives for.
 *
 * If the argument is exactly self::ALL_DATES_STRING, one date (first day of the month) is
 * returned per installed archive table month. Otherwise one date is returned per distinct
 * month of the supplied dates, plus the January of each year when the
 * 'include-year-archives' option is set.
 *
 * @param InputInterface $input
 * @return Date[] At most one Date per month, as a plain list
 */
private function getDatesToPurgeFor(InputInterface $input)
{
    $dates = array();

    $dateSpecifier = $input->getArgument('dates');
    if (count($dateSpecifier) === 1
        && reset($dateSpecifier) == self::ALL_DATES_STRING
    ) {
        foreach (ArchiveTableCreator::getTablesArchivesInstalled() as $table) {
            $tableDate = ArchiveTableCreator::getDateFromTableName($table);

            // Numeric and blob tables share the same month, so each month shows up
            // more than once; only keep it once so it is not purged repeatedly.
            if (isset($dates[$tableDate])) {
                continue;
            }

            list($year, $month) = explode('_', $tableDate);
            $dates[$tableDate] = Date::factory($year . '-' . $month . '-' . '01');
        }
    } else {
        $includeYearArchives = $input->getOption('include-year-archives');

        foreach ($dateSpecifier as $date) {
            $dateObj = Date::factory($date);
            $yearMonth = $dateObj->toString('Y-m');
            $dates[$yearMonth] = $dateObj;

            // if --include-year-archives is supplied, add a date for the january table for this date's year
            // so year archives will be purged
            if ($includeYearArchives) {
                $janYearMonth = $dateObj->toString('Y') . '-01';
                if (empty($dates[$janYearMonth])) {
                    $dates[$janYearMonth] = Date::factory($janYearMonth . '-01');
                }
            }
        }
    }

    // Keys were only used for de-duplication; callers get a plain list
    return array_values($dates);
}