/**
 * Called when a page is imported. Force a full index of the page. Use the MassIndex
 * job since there's likely to be a bunch and we'll prioritize them well but use
 * INDEX_EVERYTHING since we won't get a chance at a second pass.
 *
 * @param Title $title The page title we've just imported
 * @return bool Always true so the hook chain continues
 */
public static function onAfterImportPage($title) {
    // The title can be null if the import failed. Nothing to do in that case.
    // Return true (not a bare return) so the declared bool contract holds and
    // the hook runner unambiguously continues processing.
    if ($title === null) {
        return true;
    }
    // Queue a MassIndex job for the single imported page; INDEX_EVERYTHING forces
    // a complete (re)build of the document since no later pass will fill gaps.
    JobQueueGroup::singleton()->push(Job\MassIndex::build(array(WikiPage::factory($title)), Updater::INDEX_EVERYTHING));
    return true;
}
/**
 * Main entry point for the maintenance run.
 *
 * Depending on options, either queues/applies index updates in batches (optionally
 * via the job queue) or applies deletes, looping until the limit is reached or no
 * more work is found. With --buildChunks it only emits chunk boundaries and exits.
 * When queuing, it finishes by waiting for the job queue to drain.
 */
public function execute() {
    global $wgPoolCounterConf, $wgCirrusSearchMaintenanceTimeout;
    // Fixed-width wiki id prefix so interleaved multi-wiki output lines up.
    $wiki = sprintf("[%20s]", wfWikiId());
    // Set the timeout for maintenance actions
    $this->getConnection()->setTimeout($wgCirrusSearchMaintenanceTimeout);
    // Make sure we've actually got indices to populate
    if (!$this->simpleCheckIndexes()) {
        $this->error("{$wiki} index(es) do not exist. Did you forget to run updateSearchIndexConfig?", 1);
    }
    // Make sure we don't flood the pool counter: drop the search pool config so
    // this script's traffic isn't throttled/counted against interactive searches.
    unset($wgPoolCounterConf['CirrusSearch-Search']);
    if (!is_null($this->getOption('from')) || !is_null($this->getOption('to'))) {
        // 0 is falsy so MWTimestamp makes that `now`. '00' is epoch 0.
        $this->fromDate = new MWTimestamp($this->getOption('from', '00'));
        $this->toDate = new MWTimestamp($this->getOption('to', false));
    }
    $this->toId = $this->getOption('toId');
    // --deletes flips the script from indexing mode into delete-processing mode.
    $this->indexUpdates = !$this->getOption('deletes', false);
    $this->limit = $this->getOption('limit');
    $buildChunks = $this->getOption('buildChunks');
    if ($buildChunks !== null) {
        // Chunk-planning mode: emit chunk boundaries and stop; no actual indexing.
        $this->buildChunks($buildChunks);
        return;
    }
    $this->queue = $this->getOption('queue');
    // NOTE(review): a literal '0' for either option is falsy and falls through to
    // the default here — presumably intentional (0 jobs makes no sense); confirm.
    $this->maxJobs = $this->getOption('maxJobs') ? intval($this->getOption('maxJobs')) : null;
    $this->pauseForJobs = $this->getOption('pauseForJobs') ? intval($this->getOption('pauseForJobs')) : $this->maxJobs;
    // Accumulate Updater behavior flags from the skip/index options.
    $updateFlags = 0;
    if ($this->getOption('indexOnSkip')) {
        $updateFlags |= Updater::INDEX_ON_SKIP;
    }
    if ($this->getOption('skipParse')) {
        $updateFlags |= Updater::SKIP_PARSE;
        // Skipping the parse makes each page cheap, so use a bigger default batch.
        if (!$this->getOption('batch-size')) {
            $this->setBatchSize(50);
        }
    }
    if ($this->getOption('skipLinks')) {
        $updateFlags |= Updater::SKIP_LINKS;
    }
    $this->namespace = $this->hasOption('namespace') ?
        intval($this->getOption('namespace')) : null;
    // Pick the verb used in progress output for this run's mode.
    if ($this->indexUpdates) {
        if ($this->queue) {
            $operationName = 'Queued';
        } else {
            $operationName = 'Indexed';
        }
    } else {
        $operationName = 'Deleted';
    }
    $operationStartTime = microtime(true);
    $lastJobQueueCheckTime = 0;
    $completed = 0;
    $rate = 0;
    // Pagination cursors: updates advance by (timestamp, id); deletes advance by
    // (timestamp, namespace, title).
    $minUpdate = $this->fromDate;
    if ($this->indexUpdates) {
        $minId = $this->getOption('fromId', -1);
    } else {
        $minNamespace = -100000000;
        $minTitle = '';
    }
    while (is_null($this->limit) || $this->limit > $completed) {
        if ($this->indexUpdates) {
            $updates = $this->findUpdates($minUpdate, $minId, $this->toDate);
            $size = count($updates);
            // Note that we'll strip invalid updates after checking the loop break condition
            // because we don't want a batch that contains only invalid updates to cause early
            // termination of the process....
        } else {
            $deletes = $this->findDeletes($minUpdate, $minNamespace, $minTitle, $this->toDate);
            $size = count($deletes);
        }
        if ($size == 0) {
            break;
        }
        if ($this->indexUpdates) {
            $last = $updates[$size - 1];
            // We make sure to set this if we need it but don't bother when we don't because
            // it requires loading the revision.
            if (isset($last['update'])) {
                $minUpdate = $last['update'];
            }
            $minId = $last['id'];
            // Strip updates down to just pages
            $pages = array();
            foreach ($updates as $update) {
                if (isset($update['page'])) {
                    $pages[] = $update['page'];
                }
            }
            if ($this->queue) {
                // Throttle: periodically check queue depth; if past maxJobs, block
                // until it shrinks back under pauseForJobs.
                $now = microtime(true);
                if ($now - $lastJobQueueCheckTime > self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS) {
                    $lastJobQueueCheckTime = $now;
                    $queueSize = $this->getUpdatesInQueue();
                    if ($this->maxJobs !== null && $this->maxJobs < $queueSize) {
                        do {
                            $this->output("{$wiki} Waiting while job queue shrinks: {$this->pauseForJobs} > {$queueSize}\n");
                            usleep(self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS * 1000000);
                            $queueSize = $this->getUpdatesInQueue();
                        } while ($this->pauseForJobs < $queueSize);
                    }
                }
                JobQueueGroup::singleton()->push(Job\MassIndex::build($pages, $updateFlags, $this->getOption('cluster')));
            } else {
                // Update size with the actual number of updated documents.
                $updater = $this->createUpdater();
                $size = $updater->updatePages($pages, null, null, $updateFlags);
            }
        } else {
            // Delete mode: collect the batch, then advance the cursor from the last row.
            $titlesToDelete = array();
            $idsToDelete = array();
            foreach ($deletes as $delete) {
                $titlesToDelete[] = $delete['title'];
                $idsToDelete[] = $delete['page'];
                $lastDelete = $delete;
            }
            $minUpdate = $lastDelete['timestamp'];
            $minNamespace = $lastDelete['title']->getNamespace();
            $minTitle = $lastDelete['title']->getText();
            $updater = $this->createUpdater();
            $updater->deletePages($titlesToDelete, $idsToDelete);
        }
        $completed += $size;
        $rate = round($completed / (microtime(true) - $operationStartTime));
        // Progress marker: page id when scanning by id, ISO timestamp when by date.
        if (is_null($this->toDate)) {
            $endingAt = $minId;
        } else {
            $endingAt = $minUpdate->getTimestamp(TS_ISO_8601);
        }
        $this->output("{$wiki} {$operationName} {$size} pages ending at {$endingAt} at {$rate}/second\n");
    }
    $this->output("{$operationName} a total of {$completed} pages at {$rate}/second\n");
    // After queuing everything, wait for our jobs to drain so the script's exit
    // roughly means "work applied", not just "work queued".
    $lastQueueSizeForOurJob = PHP_INT_MAX;
    $waitStartTime = microtime(true);
    if ($this->queue) {
        $this->output("Waiting for jobs to drain from the queue\n");
        while
            (true) {
            $queueSizeForOurJob = $this->getUpdatesInQueue();
            if ($queueSizeForOurJob === 0) {
                break;
            }
            // NOTE(review): an older comment here claimed a "subtract 5" slack for
            // jobs added by deletes, but no offset is applied — any growth at all
            // is taken to mean another script is adding jobs. Confirm intended.
            if ($queueSizeForOurJob > $lastQueueSizeForOurJob) {
                $this->output("Queue size went up. Another script is likely adding jobs " . "and it'll wait for them to empty.\n");
                break;
            }
            if (microtime(true) - $waitStartTime > 120) {
                // Wait at least two full minutes before we check if the job count went down.
                // Less than that and we might be seeing lag from redis's counts.
                $lastQueueSizeForOurJob = $queueSizeForOurJob;
            }
            $this->output("{$wiki} {$queueSizeForOurJob} jobs left on the queue.\n");
            usleep(self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS * 1000000);
        }
    }
}