/**
 * Delete pages from the elasticsearch index. $titles and $ids must point to the
 * same pages and should point to them in the same order.
 *
 * The delete is carried out by building a Job\ElasticaWrite job with method
 * 'sendDeletes' for the configured write cluster and running it immediately.
 *
 * @param Title[] $titles titles to delete. If empty then other index
 *  maintenance is skipped and a placeholder title is used for the write job.
 * @param int[] $ids ids to delete
 * @param null|int $clientSideTimeout timeout in seconds to update pages or null to not
 *  change the configured timeout which defaults to 300 seconds.
 * @param string|null $indexType index from which to delete
 * @return bool True if nothing happened or we successfully deleted, false on failure
 */
public function deletePages($titles, $ids, $clientSideTimeout = null, $indexType = null) {
    Job\OtherIndex::queueIfRequired($titles, $this->writeToClusterName);

    // The job needs a title; use the first one supplied, or a placeholder
    // when the caller passed no titles at all.
    $jobTitle = $titles ? reset($titles) : Title::makeTitle(0, "");

    $job = new Job\ElasticaWrite(
        $jobTitle,
        array(
            'clientSideTimeout' => $clientSideTimeout,
            'method' => 'sendDeletes',
            'arguments' => array($ids, $indexType),
            'cluster' => $this->writeToClusterName,
        )
    );

    // This job type will insert itself into the job queue
    // with a delay if writes to ES are currently paused
    //
    // Only an explicit false counts as failure, so a run that did nothing
    // still reports true as documented.
    // NOTE(review): assumes run() follows the MediaWiki Job::run() convention
    // of returning false on failure - confirm against Job\ElasticaWrite.
    return $job->run() !== false;
}
/**
 * Update the indexes for other wiki that also store information about $titles.
 *
 * Works in two phases: first a single multi search looks up, for every
 * title / external index pair, the _id of the matching document; then one
 * Job\ElasticaWrite job ('sendOtherIndexUpdates') is run per external index
 * that had at least one match.
 *
 * Returns early, without running any job, when the super_detect_noop plugin
 * is not configured (after logging a failure), when there is nothing to look
 * up, when the lookup throws an Elastica exception, or when no document
 * was found.
 *
 * @param Title[] $titles array of titles in other indexes to update
 */
public function updateOtherIndex($titles) {
    global $wgCirrusSearchWikimediaExtraPlugin;

    if (!isset($wgCirrusSearchWikimediaExtraPlugin['super_detect_noop'])) {
        $this->logFailure($titles, 'super_detect_noop plugin not enabled');
        return;
    }

    // Filled in by the closures below, keyed by external index name.
    $updates = array();

    // Build multisearch to find ids to update
    $findIdsMultiSearch = new \Elastica\Multi\Search($this->connection->getClient());
    // One closure per search added, in the same order, so result $i can be
    // handed to closure $i.
    $findIdsClosures = array();
    foreach ($titles as $title) {
        foreach (OtherIndexes::getExternalIndexes($title) as $otherIndex) {
            if ($otherIndex === null) {
                continue;
            }
            $type = $this->connection->getPageType($otherIndex);

            $bool = new \Elastica\Filter\Bool();
            // Note that we need to use the keyword indexing of title so the analyzer gets out of the way.
            $bool->addMust(new \Elastica\Filter\Term(array('title.keyword' => $title->getText())));
            $bool->addMust(new \Elastica\Filter\Term(array('namespace' => $title->getNamespace())));
            $filtered = new \Elastica\Query\Filtered(new \Elastica\Query\MatchAll(), $bool);

            $query = new \Elastica\Query($filtered);
            // We only need the _id so don't load the _source
            $query->setFields(array());
            $query->setSize(1);

            $findIdsMultiSearch->addSearch($type->createSearch($query));
            $findIdsClosures[] = function ($id) use ($otherIndex, &$updates, $title) {
                $updates[$otherIndex][] = array(
                    'id' => $id,
                    'ns' => $title->getNamespace(),
                    'dbKey' => $title->getDBkey(),
                );
            };
        }
    }

    $findIdsClosuresCount = count($findIdsClosures);
    if ($findIdsClosuresCount === 0) {
        // No other indexes to check.
        return;
    }

    // Look up the ids and run all closures to build the list of updates
    $this->start(
        "searching for {numIds} ids in other indexes",
        array('numIds' => $findIdsClosuresCount)
    );
    try {
        // The search itself must be inside the try: it is the call that
        // talks to Elasticsearch, so its exceptions have to reach failure().
        $findIdsMultiSearchResult = $findIdsMultiSearch->search();
        $this->success();
        for ($i = 0; $i < $findIdsClosuresCount; $i++) {
            $results = $findIdsMultiSearchResult[$i]->getResults();
            if (count($results) === 0) {
                // Title is not present in this external index.
                continue;
            }
            $result = $results[0];
            call_user_func($findIdsClosures[$i], $result->getId());
        }
    } catch (\Elastica\Exception\ExceptionInterface $e) {
        $this->failure($e);
        return;
    }

    if (!$updates) {
        return;
    }

    // Every job is created against the first title passed in.
    $jobTitle = reset($titles);

    // These are split into a job per index so one index
    // being frozen doesn't block updates to other indexes
    // in the same update.
    foreach ($updates as $indexName => $actions) {
        $job = new Job\ElasticaWrite(
            $jobTitle,
            array(
                'clientSideTimeout' => false,
                'method' => 'sendOtherIndexUpdates',
                'arguments' => array($this->localSite, $indexName, $actions),
                'cluster' => $this->writeToClusterName,
            )
        );
        $job->run();
    }
}