/**
 * Send a batch of documents to a single index type using one Elastica bulk
 * 'update' request.
 *
 * Returns a good Status without contacting Elasticsearch when $data is empty,
 * and a fatal Status ('cirrussearch-indexes-frozen') when
 * areIndexesAvailableForWrites() reports that the index cannot be written to.
 * A ResponseException that bulkResponseExceptionIsJustDocumentMissing()
 * classifies as "only missing documents" is treated as success; any other
 * Elastica exception is logged and reported as 'cirrussearch-failed-send-data'.
 *
 * @param string $indexType type of index to which to send $data
 * @param \Elastica\Script[]|\Elastica\Document[] $data documents to send
 * @param null|string $shardTimeout How long should Elasticsearch wait for an offline
 *  shard. Defaults to null, meaning don't wait. Null is more efficient when sending
 *  multiple pages because Cirrus will use Elasticsearch's bulk API. Timeout is in
 *  Elasticsearch's time format.
 * @return Status
 */
public function sendData($indexType, $data, $shardTimeout = null) {
	$documentCount = count($data);
	if ($documentCount === 0) {
		// Nothing to send, so there is nothing that can fail.
		return Status::newGood();
	}

	if (!$this->areIndexesAvailableForWrites(array($indexType))) {
		return Status::newFatal('cirrussearch-indexes-frozen');
	}

	// Stays null unless a failure worth reporting is caught below.
	$exception = null;
	try {
		$pageType = $this->connection->getPageType(wfWikiId(), $indexType);
		$this->start(
			"sending {numBulk} documents to the {indexType} index",
			array('numBulk' => $documentCount, 'indexType' => $indexType)
		);
		$bulk = new \Elastica\Bulk($this->connection->getClient());
		// Any falsy timeout (null, '', '0') leaves the bulk request's
		// shard timeout untouched.
		if ($shardTimeout) {
			$bulk->setShardTimeout($shardTimeout);
		}
		$bulk->setType($pageType);
		$bulk->addData($data, 'update');
		$bulk->send();
	} catch (ResponseException $e) {
		// The logger is copied into a local so the closure does not need $this.
		$cirrusLog = $this->log;
		// The callback logs each id handed to it by the helper
		// (presumably one call per missing document - confirm in the helper).
		$missing = $this->bulkResponseExceptionIsJustDocumentMissing(
			$e,
			function ($id) use ($cirrusLog) {
				$cirrusLog->info(
					"Updating a page that doesn't yet exist in Elasticsearch: {id}",
					array('id' => $id)
				);
			}
		);
		// Only escalate when the helper says this was more than missing documents.
		if (!$missing) {
			$exception = $e;
		}
	} catch (\Elastica\Exception\ExceptionInterface $e) {
		$exception = $e;
	}

	if ($exception === null) {
		$this->success();
		return Status::newGood();
	}

	$this->failure($exception);
	// Record the ids of every document in the failed batch.
	$documentIds = array_map(function ($d) {
		return $d->getId();
	}, $data);
	$this->failedLog->warning(
		'Update for doc ids: ' . implode(',', $documentIds),
		array('exception' => $exception)
	);
	return Status::newFatal('cirrussearch-failed-send-data');
}