/**
 * Freeze writes to the given indexes.
 *
 * One marker document per frozen name is sent as an upserting bulk update to
 * the type returned by the connection's getFrozenIndexNameType().
 *
 * @param string[]|null $indexes List of index types to disallow writes to.
 *  null means to prevent indexing in all indexes across all wikis. An empty
 *  list freezes nothing.
 */
public function freezeIndexes(array $indexes = null) {
    global $wgCirrusSearchUpdateConflictRetryCount;

    // An explicitly empty list means there is nothing to freeze.
    if ($indexes !== null && count($indexes) === 0) {
        return;
    }

    $names = $indexes === null
        ? array(self::ALL_INDEXES_FROZEN_NAME)
        : $this->indexesToIndexNames($indexes);

    $this->log->info("Freezing writes to: " . implode(',', $names));

    $documents = array();
    foreach ($names as $frozenName) {
        $marker = new \Elastica\Document($frozenName, array('name' => $frozenName));
        $marker->setDocAsUpsert(true);
        $marker->setRetryOnConflict($wgCirrusSearchUpdateConflictRetryCount);
        $documents[] = $marker;
    }

    $client = $this->connection->getClient();
    $type = $this->connection->getFrozenIndexNameType();

    // Elasticsearch has a queue capacity of 50, so a single request carrying
    // that many documents could hit the limit. Send them in sequential
    // batches of 30 instead.
    foreach (array_chunk($documents, 30) as $batch) {
        $bulk = new \Elastica\Bulk($client);
        $bulk->setType($type);
        $bulk->addData($batch, 'update');
        $bulk->send();
    }

    // Refresh so the freeze is immediately visible (mostly for testing
    // purposes).
    $type->getIndex()->refresh();
}
/**
 * Wrap a log record in an Elastica Document addressed to this object's
 * configured type and index.
 *
 * @param array $record Log message
 * @return \Elastica\Document
 */
protected function getDocument($record) {
    $doc = new Elastica\Document();

    // Type is set before index, in the same order as before, so an explicit
    // index always has the last word.
    $doc->setType($this->type);
    $doc->setIndex($this->index);
    $doc->setData($record);

    return $doc;
}
/**
 * Build an Elastica Document (without an explicit id) from one data row.
 *
 * Reads the 'level', 'id', '1Id'..'4Id', 'name' and 'nameFull' keys of $data.
 * A 'coordinates' geo point is attached only when both 'lat' and 'lon' are
 * set.
 *
 * @param array $data Source row
 * @return Elastica\Document
 */
protected function _getDocument(array $data) {
    $fields = array(
        'level' => (int) $data['level'],
        'id' => (int) $data['id'],
        'ids' => array(
            '1' => $data['1Id'],
            '2' => $data['2Id'],
            '3' => $data['3Id'],
            '4' => $data['4Id'],
        ),
        'name' => $data['name'],
        'nameFull' => $data['nameFull'],
    );
    $doc = new Elastica\Document(null, $fields);

    if (isset($data['lat'], $data['lon'])) {
        $doc->addGeoPoint('coordinates', (float) $data['lat'], (float) $data['lon']);
    }

    return $doc;
}
/**
 * Index one item's content in Elasticsearch, using the item version as an
 * external document version.
 *
 * A version conflict in which the current and the provided version are equal
 * is logged as a warning and otherwise ignored; every other failure is thrown.
 *
 * @param mixed $libraryID Combined with $key into the document id "<libraryID>/<key>"
 * @param mixed $key
 * @param mixed $version External version assigned to the document
 * @param string $timestamp Its spaces are replaced with "T" before indexing
 * @param mixed $content Cast to string before indexing
 * @param array $stats Optional values; entries named in self::$metadata are stored as ints
 * @throws Exception When indexing fails for any reason other than a same-version conflict
 */
private static function indexItemInElasticsearch($libraryID, $key, $version, $timestamp, $content, $stats = array()) {
    $type = self::getWriteType();
    $id = $libraryID . "/" . $key;

    $fields = [
        'id' => $id,
        'libraryID' => $libraryID,
        'content' => (string) $content,
        'version' => $version,
        'timestamp' => str_replace(" ", "T", $timestamp),
    ];
    if ($stats) {
        foreach (self::$metadata as $prop) {
            if (isset($stats[$prop])) {
                $fields[$prop] = (int) $stats[$prop];
            }
        }
    }

    // True when the message reports a version conflict whose current and
    // provided versions are the same.
    $isSameVersionConflict = static function ($msg) {
        $pattern = '/version conflict, current \\[([0-9]+)\\], provided \\[([0-9]+)\\]/';
        return preg_match($pattern, $msg, $matches) && $matches[1] == $matches[2];
    };

    $start = microtime(true);

    $doc = new \Elastica\Document($id, $fields, self::$elasticsearchType);
    $doc->setVersion($version);
    $doc->setVersionType('external');

    try {
        $response = $type->addDocument($doc);
    } catch (Exception $e) {
        $msg = $e->getMessage();
        if ($isSameVersionConflict($msg)) {
            error_log("WARNING: " . $msg);
            return;
        }
        throw $e;
    }

    StatsD::timing("elasticsearch.client.item_fulltext.add", (microtime(true) - $start) * 1000);

    if (!$response->hasError()) {
        return;
    }

    $msg = $response->getError();
    if ($isSameVersionConflict($msg)) {
        error_log("WARNING: " . $msg);
        return;
    }

    throw new Exception($response->getError());
}
/**
 * Build the update documents for a batch of pages.
 *
 * Pages that do not exist are logged and skipped. After the whole batch has
 * been processed the CirrusSearchBuildDocumentFinishBatch hook is run.
 *
 * @param \WikiPage[] $pages
 * @param int $flags Bitmask tested against self::INDEX_ON_SKIP,
 *  self::SKIP_PARSE, self::SKIP_LINKS and self::FORCE_PARSE
 * @return array One document per existing page (presumably \Elastica\Document
 *  instances; the builders' return types are not visible here)
 */
private function buildDocumentsForPages($pages, $flags) {
    global $wgCirrusSearchUpdateConflictRetryCount;

    $upsertWhenPartial = $flags & self::INDEX_ON_SKIP;
    $noParse = $flags & self::SKIP_PARSE;
    $noLinks = $flags & self::SKIP_LINKS;
    $reparse = $flags & self::FORCE_PARSE;
    // A document is "full" only when neither the parse nor the links phase is skipped.
    $isFull = !$noParse && !$noLinks;

    $documents = array();
    foreach ($pages as $page) {
        $title = $page->getTitle();
        if (!$page->exists()) {
            LoggerFactory::getInstance('CirrusSearch')->warning(
                'Attempted to build a document for a page that doesn\'t exist. This should be caught ' . "earlier but wasn't. Page: {title}",
                array('title' => $title)
            );
            continue;
        }

        $baseFields = array(
            'version' => $page->getLatest(),
            'version_type' => 'external',
            'namespace' => $title->getNamespace(),
            'namespace_text' => Util::getNamespaceText($title),
            'title' => $title->getText(),
            'timestamp' => wfTimestamp(TS_ISO_8601, $page->getTimestamp()),
        );
        $doc = new \Elastica\Document($page->getId(), $baseFields);

        // Everything is sent as an update so that fields maintained by other
        // processes (such as OtherIndex::updateOtherIndex) are not overwritten.
        // Documents that do not exist yet still have to be indexed somehow, so
        // upserting is allowed for full documents and for partial documents we
        // were explicitly told may be indexed - typically only during the
        // first phase of the initial index build.
        // About docAsUpsert merging: it overwrites every field provided by doc
        // unless the field is an object in both doc and the indexed source.
        // That is acceptable because all of our fields are either regular
        // types or lists of objects, and lists are overwritten.
        $doc->setDocAsUpsert($isFull || $upsertWhenPartial);
        $doc->setRetryOnConflict($wgCirrusSearchUpdateConflictRetryCount);

        if (!$noParse) {
            // Text to index, derived from the content and the parser output.
            list($content, $parserOutput) = $this->getContentAndParserOutput($page, $reparse);

            // Page data first ...
            $pageData = new PageDataBuilder($doc, $title, $content, $parserOutput);
            $doc = $pageData->build();

            // ... then the page text itself ...
            $pageText = new PageTextBuilder($doc, $content, $parserOutput);
            $doc = $pageText->build();

            // ... and file metadata when the page is a file.
            if ($title->getNamespace() === NS_FILE) {
                $fileData = new FileDataBuilder($doc, $title);
                $doc = $fileData->build();
            }

            // Finally let hooks have a go.
            MWHooks::run('CirrusSearchBuildDocumentParse', array($doc, $title, $content, $parserOutput, $this->connection));
        }

        if (!$noLinks) {
            MWHooks::run('CirrusSearchBuildDocumentLinks', array($doc, $title, $this->connection));
        }

        $documents[] = $doc;
    }

    MWHooks::run('CirrusSearchBuildDocumentFinishBatch', array($pages));

    return $documents;
}
/**
 * Convert this result into a Document.
 *
 * The source becomes the document data. The remaining hit entries become the
 * document params, with '_source', '_explanation', 'highlight' and '_score'
 * removed when present.
 *
 * @return \Elastica\Document
 */
public function getDocument() {
    $doc = new \Elastica\Document();
    $doc->setData($this->getSource());

    $params = $this->getHit();
    foreach (array('_source', '_explanation', 'highlight', '_score') as $resultOnlyKey) {
        if ($this->hasParam($resultOnlyKey)) {
            unset($params[$resultOnlyKey]);
        }
    }
    $doc->setParams($params);

    return $doc;
}
/**
 * Index a Resource.
 *
 * The Resource is skipped (and false returned) when it is explicitly marked
 * as not searchable, not published or deleted, or when no context key is
 * given. Otherwise every scalar field with a string name is copied into a
 * document that is added to the type named after the Resource's context key,
 * and the index is refreshed.
 *
 * @param array $fields Resource fields; 'context_key' selects the type and
 *  'id', when present, becomes the document id
 * @return boolean True when the add response is OK, false when the Resource
 *  was skipped or the response is not OK
 */
public function index(array $fields = array()) {
    if (isset($fields['searchable']) && empty($fields['searchable'])) {
        return false;
    }
    if (isset($fields['published']) && empty($fields['published'])) {
        return false;
    }
    if (isset($fields['deleted']) && !empty($fields['deleted'])) {
        return false;
    }
    // The context key names the target type. Without it (e.g. the default
    // empty $fields) there is nowhere to index to, so skip instead of
    // triggering an undefined-index notice and requesting a null type.
    if (!isset($fields['context_key']) || $fields['context_key'] === '') {
        return false;
    }

    $type = $this->index->getType($fields['context_key']);
    $document = new \Elastica\Document();
    $dateFields = array('createdon', 'editedon', 'deletedon', 'publishedon');

    foreach ($fields as $fieldName => $value) {
        // Only scalar values under string keys are indexed.
        if (!is_string($fieldName) || is_array($value) || is_object($value)) {
            continue;
        }
        if (in_array($fieldName, $dateFields, true)) {
            // date() yields the same text the deprecated
            // strftime('%Y-%m-%dT%H:%M:%SZ') produced.
            // NOTE(review): the time is rendered in PHP's default timezone
            // although the suffix is a literal "Z" - confirm that is intended.
            $value = date('Y-m-d\TH:i:s\Z', strtotime($value));
            // Written back so the debug log below shows the converted value.
            $fields[$fieldName] = $value;
        }
        if ($fieldName == 'id') {
            $document->setId($value);
        }
        $document->set($fieldName, $value);
    }

    $this->modx->log(modX::LOG_LEVEL_DEBUG, '[SimpleSearch] Indexing Resource: ' . print_r($fields, true));

    $response = $type->addDocument($document);
    $type->getIndex()->refresh();

    return $response->isOk();
}
/**
 * Add an autocomplete entry for a document to the 'autocomplete_document'
 * type of the 'website' index, then refresh that index.
 *
 * This is best-effort: a failure never propagates to the caller, but it is
 * written to the PHP error log instead of being silently discarded.
 *
 * @param object $document Must provide getDocumentID() and getName()
 */
private function autocompleteDocument($document) {
    try {
        $client = new \Elastica\Client();
        $type = $client->getIndex('website')->getType('autocomplete_document');

        $id = $document->getDocumentID();
        // NOTE(review): the 100000 offset presumably keeps these ids apart
        // from ids used by other documents - confirm.
        $entryId = $id + 100000;
        $label = $id . ' - ' . $this->strip($document->getName());

        $type->addDocument(new \Elastica\Document($entryId, array('name' => $label)));
        $type->getIndex()->refresh();
    } catch (\Exception $e) {
        error_log('autocompleteDocument failed: ' . $e->getMessage());
    }
}