/**
  * Block writes to the given indexes by upserting one freeze marker
  * document per index name.
  *
  * @param string[]|null $indexes List of index types to disallow writes to.
  *  null means to prevent indexing in all indexes across all wikis; an
  *  empty list is a no-op.
  */
 public function freezeIndexes(array $indexes = null)
 {
     global $wgCirrusSearchUpdateConflictRetryCount;

     // An explicitly empty list means there is nothing to freeze.
     if ($indexes !== null && count($indexes) === 0) {
         return;
     }

     $frozenNames = $indexes === null
         ? array(self::ALL_INDEXES_FROZEN_NAME)
         : $this->indexesToIndexNames($indexes);

     $this->log->info("Freezing writes to: " . implode(',', $frozenNames));

     // One marker per name; the name doubles as the document id.
     $markers = array();
     foreach ($frozenNames as $frozenName) {
         $marker = new \Elastica\Document($frozenName, array('name' => $frozenName));
         $marker->setDocAsUpsert(true);
         $marker->setRetryOnConflict($wgCirrusSearchUpdateConflictRetryCount);
         $markers[] = $marker;
     }

     $client = $this->connection->getClient();
     $frozenType = $this->connection->getFrozenIndexNameType();

     // Elasticsearch has a queue capacity of 50, so a single bulk request
     // carrying 50 documents could bump up against the max. Send the
     // markers in chunks of 30, one request after another.
     foreach (array_chunk($markers, 30) as $batch) {
         $request = new \Elastica\Bulk($client);
         $request->setType($frozenType);
         $request->addData($batch, 'update');
         $request->send();
     }

     // Ensure our freeze is immediately seen (mostly for testing purposes).
     $frozenType->getIndex()->refresh();
 }
Example #2
0
 /**
  * Wrap a log record in an Elastica Document addressed to the configured
  * type and index.
  *
  * @param  array    $record Log message
  * @return \Elastica\Document
  */
 protected function getDocument($record)
 {
     $doc = new Elastica\Document();
     // Routing parameters first (type before index, as in the original
     // ordering), then the payload itself.
     $doc->setType($this->type);
     $doc->setIndex($this->index);
     $doc->setData($record);
     return $doc;
 }
Example #3
0
 /**
  * Build an Elastica document from a flat data row.
  *
  * @param array $data Row providing 'level', 'id', '1Id' .. '4Id', 'name'
  *  and 'nameFull'; 'lat' and 'lon' are optional and, when both are set,
  *  are stored as the 'coordinates' geo point.
  * @return Elastica\Document Document without an explicit id.
  */
 protected function _getDocument(array $data)
 {
     $fields = array(
         'level' => (int) $data['level'],
         'id' => (int) $data['id'],
         'ids' => array(
             '1' => $data['1Id'],
             '2' => $data['2Id'],
             '3' => $data['3Id'],
             '4' => $data['4Id'],
         ),
         'name' => $data['name'],
         'nameFull' => $data['nameFull'],
     );
     $document = new Elastica\Document(null, $fields);

     // Coordinates are only attached when both components are present.
     if (isset($data['lat'], $data['lon'])) {
         $document->addGeoPoint('coordinates', (float) $data['lat'], (float) $data['lon']);
     }

     return $document;
 }
Example #4
0
 /**
  * Index one item's content in Elasticsearch using external versioning.
  *
  * A version conflict in which the stored and the provided version are the
  * same is logged as a warning and ignored; any other failure is thrown.
  *
  * @param mixed  $libraryID Library identifier; forms the first half of the document id
  * @param mixed  $key       Item key; forms the second half of the document id
  * @param mixed  $version   Version sent to Elasticsearch as the external version
  * @param string $timestamp Timestamp; spaces are replaced with "T" before indexing
  * @param mixed  $content   Content to index; cast to string
  * @param array  $stats     Optional values for the properties listed in self::$metadata; cast to int
  * @return void
  * @throws Exception When indexing fails for any reason other than a same-version conflict
  */
 private static function indexItemInElasticsearch($libraryID, $key, $version, $timestamp, $content, $stats = array())
 {
     $type = self::getWriteType();
     $id = $libraryID . "/" . $key;
     $fields = [
         'id' => $id,
         'libraryID' => $libraryID,
         'content' => (string) $content,
         'version' => $version,
         'timestamp' => str_replace(" ", "T", $timestamp),
     ];
     if ($stats) {
         foreach (self::$metadata as $prop) {
             if (isset($stats[$prop])) {
                 $fields[$prop] = (int) $stats[$prop];
             }
         }
     }

     $start = microtime(true);
     $doc = new \Elastica\Document($id, $fields, self::$elasticsearchType);
     $doc->setVersion($version);
     $doc->setVersionType('external');

     try {
         $response = $type->addDocument($doc);
     } catch (Exception $e) {
         $msg = $e->getMessage();
         if (self::isSameVersionConflict($msg)) {
             error_log("WARNING: " . $msg);
             return;
         }
         throw $e;
     }

     // Timing is only reported when the request itself did not throw.
     StatsD::timing("elasticsearch.client.item_fulltext.add", (microtime(true) - $start) * 1000);

     if ($response->hasError()) {
         $msg = $response->getError();
         if (self::isSameVersionConflict($msg)) {
             error_log("WARNING: " . $msg);
             return;
         }
         throw new Exception($msg);
     }
 }

 /**
  * Tell whether an error message reports a version conflict in which the
  * current and the provided version are the same number.
  *
  * @param string $message Error message returned by Elasticsearch
  * @return bool
  */
 private static function isSameVersionConflict($message)
 {
     $pattern = '/version conflict, current \\[([0-9]+)\\], provided \\[([0-9]+)\\]/';
     // Loose comparison is kept on purpose: both captures are digit strings
     // and are compared as numbers, exactly as before.
     return preg_match($pattern, $message, $matches) === 1 && $matches[1] == $matches[2];
 }
 /**
  * Build the update documents for a batch of pages.
  *
  * Pages that do not exist are logged and skipped. The
  * CirrusSearchBuildDocumentFinishBatch hook is run once after all pages
  * have been processed.
  *
  * @param \WikiPage[] $pages
  * @param int $flags Bitmask of self::INDEX_ON_SKIP, self::SKIP_PARSE,
  *  self::SKIP_LINKS and self::FORCE_PARSE
  * @return \Elastica\Document[] One entry per existing page (assumes the
  *  builders return the document they were given -- TODO confirm)
  */
 private function buildDocumentsForPages($pages, $flags)
 {
     global $wgCirrusSearchUpdateConflictRetryCount;

     $indexOnSkip = $flags & self::INDEX_ON_SKIP;
     $skipParse = $flags & self::SKIP_PARSE;
     $skipLinks = $flags & self::SKIP_LINKS;
     $forceParse = $flags & self::FORCE_PARSE;
     // A document is "full" only when neither parsing nor links are skipped.
     $isFullDocument = !$skipParse && !$skipLinks;

     $built = array();
     foreach ($pages as $page) {
         $title = $page->getTitle();

         if (!$page->exists()) {
             LoggerFactory::getInstance('CirrusSearch')->warning(
                 'Attempted to build a document for a page that doesn\'t exist.  This should be caught ' . "earlier but wasn't.  Page: {title}",
                 array('title' => $title)
             );
             continue;
         }

         $baseFields = array(
             'version' => $page->getLatest(),
             'version_type' => 'external',
             'namespace' => $title->getNamespace(),
             'namespace_text' => Util::getNamespaceText($title),
             'title' => $title->getText(),
             'timestamp' => wfTimestamp(TS_ISO_8601, $page->getTimestamp()),
         );
         $doc = new \Elastica\Document($page->getId(), $baseFields);

         // Everything is sent as an update so that fields maintained by other
         // processes (e.g. OtherIndex::updateOtherIndex) are not overwritten.
         // Documents that do not exist yet still need a way in, so we upsert
         // full documents and any partial document we were explicitly told is
         // ok to index; that is typically only the first phase of the initial
         // index build.
         // docAsUpsert merging: every field provided by doc is overwritten
         // unless it is an object in both doc and the indexed source. That is
         // fine here because all our fields are regular types or lists of
         // objects, and lists are overwritten.
         $doc->setDocAsUpsert($isFullDocument || $indexOnSkip);
         $doc->setRetryOnConflict($wgCirrusSearchUpdateConflictRetryCount);

         if (!$skipParse) {
             // Text to index, derived from the content and the parser output.
             list($content, $parserOutput) = $this->getContentAndParserOutput($page, $forceParse);

             // Page data first ...
             $pageDataBuilder = new PageDataBuilder($doc, $title, $content, $parserOutput);
             $doc = $pageDataBuilder->build();

             // ... then the page text ...
             $pageTextBuilder = new PageTextBuilder($doc, $content, $parserOutput);
             $doc = $pageTextBuilder->build();

             // ... then file metadata, for pages in the File namespace.
             if ($title->getNamespace() === NS_FILE) {
                 $fileDataBuilder = new FileDataBuilder($doc, $title);
                 $doc = $fileDataBuilder->build();
             }

             // Finally let hooks have a go.
             MWHooks::run(
                 'CirrusSearchBuildDocumentParse',
                 array($doc, $title, $content, $parserOutput, $this->connection)
             );
         }

         if (!$skipLinks) {
             MWHooks::run('CirrusSearchBuildDocumentLinks', array($doc, $title, $this->connection));
         }

         $built[] = $doc;
     }

     MWHooks::run('CirrusSearchBuildDocumentFinishBatch', array($pages));

     return $built;
 }
Example #6
0
 /**
  * Build a Document from this result.
  *
  * The document data is the result's source; its params are the raw hit
  * with the '_source', '_explanation', 'highlight' and '_score' entries
  * removed.
  *
  * @return \Elastica\Document
  */
 public function getDocument()
 {
     $document = new \Elastica\Document();
     $document->setData($this->getSource());

     $params = $this->getHit();
     // Entries of the hit that must not be carried over as document params.
     $excludedKeys = array('_source', '_explanation', 'highlight', '_score');
     foreach ($excludedKeys as $key) {
         if ($this->hasParam($key)) {
             unset($params[$key]);
         }
     }
     $document->setParams($params);

     return $document;
 }
 /**
  * Index a Resource.
  *
  * Nothing is indexed (and false is returned) when the resource is flagged
  * as not searchable, not published or deleted, or when no 'context_key' is
  * given to select the target type. Only scalar/null fields with string
  * names are copied into the document; the 'id' field also becomes the
  * document id.
  *
  * @param array $fields Resource fields; 'context_key' selects the type
  * @return boolean True when the add-document response reports success
  */
 public function index(array $fields = array())
 {
     if (isset($fields['searchable']) && empty($fields['searchable'])) {
         return false;
     }
     if (isset($fields['published']) && empty($fields['published'])) {
         return false;
     }
     if (isset($fields['deleted']) && !empty($fields['deleted'])) {
         return false;
     }
     // Without a context key there is no type to index into; bail out
     // instead of triggering an undefined-index notice and getType(null).
     if (!isset($fields['context_key'])) {
         return false;
     }

     $type = $this->index->getType($fields['context_key']);
     $document = new \Elastica\Document();
     $dateFields = array('createdon', 'editedon', 'deletedon', 'publishedon');

     foreach ($fields as $fieldName => $value) {
         if (!is_string($fieldName) || is_array($value) || is_object($value)) {
             continue;
         }
         if (in_array($fieldName, $dateFields, true)) {
             // date() replaces strftime(), which is deprecated as of PHP 8.1;
             // the output is the same (local time, unparseable values map to
             // the epoch).
             // NOTE(review): the literal "Z" suffix claims UTC while date()
             // formats in the default timezone -- confirm whether gmdate()
             // is what is actually wanted here.
             $value = date('Y-m-d\TH:i:s\Z', (int) strtotime($value));
             $fields[$fieldName] = $value;
         }
         if ($fieldName === 'id') {
             $document->setId($value);
         }
         $document->set($fieldName, $value);
     }

     $this->modx->log(modX::LOG_LEVEL_DEBUG, '[SimpleSearch] Indexing Resource: ' . print_r($fields, true));
     $response = $type->addDocument($document);
     // Refresh so the newly added document is searchable right away.
     $type->getIndex()->refresh();

     return $response->isOk();
 }
Example #8
0
 /**
  * Add an entry for the given document to the 'autocomplete_document' type
  * of the 'website' index and refresh that index.
  *
  * Best effort: an exception is logged and otherwise ignored, so a failing
  * search backend never interrupts the caller.
  *
  * @param object $document Source object exposing getDocumentID() and getName()
  * @return void
  */
 private function autocompleteDocument($document)
 {
     try {
         $client = new \Elastica\Client();
         $index = $client->getIndex('website');
         $type = $index->getType('autocomplete_document');

         $sourceId = $document->getDocumentID();
         // The Elasticsearch id is the source id shifted by 100000
         // (presumably to keep it apart from other ids -- TODO confirm).
         $entry = new \Elastica\Document(
             $sourceId + 100000,
             array('name' => $sourceId . ' - ' . $this->strip($document->getName()))
         );

         $type->addDocument($entry);
         $type->getIndex()->refresh();
     } catch (\Exception $e) {
         // Previously swallowed silently; keep the best-effort contract but
         // leave a trace so failures can be diagnosed.
         error_log('autocompleteDocument failed: ' . $e->getMessage());
     }
 }