/**
 * Indexes all published documents through Opus_SolrSearch_Index_Indexer.
 *
 * If `--help` or `-h` is present on the command line, the help message is
 * printed and the script exits. Otherwise the arguments are evaluated,
 * synchronous mode is forced, and every ID returned by
 * Opus_Document::getAllPublishedIds($this->start, $this->end) is loaded and
 * handed to the indexer. Statistics are printed after every 10 documents and
 * a warning is printed for each document that takes more than 30 seconds.
 * The indexer is committed once, after the loop.
 *
 * The mode changed by forceSyncMode() is reset via resetMode() on both the
 * success path and when an exception aborts the run.
 *
 * @return float Seconds spent in the indexing loop (measured before the final commit).
 * @throws Exception Any exception raised while indexing is re-thrown after resetMode().
 */
public function run()
{
    global $argv, $argc;

    if (in_array('--help', $argv, true) || in_array('-h', $argv, true)) {
        $this->printHelpMessage($argv);
        exit;
    }

    $this->evaluateArguments($argc, $argv);
    $this->forceSyncMode();

    try {
        $docIds = Opus_Document::getAllPublishedIds($this->start, $this->end);
        $indexer = new Opus_SolrSearch_Index_Indexer($this->deleteAllDocs);

        echo date('Y-m-d H:i:s') . " Start indexing of " . count($docIds) . " documents.\n";

        $numOfDocs = 0;
        $runtime = microtime(true);

        foreach ($docIds as $docId) {
            $time_start = microtime(true);

            $doc = new Opus_Document($docId);

            // dirty hack: disable implicit reindexing of documents in case of cache misses
            $doc->unregisterPlugin('Opus_Document_Plugin_Index');

            $indexer->addDocumentToEntryIndex($doc);

            $time_delta = microtime(true) - $time_start;
            if ($time_delta > 30) {
                echo date('Y-m-d H:i:s') . " WARNING: Indexing document {$docId} took {$time_delta} seconds.\n";
            }

            $numOfDocs++;

            if ($numOfDocs % 10 === 0) {
                $mem_now = round(memory_get_usage() / 1024 / 1024);
                $mem_peak = round(memory_get_peak_usage() / 1024 / 1024);
                $delta_t = microtime(true) - $runtime;

                // Avoid reporting an absurd rate while less than half a second has elapsed.
                $doc_per_second = round($delta_t) == 0 ? 'inf' : round($numOfDocs / $delta_t, 2);
                $seconds_per_doc = round($delta_t / $numOfDocs, 2);

                echo date('Y-m-d H:i:s') . " Stats after {$numOfDocs} documents -- memory {$mem_now} MB, peak memory {$mem_peak} (MB), {$doc_per_second} docs/second, {$seconds_per_doc} seconds/doc\n";
            }
        }

        // Loop duration only; the commit below is not included in the returned value.
        $runtime = microtime(true) - $runtime;
        echo "\n" . date('Y-m-d H:i:s') . " Finished indexing.\n";

        $indexer->commit();

        echo "\n\nErrors appeared in " . $indexer->getErrorFileCount() . " of " . $indexer->getTotalFileCount() . " files." . " Details were written to opus-console.log";
    } catch (Exception $e) {
        // Do not leave the forced sync mode in place when the run aborts.
        // catch + rethrow is used instead of `finally` to stay compatible with PHP < 5.5.
        $this->resetMode();
        throw $e;
    }

    $this->resetMode();

    return $runtime;
}
/**
 * Helper method to add document to index.
 *
 * If $this->config->runjobs->asynchronous is set and truthy, an index job
 * for the document is created and stored, unless an equal job is already in
 * the queue. Otherwise the document is sent to the indexer and committed
 * right away.
 *
 * On the synchronous path, Opus_SolrSearch_Index_Exception and
 * InvalidArgumentException are logged and not propagated; any other
 * exception is left to the caller.
 *
 * @param Opus_Document $document
 * @return void
 */
private function addDocumentToIndex(Opus_Document $document)
{
    $log = Zend_Registry::get('Zend_Log');
    $docId = $document->getId();
    $prefix = __METHOD__ . ': ';

    // create job if asynchronous is set
    if (isset($this->config->runjobs->asynchronous) && $this->config->runjobs->asynchronous) {
        $log->debug($prefix . 'Adding index job for document ' . $docId . '.');

        $job = new Opus_Job();
        $job->setLabel(Opus_Job_Worker_IndexOpusDocument::LABEL);
        $job->setData(array('documentId' => $docId, 'task' => 'index'));

        // skip creating job if equal job already exists
        if (true === $job->isUniqueInQueue()) {
            $job->store();
        } else {
            $log->debug($prefix . 'Indexing job for document ' . $docId . ' already exists!');
        }

        return;
    }

    $log->debug($prefix . 'Index document ' . $docId . '.');

    try {
        $indexer = new Opus_SolrSearch_Index_Indexer();
        $indexer->addDocumentToEntryIndex($document);
        $indexer->commit();
    } catch (Opus_SolrSearch_Index_Exception $e) {
        // A failed indexing attempt is an error, not debug noise: log it at a
        // level that is visible with production log settings.
        $log->err($prefix . 'Indexing document ' . $docId . ' failed: ' . $e->getMessage());
    } catch (InvalidArgumentException $e) {
        $log->warn($prefix . $e->getMessage());
    }
}
/**
 * Marks this document as deleted.
 *
 * The model is not physically removed here: its server state is set to
 * 'deleted' and the model is stored. Before that, the document is removed
 * from the search index on a best-effort basis; any exception raised by the
 * indexer is logged and does not prevent the state change.
 *
 * @return void
 */
public function delete()
{
    // De-fatalize Search Index errors.
    try {
        // Remove from index
        $indexer = new Opus_SolrSearch_Index_Indexer();
        $indexer->removeDocumentFromEntryIndex($this);

        // NOTE(review): assumes removals, like additions, only take effect
        // once commit() is called on the indexer -- confirm against
        // Opus_SolrSearch_Index_Indexer.
        $indexer->commit();
    } catch (Exception $e) {
        $this->logger("removeDocumentFromIndex failed: " . $e->getMessage());
    }

    $this->setServerState('deleted');
    $this->store();
}