/**
 * Empties the Solr index, runs the consistency check cron script and verifies
 * that the job queue ends up empty and that the log file contains the
 * expected report lines.
 *
 * The log file is removed at the end of the test.
 *
 * TODO fix for Solr Update
 */
public function testJobSuccessWithInconsistency()
{
    // remove everything from the index before the check runs
    $indexer = Opus_Search_Service::selectIndexingService(null, 'solr');
    $indexer->removeAllDocumentsFromIndex();

    $this->createJob(Opus_Job_Worker_ConsistencyCheck::LABEL);
    $this->executeScript('cron-check-consistency.php');

    $jobLabels = array(Opus_Job_Worker_ConsistencyCheck::LABEL);

    $pendingJobs = Opus_Job::getByLabels($jobLabels, null, Opus_Job::STATE_UNDEFINED);
    $this->assertTrue(
        empty($pendingJobs),
        'Expected no more jobs in queue: found ' . count($pendingJobs) . ' jobs'
    );

    $brokenJobs = Opus_Job::getByLabels($jobLabels, null, Opus_Job::STATE_FAILED);
    $this->assertTrue(
        empty($brokenJobs),
        'Expected no failed jobs in queue: found ' . count($brokenJobs) . ' jobs'
    );

    $logFile = parent::$scriptPath . '/../../workspace/log/' . 'opus_consistency-check.log';
    $this->assertFileExists($logFile, 'Logfile opus_consistency-check.log does not exist');
    $this->assertFileNotExists(
        $logFile . '.lock',
        'Lockfile opus_consistency-check.log.lock was not removed'
    );

    $docCount = $this->getPublishedDocumentCount();
    $log = file_get_contents($logFile);

    // each entry: text that has to occur in the log, message shown if it does not
    $expectations = array(
        array(
            'checking ' . $docCount . ' published documents for consistency.',
            "Logfile opus_consistency-check.log does not contain 'checking ' . {$docCount}\n . ' ...' [{$log}].",
        ),
        array(
            'inconsistency found for document 1: document is in database, but is not in Solr index.',
            'Logfile opus_consistency-check.log does not contain "inconsistency found for document 1: ...".',
        ),
        array(
            'inconsistency found for document 200: document is in database, but is not in Solr index.',
            'Logfile opus_consistency-check.log does not contain "inconsistency found for document 200: ...".',
        ),
        array(
            $docCount . ' inconsistencies were detected: ' . $docCount . ' of them were resolved.',
            'Logfile opus_consistency-check.log does not contain "' . $docCount . ' inconsistencies ...".',
        ),
        array(
            'number of updates: ' . $docCount,
            'Logfile opus_consistency-check.log does not contain "number of updates: ' . $docCount . '".',
        ),
        array(
            'number of deletions: 0',
            'Logfile opus_consistency-check.log does not contain "number of deletions: 0".',
        ),
        array(
            'Completed operation after',
            'Logfile opus_consistency-check.log does not contain "Completed operation after".',
        ),
    );

    foreach ($expectations as $expectation) {
        list($needle, $failureMessage) = $expectation;
        $this->assertFalse(strpos($log, $needle) === false, $failureMessage);
    }

    unlink($logFile);
}
/**
 * This script walks over all documents with ServerState=published and reports
 * those whose ServerDateModified value in the Solr index differs from the
 * value stored in the database (the original description speaks of an older
 * index date; the comparison below flags any difference). If a document is
 * not present in the index, an error message is printed for that document.
 *
 * See also ticket OPUSVIER-2853.
 */

$numOfModified = 0;
$numOfErrors = 0;

$finder = new Opus_DocumentFinder();
$finder->setServerState('published');

foreach ($finder->ids() as $docId) {
    // look the document up in the search index by its id
    $search = Opus_Search_Service::selectSearchingService();
    $query = Opus_Search_QueryFactory::selectDocumentById($search, $docId);
    $matchCount = $search->customSearch($query)->getAllMatchesCount();

    if ($matchCount != 1) {
        echo "ERROR: document # {$docId} is not stored in search index\n";
        $numOfErrors++;
        continue;
    }

    // NOTE(review): getResults() is called on the searching service, not on the
    // object returned by customSearch() above -- confirm the service offers it.
    $result = $search->getResults();
    $solrModificationDate = $result[0]->getServerDateModified();

    $document = new Opus_Document($docId);
    $docModificationDate = $document->getServerDateModified()->getUnixTimestamp();

    if ($solrModificationDate == $docModificationDate) {
        continue;
    }

    $numOfModified++;
    echo "document # {$docId} is modified\n";
}
/**
 * Runs the file extraction for every file of every document in the given ID
 * range and reports progress on standard output.
 *
 * Failures of type Opus_Search_Exception and Opus_Storage_Exception are
 * printed and do not abort the run.
 *
 * @param mixed $startId Lower bound, handed on to getDocumentIds().
 * @param mixed $endId   Upper bound, handed on to getDocumentIds().
 * @return float Seconds elapsed between the start and the end of the loop.
 */
private function extract($startId, $endId)
{
    $this->forceSyncMode();

    $docIds = $this->getDocumentIds($startId, $endId);
    $extractor = Opus_Search_Service::selectIndexingService('indexBuilder');

    // NOTE(review): the message says "indexing" although this method extracts.
    echo date('Y-m-d H:i:s') . " Start indexing of " . count($docIds) . " documents.\n";

    $processed = 0;
    $startedAt = microtime(true);

    foreach ($docIds as $docId) {
        // per-document stopwatch, used for the slow-document warning below
        $docStartedAt = microtime(true);

        $doc = new Opus_Document($docId);
        $files = $doc->getFile();

        foreach ($files as $file) {
            try {
                $extractor->extractDocumentFile($file, $doc);
            } catch (Opus_Search_Exception $e) {
                echo date('Y-m-d H:i:s') . " ERROR: Failed extracting document {$docId}.\n";
                echo date('Y-m-d H:i:s') . " {$e->getMessage()}\n";
            } catch (Opus_Storage_Exception $e) {
                echo date('Y-m-d H:i:s') . " ERROR: Failed extracting unavailable file on document {$docId}.\n";
                echo date('Y-m-d H:i:s') . " {$e->getMessage()}\n";
            }
        }

        $timeDelta = microtime(true) - $docStartedAt;
        if ($timeDelta > 30) {
            echo date('Y-m-d H:i:s') . " WARNING: Extracting document {$docId} took {$timeDelta} seconds.\n";
        }

        ++$processed;

        // report progress after every tenth document
        if (0 == $processed % 10) {
            $this->outputProgress($startedAt, $processed);
        }
    }

    $elapsed = microtime(true) - $startedAt;

    echo PHP_EOL . date('Y-m-d H:i:s') . ' Finished extracting.' . PHP_EOL;

    // new search API doesn't track number of indexed files, but issues are kept written to log file
    echo PHP_EOL . PHP_EOL . 'Details were written to opus-console.log';

    $this->resetMode();

    return $elapsed;
}
/**
 * Adds the documents in the given ID range to the search index, in batches of
 * ten, and reports progress on standard output.
 *
 * When the $_deleteAllDocs flag is set, all documents are removed from the
 * index before indexing starts.
 *
 * @param mixed $startId Lower bound, handed on to getDocumentIds().
 * @param mixed $endId   Upper bound, handed on to getDocumentIds().
 * @return float Seconds elapsed between the start and the end of indexing.
 */
private function index($startId, $endId)
{
    $this->forceSyncMode();

    $docIds = $this->getDocumentIds($startId, $endId);

    $indexer = Opus_Search_Service::selectIndexingService('indexBuilder');

    // Wipe the index only when deletion has been requested. This check used to
    // be negated, which cleared the whole index exactly when the flag was NOT
    // set and kept stale entries when it was.
    if ($this->_deleteAllDocs) {
        $indexer->removeAllDocumentsFromIndex();
    }

    echo date('Y-m-d H:i:s') . " Start indexing of " . count($docIds) . " documents.\n";
    $numOfDocs = 0;
    $runtime = microtime(true);

    $docs = array();

    // measure time for each document
    // (covers loading the document only; the batch is sent to the index below)
    foreach ($docIds as $docId) {
        $timeStart = microtime(true);

        $doc = new Opus_Document($docId);

        // dirty hack: disable implicit reindexing of documents in case of cache misses
        $doc->unregisterPlugin('Opus_Document_Plugin_Index');

        $docs[] = $doc;

        $timeDelta = microtime(true) - $timeStart;
        if ($timeDelta > 30) {
            echo date('Y-m-d H:i:s') . " WARNING: Indexing document {$docId} took {$timeDelta} seconds.\n";
        }

        $numOfDocs++;

        // flush a full batch of ten documents to the index
        if ($numOfDocs % 10 == 0) {
            $this->addDocumentsToIndex($indexer, $docs);
            $docs = array();
            $this->outputProgress($runtime, $numOfDocs);
        }
    }

    // Index leftover documents
    if (count($docs) > 0) {
        $this->addDocumentsToIndex($indexer, $docs);
        $this->outputProgress($runtime, $numOfDocs);
    }

    $runtime = microtime(true) - $runtime;
    echo PHP_EOL . date('Y-m-d H:i:s') . ' Finished indexing.' . PHP_EOL;

    // new search API doesn't track number of indexed files, but issues are kept written to log file
    echo PHP_EOL . PHP_EOL . 'Details were written to opus-console.log';

    $this->resetMode();

    return $runtime;
}
/**
 * Calls dropCached() on the search configuration and on the search service,
 * in that order -- presumably so that later lookups start from a fresh state.
 */
public function resetSearch()
{
    Opus_Search_Config::dropCached();
    Opus_Search_Service::dropCached();
}
/**
 * Regression test for OPUSVIER-1726.
 *
 * While the RSS feed is requested, the search index holds one document that
 * has already been deleted from the database. The feed must not mention that
 * document and must list the second document, which still exists.
 */
public function testSolrIndexIsNotUpToDate()
{
    // add a document to the search index that is not stored in database
    $staleDoc = $this->createTestDocument();
    $staleDoc->setServerState('published');
    $staleDoc->setLanguage('eng');

    $staleTitle = new Opus_Title();
    $staleTitle->setValue('test document for OPUSVIER-1726');
    $staleTitle->setLanguage('eng');
    $staleDoc->setTitleMain($staleTitle);

    // unregister index plugin: database changes are not reflected in search index
    $staleDoc->unregisterPlugin('Opus_Document_Plugin_Index');
    $staleDoc->store();

    $staleId = $staleDoc->getId();
    $stalePublished = new Zend_Date($staleDoc->getServerDatePublished());
    $staleDate = $stalePublished->get(Zend_Date::RFC_2822);

    $indexer = Opus_Search_Service::selectIndexingService(null, 'solr');
    $indexer->addDocumentsToIndex($staleDoc);

    // delete document from database
    $staleDoc->deletePermanent();

    // make sure the second document does not get the same value for server_date_published
    sleep(2);

    $liveDoc = $this->createTestDocument();
    $liveDoc->setServerState('published');
    $liveDoc->setLanguage('eng');

    $liveTitle = new Opus_Title();
    $liveTitle->setValue('another test document for OPUSVIER-1726');
    $liveTitle->setLanguage('eng');
    $liveDoc->setTitleMain($liveTitle);
    $liveDoc->store();

    $liveId = $liveDoc->getId();
    $livePublished = new Zend_Date($liveDoc->getServerDatePublished());
    $liveDate = $livePublished->get(Zend_Date::RFC_2822);

    $this->dispatch('/rss/index/index/searchtype/all');

    // make search index up to date (clean up before any assertion can fail)
    $indexer->removeDocumentsFromIndexById($staleId);
    $liveDoc->deletePermanent();

    $response = $this->getResponse();
    $feed = $response->getBody();

    $this->assertNotContains("No Opus_Db_Documents with id {$staleId} in database.", $feed);

    // titles
    $this->assertNotContains('<title>test document for OPUSVIER-1726</title>', $feed);
    $this->assertContains('<title>another test document for OPUSVIER-1726</title>', $feed);

    // frontdoor links
    $this->assertNotContains("frontdoor/index/index/docId/{$staleId}</link>", $feed);
    $this->assertContains("frontdoor/index/index/docId/{$liveId}</link>", $feed);

    // publication dates
    $this->assertNotContains("<pubDate>{$staleDate}</pubDate>", $feed);
    $this->assertNotContains("<lastBuildDate>{$staleDate}</lastBuildDate>", $feed);
    $this->assertContains("<pubDate>{$liveDate}</pubDate>", $feed);
    $this->assertContains("<lastBuildDate>{$liveDate}</lastBuildDate>", $feed);

    $this->assertEquals(200, $this->getResponse()->getHttpResponseCode());
}