/**
  * Checks that the consistency-check cron script reports and resolves a fully inconsistent index.
  *
  * All documents are removed from the Solr index before a consistency-check job is created and the
  * cron script is executed. Afterwards the test expects
  *  - no consistency-check job left in state undefined or failed,
  *  - the log file to exist and its lock file to be gone,
  *  - the log to state that every published document was found inconsistent and was resolved.
  *
  * The log file is removed at the end of a successful run only; after a failed assertion it is
  * left in place.
  *
  * TODO fix for Solr Update
  */
 public function testJobSuccessWithInconsistency()
 {
     // clear the index so that every published document is missing from it
     $service = Opus_Search_Service::selectIndexingService(null, 'solr');
     $service->removeAllDocumentsFromIndex();

     $this->createJob(Opus_Job_Worker_ConsistencyCheck::LABEL);
     $this->executeScript('cron-check-consistency.php');

     $labels = array(Opus_Job_Worker_ConsistencyCheck::LABEL);

     $allJobs = Opus_Job::getByLabels($labels, null, Opus_Job::STATE_UNDEFINED);
     $this->assertTrue(empty($allJobs), 'Expected no more jobs in queue: found ' . count($allJobs) . ' jobs');

     $failedJobs = Opus_Job::getByLabels($labels, null, Opus_Job::STATE_FAILED);
     $this->assertTrue(empty($failedJobs), 'Expected no failed jobs in queue: found ' . count($failedJobs) . ' jobs');

     $logFile = parent::$scriptPath . '/../../workspace/log/' . 'opus_consistency-check.log';
     $this->assertFileExists($logFile, 'Logfile opus_consistency-check.log does not exist');
     $this->assertFileNotExists($logFile . '.lock', 'Lockfile opus_consistency-check.log.lock was not removed');

     $publishedDocsCount = $this->getPublishedDocumentCount();
     $contents = file_get_contents($logFile);

     // each entry: text that must occur in the log, and the message reported when it is missing;
     // the entries are checked in the listed order
     $expectedLogEntries = array(
         array(
             'checking ' . $publishedDocsCount . ' published documents for consistency.',
             "Logfile opus_consistency-check.log does not contain 'checking {$publishedDocsCount} ...' [{$contents}].",
         ),
         array(
             'inconsistency found for document 1: document is in database, but is not in Solr index.',
             'Logfile opus_consistency-check.log does not contain "inconsistency found for document 1: ...".',
         ),
         array(
             'inconsistency found for document 200: document is in database, but is not in Solr index.',
             'Logfile opus_consistency-check.log does not contain "inconsistency found for document 200: ...".',
         ),
         array(
             $publishedDocsCount . ' inconsistencies were detected: ' . $publishedDocsCount . ' of them were resolved.',
             'Logfile opus_consistency-check.log does not contain "' . $publishedDocsCount . ' inconsistencies ...".',
         ),
         array(
             'number of updates: ' . $publishedDocsCount,
             'Logfile opus_consistency-check.log does not contain "number of updates: ' . $publishedDocsCount . '".',
         ),
         array(
             'number of deletions: 0',
             'Logfile opus_consistency-check.log does not contain "number of deletions: 0".',
         ),
         array(
             'Completed operation after',
             'Logfile opus_consistency-check.log does not contain "Completed operation after".',
         ),
     );

     foreach ($expectedLogEntries as $entry) {
         list($needle, $message) = $entry;
         // strpos() returns 0 for a match at the very beginning, hence the strict comparison
         $this->assertFalse(strpos($contents, $needle) === false, $message);
     }

     unlink($logFile);
 }
/**
 *
 * This script finds all documents with ServerState=published whose ServerDateModified in the Solr index
 * is older than the date stored in the database (the check itself flags any difference between the two
 * values). If a document is not present in the index, a corresponding error message is printed for
 * that document.
 *
 * See also ticket OPUSVIER-2853.
 *
 */
// counters: documents whose modification date differs / documents missing from the index
$numOfModified = 0;
$numOfErrors = 0;

$finder = new Opus_DocumentFinder();
$finder->setServerState('published');

foreach ($finder->ids() as $docId) {
    // look the document up in the search index by its ID
    $search = Opus_Search_Service::selectSearchingService();
    $query = Opus_Search_QueryFactory::selectDocumentById($search, $docId);
    $matchCount = $search->customSearch($query)->getAllMatchesCount();

    // anything but exactly one match is reported as "not stored in search index"
    if ($matchCount != 1) {
        echo "ERROR: document # {$docId} is not stored in search index\n";
        $numOfErrors++;
        continue;
    }

    // NOTE(review): getResults() is called on the searching service, not on the result object
    // returned by customSearch() above - confirm that the service really provides it
    $result = $search->getResults();
    $solrModificationDate = $result[0]->getServerDateModified();

    // NOTE(review): the database value is converted to a Unix timestamp; presumably the value
    // taken from the index is a Unix timestamp as well - verify
    $document = new Opus_Document($docId);
    $docModificationDate = $document->getServerDateModified()->getUnixTimestamp();

    if ($solrModificationDate == $docModificationDate) {
        continue;
    }

    $numOfModified++;
    echo "document # {$docId} is modified\n";
}
 /**
  * Calls extractDocumentFile() on the 'indexBuilder' indexing service for every file of every
  * document returned by getDocumentIds().
  *
  * Opus_Search_Exception and Opus_Storage_Exception raised for a single file are printed and do not
  * stop the run. Progress is printed after every tenth document.
  *
  * NOTE(review): any other exception leaves this method before resetMode() is called - confirm
  * whether callers rely on the mode being reset in that case.
  *
  * @param mixed $startId forwarded to getDocumentIds(); presumably the first document ID - confirm
  * @param mixed $endId   forwarded to getDocumentIds(); presumably the last document ID - confirm
  * @return float seconds spent in the extraction loop
  */
 private function extract($startId, $endId)
 {
     $this->forceSyncMode();
     $docIds = $this->getDocumentIds($startId, $endId);
     $extractor = Opus_Search_Service::selectIndexingService('indexBuilder');
     // this method extracts files, so the start message says so (matches "Finished extracting.")
     echo date('Y-m-d H:i:s') . " Start extraction of " . count($docIds) . " documents.\n";
     $numOfDocs = 0;
     $runtime = microtime(true);
     // measure time for each document
     foreach ($docIds as $docId) {
         $timeStart = microtime(true);
         $doc = new Opus_Document($docId);
         foreach ($doc->getFile() as $file) {
             try {
                 $extractor->extractDocumentFile($file, $doc);
             } catch (Opus_Search_Exception $e) {
                 echo date('Y-m-d H:i:s') . " ERROR: Failed extracting document {$docId}.\n";
                 echo date('Y-m-d H:i:s') . "        {$e->getMessage()}\n";
             } catch (Opus_Storage_Exception $e) {
                 echo date('Y-m-d H:i:s') . " ERROR: Failed extracting unavailable file on document {$docId}.\n";
                 echo date('Y-m-d H:i:s') . "        {$e->getMessage()}\n";
             }
         }
         $timeDelta = microtime(true) - $timeStart;
         if ($timeDelta > 30) {
             echo date('Y-m-d H:i:s') . " WARNING: Extracting document {$docId} took {$timeDelta} seconds.\n";
         }
         $numOfDocs++;
         if ($numOfDocs % 10 == 0) {
             $this->outputProgress($runtime, $numOfDocs);
         }
     }
     // from here on $runtime holds the elapsed time instead of the start time
     $runtime = microtime(true) - $runtime;
     echo PHP_EOL . date('Y-m-d H:i:s') . ' Finished extracting.' . PHP_EOL;
     // new search API doesn't track number of indexed files, but issues are kept written to log file
     echo PHP_EOL . PHP_EOL . 'Details were written to opus-console.log';
     $this->resetMode();
     return $runtime;
 }
 /**
  * Adds the documents returned by getDocumentIds() to the search index in batches of ten.
  *
  * If $this->_deleteAllDocs is set, all documents are removed from the index before indexing
  * starts; otherwise the existing index content is left in place. Progress is printed after
  * every batch, including the final partial one.
  *
  * NOTE(review): an exception thrown while loading or indexing leaves this method before
  * resetMode() is called - confirm whether callers rely on the mode being reset in that case.
  *
  * @param mixed $startId forwarded to getDocumentIds(); presumably the first document ID - confirm
  * @param mixed $endId   forwarded to getDocumentIds(); presumably the last document ID - confirm
  * @return float seconds spent in the indexing loop
  */
 private function index($startId, $endId)
 {
     $this->forceSyncMode();
     $docIds = $this->getDocumentIds($startId, $endId);
     $indexer = Opus_Search_Service::selectIndexingService('indexBuilder');
     // clear the index only when deleting all documents was requested
     if ($this->_deleteAllDocs) {
         $indexer->removeAllDocumentsFromIndex();
     }
     echo date('Y-m-d H:i:s') . " Start indexing of " . count($docIds) . " documents.\n";
     $numOfDocs = 0;
     $runtime = microtime(true);
     $docs = array();
     // measure time for each document
     foreach ($docIds as $docId) {
         $timeStart = microtime(true);
         $doc = new Opus_Document($docId);
         // dirty hack: disable implicit reindexing of documents in case of cache misses
         $doc->unregisterPlugin('Opus_Document_Plugin_Index');
         $docs[] = $doc;
         // this delta covers loading the document only; the index update itself happens
         // batch-wise in addDocumentsToIndex() below
         $timeDelta = microtime(true) - $timeStart;
         if ($timeDelta > 30) {
             echo date('Y-m-d H:i:s') . " WARNING: Indexing document {$docId} took {$timeDelta} seconds.\n";
         }
         $numOfDocs++;
         if ($numOfDocs % 10 == 0) {
             $this->addDocumentsToIndex($indexer, $docs);
             $docs = array();
             $this->outputProgress($runtime, $numOfDocs);
         }
     }
     // Index leftover documents
     if (count($docs) > 0) {
         $this->addDocumentsToIndex($indexer, $docs);
         $this->outputProgress($runtime, $numOfDocs);
     }
     // from here on $runtime holds the elapsed time instead of the start time
     $runtime = microtime(true) - $runtime;
     echo PHP_EOL . date('Y-m-d H:i:s') . ' Finished indexing.' . PHP_EOL;
     // new search API doesn't track number of indexed files, but issues are kept written to log file
     echo PHP_EOL . PHP_EOL . 'Details were written to opus-console.log';
     $this->resetMode();
     return $runtime;
 }
 /**
  * Calls dropCached() on Opus_Search_Config and then on Opus_Search_Service.
  *
  * Presumably this makes later search lookups start from freshly built configuration and
  * service objects - confirm against the implementation of both dropCached() methods.
  */
 public function resetSearch()
 {
     Opus_Search_Config::dropCached();
     Opus_Search_Service::dropCached();
 }
 /**
  * Regression test for OPUSVIER-1726
  *
  * Puts a document into the Solr index, deletes it from the database and then requests the RSS
  * feed. The feed must answer with HTTP 200, must not mention the deleted document (title, link,
  * dates, "not in database" error) and must list a second, regularly stored document.
  */
 public function testSolrIndexIsNotUpToDate()
 {
     // add a document to the search index that is not stored in database
     $orphanedDoc = $this->createTestDocument();
     $orphanedDoc->setServerState('published');
     $orphanedDoc->setLanguage('eng');

     $orphanedTitle = new Opus_Title();
     $orphanedTitle->setValue('test document for OPUSVIER-1726');
     $orphanedTitle->setLanguage('eng');
     $orphanedDoc->setTitleMain($orphanedTitle);

     // unregister index plugin: database changes are not reflected in search index
     $orphanedDoc->unregisterPlugin('Opus_Document_Plugin_Index');
     $orphanedDoc->store();
     $orphanedDocId = $orphanedDoc->getId();

     $orphanedPublished = new Zend_Date($orphanedDoc->getServerDatePublished());
     $orphanedDateValue = $orphanedPublished->get(Zend_Date::RFC_2822);

     $indexer = Opus_Search_Service::selectIndexingService(null, 'solr');
     $indexer->addDocumentsToIndex($orphanedDoc);

     // delete document from database
     $orphanedDoc->deletePermanent();

     // make sure the second document does not get the same value for server_date_published
     sleep(2);

     $regularDoc = $this->createTestDocument();
     $regularDoc->setServerState('published');
     $regularDoc->setLanguage('eng');

     $regularTitle = new Opus_Title();
     $regularTitle->setValue('another test document for OPUSVIER-1726');
     $regularTitle->setLanguage('eng');
     $regularDoc->setTitleMain($regularTitle);

     $regularDoc->store();
     $regularDocId = $regularDoc->getId();

     $regularPublished = new Zend_Date($regularDoc->getServerDatePublished());
     $regularDateValue = $regularPublished->get(Zend_Date::RFC_2822);

     $this->dispatch('/rss/index/index/searchtype/all');

     // make search index up to date
     $indexer->removeDocumentsFromIndexById($orphanedDocId);
     $regularDoc->deletePermanent();

     $response = $this->getResponse();
     $body = $response->getBody();

     // each entry: whether the fragment has to be present in the feed, and the fragment itself;
     // the entries are checked in the listed order
     $feedChecks = array(
         array(false, "No Opus_Db_Documents with id {$orphanedDocId} in database."),
         array(false, '<title>test document for OPUSVIER-1726</title>'),
         array(true, '<title>another test document for OPUSVIER-1726</title>'),
         array(false, "frontdoor/index/index/docId/{$orphanedDocId}</link>"),
         array(true, "frontdoor/index/index/docId/{$regularDocId}</link>"),
         array(false, "<pubDate>{$orphanedDateValue}</pubDate>"),
         array(false, "<lastBuildDate>{$orphanedDateValue}</lastBuildDate>"),
         array(true, "<pubDate>{$regularDateValue}</pubDate>"),
         array(true, "<lastBuildDate>{$regularDateValue}</lastBuildDate>"),
     );

     foreach ($feedChecks as $check) {
         list($mustBePresent, $fragment) = $check;
         if ($mustBePresent) {
             $this->assertContains($fragment, $body);
         } else {
             $this->assertNotContains($fragment, $body);
         }
     }

     $this->assertEquals(200, $this->getResponse()->getHttpResponseCode());
 }