/**
 * Renders the index view, optionally with the ids of published documents
 * matching the requested year.
 *
 * The 'year' request parameter is only honoured when it consists of exactly
 * four digits; in that case the ids returned by the document finder for the
 * range ($year, $year + 1) are assigned to the view as 'ids'.
 */
public function listAction()
 {
     $this->indexAction();
     $requestedYear = trim($this->_getParam('year'));
     $isFourDigitYear = preg_match('/^\\d{4}$/', $requestedYear) > 0;
     if (!$isFourDigitYear) {
         // No usable year given: render the index view without an id list.
         return $this->render('index');
     }
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     $finder->setServerDatePublishedRange($requestedYear, $requestedYear + 1);
     $this->view->ids = $finder->ids();
     return $this->render('index');
 }
 /**
  * Builds a metadata-completeness report over all published documents.
  *
  * For every published document the method records whether it has no
  * 'projects' collection and whether it has neither an MSC subject nor an
  * 'msc' collection. Documents with at least one such error are additionally
  * grouped by author (key "Name <email>", or just the name if no email is
  * set), so that each author maps to a sorted, duplicate-free list of the
  * affected document ids.
  *
  * The 'year' request parameter must consist of exactly four digits, otherwise
  * only the index view is rendered. The year is currently not used to restrict
  * the document set because the date-range filter below is disabled.
  *
  * The results are assigned to the view as 'stats' and 'email2docid'.
  */
 public function listAction()
 {
     $this->indexAction();
     $year = trim($this->_getParam('year'));
     if (preg_match('/^\\d{4}$/', $year) < 1) {
         return $this->render('index');
     }
     $stats = array();
     $emailToDocIds = array();
     $f = new Opus_DocumentFinder();
     $f->setServerState('published');
     // Disabled: restriction of the report to the requested year.
     //        $f->setServerDatePublishedRange($year, $year+1);
     foreach ($f->ids() as $docId) {
         $document = new Opus_Document($docId);
         $docStats = array('id' => $docId, 'authorEmails' => $this->_getAuthorEmails($document), 'errors' => 0, 'missingProject' => '', 'missingMsc' => '', 'missingReferee' => '');
         if ($this->_countCollectionRole($document, 'projects') === 0) {
             $docStats['errors']++;
             $docStats['missingProject'] = true;
         }
         if ($this->_countSubjectMsc($document) === 0 and $this->_countCollectionRole($document, 'msc') === 0) {
             $docStats['errors']++;
             $docStats['missingMsc'] = true;
         }
         // Disabled: check for a missing referee.
         //            if (count($document->getPersonReferee()) === 0) {
         //                $docStats['errors']++;
         //                $docStats['missingReferee'] = true;
         //            }
         if ($docStats['errors'] > 0) {
             foreach ($docStats['authorEmails'] as $author) {
                 $email = trim($author['email']);
                 $name = trim($author['name']);
                 $key = $name;
                 if (!empty($email)) {
                     $key .= " <{$email}>";
                 }
                 if (!array_key_exists($key, $emailToDocIds)) {
                     $emailToDocIds[$key] = array('email' => $email, 'name' => $name, 'ids' => array());
                 }
                 $emailToDocIds[$key]['ids'][] = $docId;
             }
         }
         $stats[] = $docStats;
     }
     // Deduplicate and sort every author's id list exactly once, instead of
     // re-running array_unique()/sort() on the whole list after each append.
     foreach (array_keys($emailToDocIds) as $key) {
         $ids = array_unique($emailToDocIds[$key]['ids']);
         sort($ids);
         $emailToDocIds[$key]['ids'] = $ids;
     }
     $this->view->stats = $stats;
     $this->view->email2docid = $emailToDocIds;
     return $this->render('index');
 }
 /**
  * Refreshes all documents that are linked to the collection subtree rooted
  * at the given model: their XML cache entries are removed and their
  * ServerDateModified is set to the current time.
  *
  * Nothing happens when the model is null or has no id.
  *
  * @param Opus_Collection $model Starting point for recursive update to documents
  */
 protected function updateDocuments($model)
 {
     if ($model === null || $model->getId() === null) {
         // TODO explain why this is right
         return;
     }
     // Select describing the ids of every collection in the subtree.
     $collectionsTable = Opus_Db_TableGateway::getInstance('Opus_Db_Collections');
     $subtreeIdSelect = $collectionsTable->selectSubtreeById($model->getId(), 'id');
     $finder = new Opus_DocumentFinder();
     $finder->setCollectionId($subtreeIdSelect);
     // Drop the cached XML of every affected document.
     $cache = new Opus_Model_Xml_Cache();
     $cache->removeAllEntriesWhereSubSelect($finder->getSelectIds());
     // Stamp the affected documents with the current time as ServerDateModified.
     $now = new Opus_Date();
     $now->setNow();
     Opus_Document::setServerDateModifiedByIds($now, $finder->ids());
 }
Exemple #4
0
 /**
  * Verifies that every published database document is present exactly once
  * in the Solr index and that the serverDateModified value stored in the
  * index equals the one in the database.
  *
  * Each missing document or timestamp mismatch is counted as an
  * inconsistency and triggers forceReindexing(); successful reindexing is
  * counted as an update. Documents found multiple times in the index are
  * only reported via the error log.
  */
 private function checkDatabase()
 {
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     $ids = $finder->ids();
     $this->logger->info('checking ' . $finder->count() . ' published documents for consistency.');
     foreach ($ids as $id) {
         try {
             $doc = new Opus_Document($id);
         } catch (Opus_Model_NotFoundException $e) {
             // document vanished from the database in the meantime: skip it
             continue;
         }
         $dbTimestamp = $doc->getServerDateModified()->getUnixTimestamp();
         // look the document up in the index by its id
         $lookup = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID);
         $lookup->setField('id', $id);
         $resultList = $this->searcher->search($lookup, $this->validateDocIds);
         $indexedDocs = $resultList->getResults();
         $hits = $resultList->getNumberOfHits();
         if ($hits == 0) {
             $this->logger->info("inconsistency found for document {$id}: document is in database, but is not in Solr index.");
             $this->numOfInconsistencies++;
             if ($this->forceReindexing($doc)) {
                 $this->numOfUpdates++;
             }
             continue;
         }
         if ($hits != 1) {
             $this->logger->err('unexpected state: document with id ' . $id . ' exists multiple times in index.');
             continue;
         }
         if ($indexedDocs[0]->getServerDateModified() == $dbTimestamp) {
             // index and database agree on the modification timestamp
             continue;
         }
         $this->numOfInconsistencies++;
         $this->logger->info("inconsistency found for document {$id}: mismatch between values of server_date_modified in database and Solr index.");
         if ($this->forceReindexing($doc)) {
             $this->numOfUpdates++;
         }
     }
 }
Exemple #5
0
 /**
  * Creates a result list, optionally dropping results whose documents are
  * not found among the published documents in the database.
  *
  * @param array $results        Result objects returned from the index; each must provide getId().
  * @param mixed $numberOfHits   Number of hits; stored unchanged.
  * @param mixed $queryTime      Query time; stored unchanged.
  * @param array $facets         Facets; stored unchanged.
  * @param bool  $validateDocIds When true, only results whose id is returned by an
  *                              Opus_DocumentFinder restricted to server state
  *                              'published' are kept; otherwise all results are kept.
  * @param mixed $log            Optional logger providing err(); used to report results
  *                              that are missing from the database.
  */
 public function __construct($results = array(), $numberOfHits = 0, $queryTime = 0, $facets = array(), $validateDocIds = true, $log = null)
 {
     $this->log = $log;
     $this->numberOfHits = $numberOfHits;
     $this->queryTime = $queryTime;
     $this->facets = $facets;
     $this->results = array();
     if (empty($results)) {
         return;
     }
     if (!$validateDocIds) {
         $this->results = $results;
         return;
     }
     // make sure that documents returned from index exist in database
     $docIds = array();
     foreach ($results as $result) {
         array_push($docIds, $result->getId());
     }
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     $finder->setIdSubset($docIds);
     $docIdsDB = $finder->ids();
     foreach ($results as $result) {
         if (in_array($result->getId(), $docIdsDB)) {
             array_push($this->results, $result);
         }
     }
     // Report the real numbers: what the index returned versus what survived
     // the database check. (Previously the count of kept results was reported
     // as the index count and the missing ones were subtracted a second time.)
     $numInIndex = count($results);
     $numInDB = count($this->results);
     if ($numInDB < $numInIndex && !is_null($this->log)) {
         $this->log->err("found inconsistency between database and solr index: index returns {$numInIndex} documents, but only " . $numInDB . " found in database");
     }
 }
 * details. You should have received a copy of the GNU General Public License 
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51 
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Edouard Simon <*****@*****.**>
 * @copyright   Copyright (c) 2011-2013, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id: cron-update-document-cache.php 11732 2013-06-24 12:26:11Z esimon $
 */
define('APPLICATION_ENV', 'development');
// Bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';
// Select the document_id column of the XML cache table; it is handed to the
// finder as a "not exists" sub-select (presumably so that only documents
// without a cache entry are returned -- confirm against Opus_DocumentFinder).
$cacheTable = new Opus_Db_DocumentXmlCache();
$adapter = Zend_Registry::get('db_adapter');
$cachedIdsSelect = $adapter->select();
$cachedIdsSelect->from($cacheTable->info('name'), 'document_id');
$finder = new Opus_DocumentFinder();
$finder->setSubSelectNotExists($cachedIdsSelect);
$pendingIds = $finder->ids();
echo "processing " . count($pendingIds) . " documents\n";
foreach ($pendingIds as $docId) {
    $document = new Opus_Document($docId);
    $xmlCache = new Opus_Model_Xml_Cache();
    // xml version 1
    $xmlModel = new Opus_Model_Xml();
    $xmlModel->setStrategy(new Opus_Model_Xml_Version1())
        ->excludeEmptyFields()
        ->setModel($document)
        ->setXmlCache($xmlCache);
    // The DOM itself is not needed; the call is made for its effect on the
    // attached cache (presumably it writes the cache entry -- confirm).
    $dom = $xmlModel->getDomDocument();
    echo "Cache refreshed for document#{$docId}\n";
}
 * @copyright   Copyright (c) 2008-2012, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id: find_missing_published_docs_in_searchindex.php 12011 2013-07-05 19:39:58Z sszott $
 */
/**
 *
 * This script prints the IDs of all documents that are in server state
 * published but, due to an error, are not represented in the search index.
 *
 * See also story OPUSVIER-2368.
 *
 */
// number of published documents for which the index lookup did not yield exactly one hit
$numOfErrors = 0;
$finder = new Opus_DocumentFinder();
$finder->setServerState('published');
foreach ($finder->ids() as $docId) {
    // check if document with id $docId is already persisted in search index
    $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID);
    $query->setField('id', $docId);
    $query->setReturnIdsOnly(true);
    $query->setRows(Opus_SolrSearch_Query::MAX_ROWS);
    $searcher = new Opus_SolrSearch_Searcher();
    // anything other than exactly one hit (zero or several) is reported as missing
    if ($searcher->search($query)->getNumberOfHits() != 1) {
        echo "document # {$docId} is not stored in search index\n";
        $numOfErrors++;
    }
}
if ($numOfErrors > 0) {
    echo "{$numOfErrors} missing documents were found\n";
} else {
    echo "no errors were found\n";
 /**
  * Retrieve a document id by an oai identifier.
  *
  * Two identifier forms are supported, selected by the part before the first
  * colon: 'urn' identifiers are resolved through the document finder
  * (restricted to the delivering document states), while for 'oai'
  * identifiers the third colon-separated part is taken as the document id.
  *
  * @param string $oaiIdentifier
  * @return int|string Document id consisting of digits only
  * @throws Oai_Model_Exception with code BADARGUMENT if the identifier prefix is
  *         unknown, or with code IDDOESNOTEXIST if no numeric document id
  *         could be determined
  */
 private function getDocumentIdByIdentifier($oaiIdentifier)
 {
     $identifierParts = explode(":", $oaiIdentifier);
     $docId = null;
     switch ($identifierParts[0]) {
         case 'urn':
             $finder = new Opus_DocumentFinder();
             $finder->setIdentifierTypeValue('urn', $oaiIdentifier);
             $finder->setServerStateInList($this->_deliveringDocumentStates);
             $docIds = $finder->ids();
             // An unknown URN yields no ids: keep $docId null so that the
             // check below raises IDDOESNOTEXIST instead of reading a
             // non-existent array offset.
             if (isset($docIds[0])) {
                 $docId = $docIds[0];
             }
             break;
         case 'oai':
             if (isset($identifierParts[2])) {
                 $docId = $identifierParts[2];
             }
             break;
         default:
             throw new Oai_Model_Exception('The prefix of the identifier argument is unknown.', Oai_Model_Error::BADARGUMENT);
     }
     if (empty($docId) or !preg_match('/^\\d+$/', $docId)) {
         throw new Oai_Model_Exception('The value of the identifier argument is unknown or illegal in this repository.', Oai_Model_Error::IDDOESNOTEXIST);
     }
     return $docId;
 }
 /**
  * Regression test for OPUSVIER-849
  *
  * Checks that the hit count shown by the catch-all search equals the number
  * of documents in the search index, that the total shown on the start page
  * equals the number of published documents in the database, and that both
  * displayed numbers agree.
  */
 public function testStartPageContainsTotalNumOfDocs()
 {
     // get total number of documents from all doc search
     $this->dispatch('/solrsearch/index/search/searchtype/all');
     $document = new DOMDocument();
     $document->loadHTML($this->getResponse()->getBody());
     $element = $document->getElementById('search-result-numofhits');
     $numOfHits = $element->firstChild->textContent;
     $docsInIndex = $this->getDocsInSearchIndex();
     $numOfIndexDocs = $docsInIndex->getNumberOfHits();
     $this->assertEquals($numOfIndexDocs, $numOfHits);
     // clear the body before dispatching the second request
     $this->getResponse()->clearBody();
     $this->dispatch('/home');
     $document = new DOMDocument();
     $document->loadHTML($this->getResponse()->getBody());
     $element = $document->getElementById('solrsearch-totalnumofdocs');
     $numOfDocs = $element->firstChild->textContent;
     $docFinder = new Opus_DocumentFinder();
     $docFinder->setServerState('published');
     $numOfDbDocs = $docFinder->count();
     $this->assertEquals($numOfDbDocs, $numOfDocs);
     // brief explanation of the intent: the document count of the catch-all
     // search is determined by an index search, whereas the number of
     // documents shown on the start page is determined via the
     // DocumentFinder: ideally these two numbers should not differ from
     // each other
     // if they do differ, it is due to an inconsistency between database
     // and search index (which should not actually occur within the tests)
     if ($numOfDbDocs != $numOfIndexDocs) {
         // determine the doc IDs that exist in the index but not in the DB,
         // and those that exist in the DB but not in the index
         $idsIndex = array();
         $results = $docsInIndex->getResults();
         foreach ($results as $result) {
             array_push($idsIndex, $result->getId());
         }
         $idsDb = $docFinder->ids();
         $idsIndexOnly = array_diff($idsIndex, $idsDb);
         $this->assertEquals(0, count($idsIndexOnly), 'Document IDs in search index, but not in database: ' . var_export($idsIndexOnly, true));
         $idsDbOnly = array_diff($idsDb, $idsIndex);
         $this->assertEquals(0, count($idsDbOnly), 'Document IDs in database, but not in search index: ' . var_export($idsDbOnly, true));
         $this->assertEquals($numOfDbDocs, $numOfIndexDocs, "number of docs in database ({$numOfDbDocs}) and search index ({$numOfIndexDocs}) differ from each other");
     }
     $this->assertEquals($numOfDocs, $numOfHits);
 }
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Sascha Szott <*****@*****.**>
 * @copyright   Copyright (c) 2008-2012, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id: delete_non-demo_docs.php 10249 2012-02-15 17:00:09Z sszott $
 */
/**
 * Creates the demo instance, which contains only the test documents with the
 * IDs 91 through 110. Every other document is deleted permanently.
 *
 */
$allDocsFinder = new Opus_DocumentFinder();
foreach ($allDocsFinder->ids() as $id) {
    $numericId = intval($id);
    if ($numericId >= 91 && $numericId <= 110) {
        // demo document: keep it
        continue;
    }
    $obsoleteDoc = new Opus_Document($id);
    $obsoleteDoc->deletePermanent();
    echo "document {$id} was deleted.\n";
}
// a fresh finder is used to count what is left after the deletions
$remainingDocsFinder = new Opus_DocumentFinder();
$remainingCount = $remainingDocsFinder->count();
echo "done -- num of remaining docs: " . $remainingCount . "\n";
exit;
 /**
  * Regression Test for OPUSVIER-3051
  *
  * Moving collection role 1 to another position must leave the
  * ServerDateModified of document 146 (which belongs to collection role 2)
  * unchanged.
  */
 public function testDocumentServerDateModifiedNotUpdatedWhenCollectionSortOrderChanged()
 {
     // preconditions: roles 1 and 2 sit at positions 1 and 2
     $firstRole = new Opus_CollectionRole(1);
     $this->assertEquals(1, $firstRole->getPosition(), 'Test setup changed');
     $secondRole = new Opus_CollectionRole(2);
     $this->assertEquals(2, $secondRole->getPosition(), 'Test setup changed');
     // precondition: document 146 is found for collection role 2
     $finder = new Opus_DocumentFinder();
     $finder->setCollectionRoleId(2);
     $idsInSecondRole = $finder->ids();
     $this->assertTrue(in_array(146, $idsInSecondRole), 'Test setup changed');
     // load the document before and after moving role 1 to position 2
     $before = new Opus_Document(146);
     $this->dispatch('/admin/collectionroles/move/roleid/1/pos/2');
     $after = new Opus_Document(146);
     // move role 1 back so the test data stays as it was
     $this->resetRequest();
     $this->resetResponse();
     $this->dispatch('/admin/collectionroles/move/roleid/1/pos/1');
     $modifiedBefore = (string) $before->getServerDateModified();
     $modifiedAfter = (string) $after->getServerDateModified();
     $this->assertEquals($modifiedBefore, $modifiedAfter);
 }
 /**
  * Retrieve all document ids for a valid oai request.
  *
  * The finder is always restricted to the delivering document states.
  * Further restrictions are derived from the request:
  *  - 'metadataPrefix': xMetaDiss/xMetaDissPlus require files visible in OAI,
  *    xMetaDiss additionally restricts the document types, epicur requires
  *    an identifier of type 'urn'.
  *  - 'set': either 'doc-type:<type>', 'bibliography:<true|false>' or
  *    '<collection role oai name>[:<oai subset>]'. Malformed doc-type or
  *    bibliography sets yield an empty result.
  *  - 'from' / 'until': dates in format Y-m-d; empty values are ignored.
  *    One day is added to 'until' before it is passed to the finder.
  *
  * @param array $oaiRequest
  * @return array
  * @throws Oai_Model_Exception if the set spec is invalid or refers to an unknown
  *         collection role or subset, or if 'from' / 'until' cannot be parsed
  */
 public function query(array $oaiRequest)
 {
     $finder = new Opus_DocumentFinder();
     // add server state restrictions
     $finder->setServerStateInList($this->deliveringDocumentStates);
     // a missing metadataPrefix simply adds no prefix-specific restriction
     $metadataPrefix = isset($oaiRequest['metadataPrefix']) ? $oaiRequest['metadataPrefix'] : null;
     if ('xMetaDissPlus' === $metadataPrefix || 'xMetaDiss' === $metadataPrefix) {
         $finder->setFilesVisibleInOai();
     }
     if ('xMetaDiss' === $metadataPrefix) {
         $finder->setTypeInList($this->xMetaDissRestriction);
     }
     if ('epicur' === $metadataPrefix) {
         $finder->setIdentifierTypeExists('urn');
     }
     if (array_key_exists('set', $oaiRequest)) {
         $setarray = explode(':', $oaiRequest['set']);
         if (!isset($setarray[0])) {
             return array();
         }
         if ($setarray[0] == 'doc-type') {
             if (count($setarray) !== 2 or empty($setarray[1])) {
                 return array();
             }
             $finder->setType($setarray[1]);
         } elseif ($setarray[0] == 'bibliography') {
             if (count($setarray) !== 2 or empty($setarray[1])) {
                 return array();
             }
             $setValue = $setarray[1];
             $bibliographyMap = array("true" => 1, "false" => 0);
             if (!isset($bibliographyMap[$setValue])) {
                 return array();
             }
             $finder->setBelongsToBibliography($bibliographyMap[$setValue]);
         } else {
             if (count($setarray) < 1 or count($setarray) > 2) {
                 $msg = "Invalid SetSpec: Must be in format 'set:subset'.";
                 throw new Oai_Model_Exception($msg);
             }
             // Trying to locate collection role and filter documents.
             $role = Opus_CollectionRole::fetchByOaiName($setarray[0]);
             if (is_null($role)) {
                 $msg = "Invalid SetSpec: Top level set does not exist.";
                 throw new Oai_Model_Exception($msg);
             }
             $finder->setCollectionRoleId($role->getId());
             // Trying to locate given collection and filter documents.
             if (count($setarray) == 2) {
                 $subsetName = $setarray[1];
                 $foundSubsets = array_filter($role->getOaiSetNames(), function ($s) use($subsetName) {
                     return $s['oai_subset'] === $subsetName;
                 });
                 if (count($foundSubsets) < 1) {
                     $msg = "Invalid SetSpec: Subset does not exist.";
                     throw new Oai_Model_Exception($msg);
                 }
                 foreach ($foundSubsets as $subset) {
                     if ($subset['oai_subset'] !== $subsetName) {
                         $msg = "Invalid SetSpec: Internal error.";
                         throw new Oai_Model_Exception($msg);
                     }
                     $finder->setCollectionId($subset['id']);
                 }
             }
         }
     }
     if (array_key_exists('from', $oaiRequest) and !empty($oaiRequest['from'])) {
         $from = DateTime::createFromFormat('Y-m-d', $oaiRequest['from']);
         // createFromFormat() returns false for unparsable input; fail with a
         // catchable exception instead of a fatal call on a boolean
         if ($from === false) {
             $msg = "Invalid 'from' argument: Must be in format 'YYYY-MM-DD'.";
             throw new Oai_Model_Exception($msg);
         }
         $finder->setServerDateModifiedAfter($from->format('Y-m-d'));
     }
     // an empty 'until' is ignored, like an empty 'from'
     if (array_key_exists('until', $oaiRequest) and !empty($oaiRequest['until'])) {
         $until = DateTime::createFromFormat('Y-m-d', $oaiRequest['until']);
         if ($until === false) {
             $msg = "Invalid 'until' argument: Must be in format 'YYYY-MM-DD'.";
             throw new Oai_Model_Exception($msg);
         }
         // shift the bound by one day before handing it to the "before" filter
         $until->add(new DateInterval('P1D'));
         $finder->setServerDateModifiedBefore($until->format('Y-m-d'));
     }
     return $finder->ids();
 }
Exemple #13
0
 /**
  * Looks up the ids of every document whose type equals the given name.
  *
  * @param  string  $typename The name of the document type.
  * @return array Array of document ids.
  *
  * @deprecated
  */
 public static function getIdsForDocType($typename)
 {
     $typeFinder = new Opus_DocumentFinder();
     $typeFinder->setType($typename);
     $matchingIds = $typeFinder->ids();
     return $matchingIds;
 }
 /**
  * Counts the documents in server state 'published' by fetching their ids.
  *
  * @return int Number of published document ids returned by the finder
  */
 private function getPublishedDocumentCount()
 {
     $publishedFinder = new Opus_DocumentFinder();
     $publishedFinder->setServerState('published');
     $publishedIds = $publishedFinder->ids();
     return count($publishedIds);
 }
// load collections (and check existence)
// A missing role is only logged here; the script continues regardless.
$mscRole = Opus_CollectionRole::fetchByName('msc');
if (!is_object($mscRole)) {
    $logger->warn("MSC collection does not exist.  Cannot migrate SubjectMSC.");
}
$ddcRole = Opus_CollectionRole::fetchByName('ddc');
if (!is_object($ddcRole)) {
    $logger->warn("DDC collection does not exist.  Cannot migrate SubjectDDC.");
}
// create enrichment keys (if necessary)
createEnrichmentKey('MigrateSubjectMSC');
createEnrichmentKey('MigrateSubjectDDC');
// Iterate over all documents.
$docFinder = new Opus_DocumentFinder();
$changedDocumentIds = array();
foreach ($docFinder->ids() as $docId) {
    $doc = null;
    try {
        $doc = new Opus_Document($docId);
    } catch (Opus_Model_NotFoundException $e) {
        continue;
    }
    $removeMscSubjects = array();
    $removeDdcSubjects = array();
    try {
        if (is_object($mscRole)) {
            $removeMscSubjects = migrateSubjectToCollection($doc, 'msc', $mscRole->getId(), 'MigrateSubjectMSC');
        }
        if (is_object($ddcRole)) {
            $removeDdcSubjects = migrateSubjectToCollection($doc, 'ddc', $ddcRole->getId(), 'MigrateSubjectDDC');
        }
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Sascha Szott <*****@*****.**>
 * @copyright   Copyright (c) 2008-2011, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id: export_import_all_docs.php 9044 2011-10-07 16:11:16Z sszott $
 */
/**
 * Tries to export and import all documents.
 */
$docFinder = new Opus_DocumentFinder();
foreach ($docFinder->ids() as $id) {
    $doc = null;
    try {
        $doc = new Opus_Document($id);
    } catch (Opus_Model_NotFoundException $e) {
        // document with id $id does not exist
        continue;
    }
    echo "try to export document {$id} ... ";
    $xmlModelOutput = new Opus_Model_Xml();
    $xmlModelOutput->setModel($doc);
    $xmlModelOutput->setStrategy(new Opus_Model_Xml_Version1());
    $xmlModelOutput->excludeEmptyFields();
    $domDocument = $xmlModelOutput->getDomDocument();
    echo "export of document {$id} was successful.\n";
    echo "try to import document based on the exported dom tree ... ";
 /**
  * Fetches the IDs of published documents, optionally limited to an ID range.
  *
  * A bound that is null is not applied.
  *
  * @param $start Start of ID range
  * @param $end End of ID range
  * @return array Array of document IDs
  */
 private function getDocumentIds($start, $end)
 {
     $publishedFinder = new Opus_DocumentFinder();
     $publishedFinder->setServerState('published');
     if ($start !== null) {
         $publishedFinder->setIdRangeStart($start);
     }
     if ($end !== null) {
         $publishedFinder->setIdRangeEnd($end);
     }
     $idsInRange = $publishedFinder->ids();
     return $idsInRange;
 }
 /**
  * When assigning documents that belong to collections of the collection role
  * series, several conflicts have to be handled.
  *
  * In the following, only documents are considered that are assigned to at
  * least one collection of the collection role series (short: series collection).
  *
  * Case 1 (documents without IdentifierSerial):
  * Since the volume number of a series (Opus_Series) is mandatory, documents
  * without IdentifierSerial cannot be migrated. They remain untouched. The
  * assignment(s) to series collection(s) is (are) not changed.
  *
  * Case 2 (documents with more than one IdentifierSerial):
  * Since a document can have only one volume number per series, documents
  * with more than one value for the field IdentifierSerial cannot be
  * migrated. They remain untouched. The assignment(s) to series
  * collection(s) is (are) not changed.
  *
  * Case 3 (documents with exactly one IdentifierSerial):
  * Since a series cannot contain two documents with the same volume number,
  * care must be taken when assigning documents that a volume number is not
  * used more than once.
  * If an attempt is made to assign a document to a series with a volume
  * number that is already in use, the assignment is not carried out. In that
  * case the document's assignment to the series collection is kept
  * unchanged.
  *
  * If the document is successfully assigned to a series, the link to the
  * corresponding series collection is removed. In addition, the field
  * IdentifierSerial is removed.
  *
  * Assignments to the root collection of the series role are not migrated
  * and are counted as conflicts.
  *
  * @return array an array that contains both the number of conflicts found and
  * the number of documents that were successfully migrated
  */
 private function migrateDocuments()
 {
     $numOfConflicts = 0;
     $numOfDocsMigrated = 0;
     $finder = new Opus_DocumentFinder();
     $finder->setCollectionRoleId($this->seriesRole->getId());
     // maps a series collection id to the list of serial ids already assigned during this run
     $serialIdsInUse = array();
     foreach ($finder->ids() as $docId) {
         $doc = new Opus_Document($docId);
         $serialIds = $doc->getIdentifierSerial();
         $numOfSerialIds = count($serialIds);
         // case 1: no IdentifierSerial
         if ($numOfSerialIds == 0) {
             $this->logger->warn("doc #{$docId} : does not have a field IdentifierSerial -- leave it untouched");
             $numOfConflicts++;
             continue;
         }
         // case 2: more than one IdentifierSerial
         if ($numOfSerialIds > 1) {
             $this->logger->warn("doc #{$docId} : has {$numOfSerialIds} values for field IdentifierSerial -- leave it untouched");
             $numOfConflicts++;
             continue;
         }
         // case 3: exactly one IdentifierSerial
         $serialId = $serialIds[0]->getValue();
         // collections the document keeps after migration
         $remainingCollections = array();
         foreach ($doc->getCollection() as $collection) {
             // only consider collection in collection role series
             if ($collection->getRoleId() != $this->seriesRole->getId()) {
                 array_push($remainingCollections, $collection);
             } else {
                 $collectionId = $collection->getId();
                 if (!$collection->isRoot()) {
                     // check for conflict
                     if (array_key_exists($collectionId, $serialIdsInUse) && in_array($serialId, $serialIdsInUse[$collectionId])) {
                         // conflict was found: serialId for series $collectionId already in use
                         $this->logger->warn("doc #{$docId} : could not assign to series #{$collectionId}: value {$serialId} already in use");
                         $this->logger->warn("doc #{$docId} : leave assignment to collection #{$collectionId} untouched");
                         array_push($remainingCollections, $collection);
                         $numOfConflicts++;
                     } else {
                         // no conflict
                         // the series is loaded with the id of the series collection
                         $series = new Opus_Series($collectionId);
                         $doc->addSeries($series)->setNumber($serialId);
                         $doc->setIdentifierSerial(array());
                         // mark usage of serialId for collection $collectionId
                         if (array_key_exists($collectionId, $serialIdsInUse)) {
                             array_push($serialIdsInUse[$collectionId], $serialId);
                         } else {
                             $serialIdsInUse[$collectionId] = array($serialId);
                         }
                         $this->logger->info("doc #{$docId} : assign document to series #{$collectionId} with value {$serialId}");
                         $this->logger->info("doc #{$docId} : removed assignment from collection #{$collectionId}");
                         $this->logger->info("doc #{$docId} : removed field IdentifierSerial with value " . $serialId);
                         // NOTE(review): incremented once per migrated collection assignment,
                         // so a document in several series collections is counted several times
                         $numOfDocsMigrated++;
                     }
                 } else {
                     // series root collection assignment will not be migrated
                     $this->logger->warn("doc #{$docId} : is assigned to root collection #{$collectionId} of collection role series: leave assignment untouched");
                     array_push($remainingCollections, $collection);
                     $numOfConflicts++;
                 }
             }
         }
         $doc->setCollection($remainingCollections);
         // presumably avoids search index updates while storing -- TODO confirm
         $doc->unregisterPlugin('Opus_Document_Plugin_Index');
         $doc->store();
     }
     return array('numOfConflicts' => $numOfConflicts, 'numOfDocsMigrated' => $numOfDocsMigrated);
 }
 * OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License 
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51 
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Thoralf Klein <*****@*****.**>
 * @copyright   Copyright (c) 2011, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id$
 */
// Bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';
// Permanently delete documents in server state 'temporary' that the finder
// returns for a modification date before the day two days ago.
$date = new DateTime();
$dateString = $date->sub(new DateInterval('P2D'))->format('Y-m-d');
$f = new Opus_DocumentFinder();
$f->setServerState('temporary')->setServerDateModifiedBefore($dateString);
foreach ($f->ids() as $id) {
    $d = new Opus_Document($id);
    // re-check the state on the loaded document before deleting it
    $serverState = $d->getServerState();
    if ($serverState == 'temporary') {
        echo "deleting document: {$id}\n";
        $d->deletePermanent();
    } else {
        // terminate the line so consecutive messages do not run together
        echo "NOT deleting document: {$id} because it has server state " . $serverState . "\n";
    }
}