// Batch pass over every entry of $allPDFObjects: each entry's PID is loaded
// from the repository and its MODS datastream is parsed into a DOM document
// with an XPath helper.
// establish a counter for how many objects we edit
$objectsChanged = 0;
// PIDs of troublesome objects we had to skip (those that could not be loaded from the repository)
$skippedObjects = array();
// main loop for ALL PDF OBJECTS in the collection
drush_print("\n******Beginning main processing loop*****\n");
for ($counter = 0; $counter < $totalNumObjects; $counter++) {
    $theObject = $allPDFObjects[$counter];
    // 1-based position, used only in the progress message
    $realCount = $counter + 1;
    drush_print("Processing record {$realCount} of {$totalNumObjects}");
    //print $theObject['s']['value'];
    // presumably 's' is the variable name bound by the query that produced $allPDFObjects -- TODO confirm
    $objectPID = $theObject['s']['value'];
    # try to fetch PID from repo
    try {
        //drush_print("Attempting to access $objectPID from repository");
        $object = $repository->getObject($objectPID);
    } catch (Exception $e) {
        // Report the failure, remember the PID and move on to the next record
        drush_print("\n\n**********#######  ERROR  #######*********");
        drush_print("***Could not get object {$objectPID} from repo***\n\n");
        $skippedObjects[] = $objectPID;
        continue;
    }
    # grab the MODS data stream
    // NOTE(review): nothing here checks that the object actually has a MODS datastream -- confirm
    $modsDS = $object['MODS'];
    /****************MODS RECORD**********************/
    // drush_print("Editing MODS record");
    // Parse the datastream content; whitespace-only nodes are dropped and
    // output formatting is enabled so the document serializes indented.
    $modsDOMDoc = new DOMDocument();
    $modsDOMDoc->preserveWhiteSpace = false;
    $modsDOMDoc->formatOutput = true;
    $modsDOMDoc->loadXML($modsDS->content);
    $modsXPath = new DOMXPath($modsDOMDoc);
/**
 * Find the Fedora object whose MODS identifier matches $identifier, or
 * construct a new object when Solr has no match.
 *
 * Solr is queried on the mods_identifier_ms field, with up to three attempts.
 * The script is terminated with die() when Solr gives no usable answer, so a
 * failed lookup is never mistaken for "no such object".
 *
 * @param string $identifier Identifier to look up
 * @param FedoraRepository $repository Repository used to load or construct the object
 * @return array array($object, $newObject): the object, and true when it was newly constructed
 */
function getObjectForIdentifier($identifier, $repository)
{
    global $solrUrl;
    //Check Solr to see if we have created the compound object yet.
    //Backslash, double quote and colon are escaped so the identifier cannot
    //break out of the quoted phrase.
    $escapedIdentifier = str_replace(
        array('\\', '"', ':'),
        array('\\\\', '\\"', '\\:'),
        $identifier
    );
    //URL-encode the parameters so spaces, quotes and ampersands in the
    //identifier cannot corrupt the request.
    $solrQuery = '?' . http_build_query(array(
        'q' => "mods_identifier_ms:\"{$escapedIdentifier}\"",
        'fl' => 'PID,dc.title',
    ), '', '&');

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $solrUrl . $solrQuery);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 20);
    // NOTE(review): TLS host and peer verification are switched off here;
    // confirm this is only used against a trusted/internal Solr endpoint.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    //Retry until curl hands back a response body or the attempts run out
    $solrResponse = false;
    $maxTries = 3;
    for ($curTry = 0; $curTry < $maxTries && $solrResponse === false; $curTry++) {
        $solrResponse = curl_exec($ch);
    }
    curl_close($ch);

    if (!$solrResponse) {
        die("Solr is currently down");
    }
    $solrResult = json_decode($solrResponse);
    //A body that is not the expected JSON must not be read as "zero matches",
    //otherwise a duplicate object would be constructed.
    if (!isset($solrResult->response->numFound)) {
        die("Solr returned an unexpected response");
    }
    $newObject = $solrResult->response->numFound == 0;

    //Basic settings for this content type
    $namespace = 'fortlewis';
    //Create an object (this will create a new PID)
    /** @var AbstractFedoraObject $object */
    if ($newObject) {
        $object = $repository->constructObject($namespace);
    } else {
        //Reuse the first match reported by Solr
        $object = $repository->getObject($solrResult->response->docs[0]->PID);
    }
    return array($object, $newObject);
}
Beispiel #3
0
# repository connection parameters
# NOTE(review): credentials are hard-coded in the script -- confirm this is intended
$url = 'localhost:8080/fedora';
$username = '******';
$password = '******';
# set up connection and repository variables
$connection = new RepositoryConnection($url, $username, $password);
$api = new FedoraApi($connection);
$repository = new FedoraRepository($api, new SimpleCache());
# what is the pid you want?
$pid = 'islandora:3';
# what to change the name to?
$givenName = "Mickey";
$familyName = "Mouse";
$wholeName = $givenName . ' ' . $familyName;
# try to fetch PID from repo
try {
    $object = $repository->getObject($pid);
} catch (Exception $e) {
    drush_print('****************ERROR***************');
    # say which object failed and why, and stop with a non-zero status so
    # the failure is visible to whatever invoked the script
    drush_print("Could not get object {$pid} from repo: " . $e->getMessage());
    exit(1);
}
# grab the data streams
$dublinCoreDS = $object['DC'];
$modsDS = $object['MODS'];
// drush_print('******Dublin Core********');
// drush_print(@$dublinCoreDS->getContent());
// drush_print();
# parse the datastreams into simplexml objects
$modsXML = simplexml_load_string(@$modsDS->getContent());
$dcXML = simplexml_load_string(@$dublinCoreDS->getContent());
//print_r($dcXML->getDocNamespaces());
// foreach ($dcXML->children("dc", TRUE) as $entry)
/**
 * Load one object from the repository by its PID.
 *
 * @param FedoraRepository $repository Repository to load from
 * @param string $personPid PID of the object to load
 *
 * @return FedoraObject Whatever $repository->getObject() returns for that PID
 */
function getFedoraObjectByPid($repository, $personPid)
{
    // Thin pass-through: any exception from getObject() reaches the caller.
    return $repository->getObject($personPid);
}
 // Split the exported people field into one name per line; the pattern
 // accepts CRLF, CR or LF line endings.
 $people = preg_split('/\\r\\n|\\r|\\n/', $exportedItem->people);
 foreach ($people as $person) {
     $person = trim($person);
     // Skip blank lines
     if (strlen($person) == 0) {
         continue;
     }
     // A $maxPeopleToLoad of -1 disables the limit
     if ($numPeopleLoaded < $maxPeopleToLoad || $maxPeopleToLoad == -1) {
         //Check to see if the entity exists already
         $new = false;
         // presumably doesEntityExist() returns the matching PID or false; it is defined outside this section -- TODO confirm
         $existingPID = doesEntityExist($person);
         if ($existingPID != false) {
             // Existing entities are only touched when MODS updates were requested
             if (!$updateModsForExistingEntities) {
                 continue;
             }
             //Load the object
             $entity = $repository->getObject($existingPID);
         } else {
             //Create an entity within Islandora: a 'person' namespace object with the
             //person content model, added to both people collections
             $entity = $repository->constructObject('person');
             $entity->models = array('islandora:personCModel');
             $entity->relationships->add(FEDORA_RELS_EXT_URI, 'isMemberOfCollection', 'marmot:people');
             $entity->relationships->add(FEDORA_RELS_EXT_URI, 'isMemberOfCollection', 'islandora:entity_collection');
             $new = true;
         }
         // NOTE(review): $i is not assigned anywhere in the visible part of this loop -- confirm it is set by the enclosing code
         echo "{$i}) Processing Person {$person} <br/>";
         $entity->label = $person;
         //Add MODS data (the datastream ID used below is 'MODS'): reuse the
         //datastream when the entity already has one, otherwise construct it
         if ($entity->getDatastream('MODS') == null) {
             $modsDatastream = $entity->constructDatastream('MODS');
         } else {
             $modsDatastream = $entity->getDatastream('MODS');
<?php

// For every object that is a member of the given root object and has no JP2
// datastream, download its OBJ datastream content into a temporary Drupal file.
//set fits.sh executable first... it could be at /opt/fits
//usage
//drush php-script regen-obj [root of book object]
# repository connection parameters
$url = 'localhost:8080/fedora';
$username = '******';
$password = '******';
# set up connection and repository variables
$connection = new RepositoryConnection($url, $username, $password);
$repository = new FedoraRepository(new FedoraApi($connection), new SimpleCache());
// Per the usage line above, the argument is the PID of the root book object
$root_pid = drush_shift();
$parent_obj = $repository->getObject($root_pid);
// Resource-index query: every object with an isMemberOf relationship to the
// root PID, ordered by the bound variable
$itql = 'select $page_itql from <#ri>
        where $page_itql <fedora-rels-ext:isMemberOf> <info:fedora/' . $root_pid . '>
        order by $page_itql';
$page_objects = $repository->ri->itqlQuery($itql, 'unlimited', '0');
foreach ($page_objects as $page) {
    $page_pid = $page['page_itql']['value'];
    $object = islandora_object_load($page_pid);
    // Only objects without a JP2 datastream are processed
    if (!$object->getDataStream('JP2')) {
        echo "regenerating OBJ for {$page_pid}\n";
        $obj_ds = $object['OBJ'];
        //url of image... http://fedora_repo_url:8080/objects/[pid]/datastreams/OBJ/content
        // NOTE(review): $repo_url is not assigned in the visible part of this script (only $url is) -- confirm where it comes from
        $file_url = $repo_url . '/objects/' . $page_pid . '/datastreams/OBJ/content';
        $drupal_result = drupal_http_request($file_url);
        // Continue only when the request returned a non-empty body
        if (!empty($drupal_result->data)) {
            //create a temporary file
            $new_file = file_save_data($drupal_result->data, file_default_scheme() . '://');
            $path = drupal_realpath($new_file->uri);
             $newObject = false;
             $existingPID = $solrResponse->response->docs[0]->PID;
         }
         if ($processAllFiles == false) {
             continue;
         }
     }
 }
 //Basic settings for this content type
 $namespace = 'evld';
 //Create an object (this will create a new PID) or load the existing one;
 //$newObject and $existingPID are set by the lookup code preceding this section
 /** @var AbstractFedoraObject $newPhoto */
 if ($newObject) {
     $newPhoto = $repository->constructObject($namespace);
 } else {
     $newPhoto = $repository->getObject($existingPID);
 }
 //Classify by the last three characters of the file name: 'jpg' (any case) is
 //a basic image and its name is logged; everything else counts as a large image.
 //NOTE(review): a '.jpeg' extension does not end in 'jpg' and is therefore
 //treated as a large image -- confirm that is intended
 if (strtolower(substr($imageFilename, -3)) == 'jpg') {
     fwrite($basicImageNames, "{$imageFilename} \r\n");
     $isLargeImage = false;
     //Copy basic image to another location for derivative creation
 } else {
     $isLargeImage = true;
 }
 //Content models are only assigned to newly constructed objects
 if ($newObject) {
     //$newPhoto->relationships->add()
     //TODO: if we get a tiff this can be a large image, otherwise it should be a basic image
     //Same extension test as above, repeated rather than reusing $isLargeImage
     if (strtolower(substr($imageFilename, -3)) == 'jpg') {
         $newPhoto->models = array('islandora:sp_basic_image');
     } else {
         $newPhoto->models = array('islandora:sp_large_image_cmodel');
/**
 * Attach a file to an existing Fedora object as a new datastream and log the outcome.
 *
 * One line of the form "PID,objectId,datastreamId,status" is written to
 * $datastreamFile. The status is "file does not exist" when $fileToLoad is
 * missing, "ingest failed" when ingestDatastream() returns FALSE, and
 * "uploaded" otherwise.
 *
 * @param string $fileToLoad Path of the file whose content becomes the datastream
 * @param string $datastream ID of the datastream to construct
 * @param string $datastreamLabel Label assigned to the datastream
 * @param string $mimeType MIME type assigned to the datastream
 * @param string $existingPID PID of the object that receives the datastream
 * @param string $objectId Identifier written to the log line alongside the PID
 * @param resource $datastreamFile Open handle the log line is written to
 * @param FedoraRepository $repository Repository used to load the object
 */
function addDatastream($fileToLoad, $datastream, $datastreamLabel, $mimeType, $existingPID, $objectId, $datastreamFile, $repository)
{
    if (!file_exists($fileToLoad)) {
        fwrite($datastreamFile, "{$existingPID},{$objectId},{$datastream},file does not exist\r\n");
        return;
    }

    $newPhoto = $repository->getObject($existingPID);
    $imageDatastream = $newPhoto->constructDatastream($datastream);
    $imageDatastream->label = $datastreamLabel;
    $imageDatastream->mimetype = $mimeType;
    // Raise the script time limit to 1600 seconds before transferring the file
    set_time_limit(1600);
    $imageDatastream->setContentFromFile($fileToLoad);
    // Tuque documents a FALSE return for a datastream that already exists;
    // do not log that case as a successful upload.
    $ingestResult = $newPhoto->ingestDatastream($imageDatastream);
    unset($imageDatastream);

    $status = ($ingestResult === false) ? 'ingest failed' : 'uploaded';
    fwrite($datastreamFile, "{$existingPID},{$objectId},{$datastream},{$status}\r\n");
}
     }
 }
 //Ambiguous match: report every candidate PID to the page and the log, then
 //move on to the next item without changing anything
 if ($solrResponse->response->numFound > 1) {
     echo "<br/>\r\n--WARNING: Found more than one possible match within Islandora, not changing\r\n<br/>";
     fwrite($logFile, "\r\n--WARNING: Found more than one possible match within Islandora, not changing\r\n");
     foreach ($solrResponse->response->docs as $doc) {
         echo "--{$doc->PID}\r\n<br/>";
         fwrite($logFile, "--{$doc->PID}\r\n");
     }
     continue;
 }
 //NOTE(review): assumes the zero-match case was handled before this point -- confirm
 $existingPID = $solrResponse->response->docs[0]->PID;
 echo " ({$existingPID}) \r\n<br/>";
 fwrite($logFile, " ({$existingPID}) \r\n");
 //Find the same object in Fedora using Tuque
 $fedoraObject = $repository->getObject($existingPID);
 //Get a copy of MODS record for the object from Fedora
 $MODS = $fedoraObject->getDatastream('MODS');
 $MODScontent = $MODS->content;
 //Parse the MODS record with DOMDocument; whitespace-only nodes are dropped
 //and output formatting is enabled
 $MODSxml = new DOMDocument();
 $MODSxml->preserveWhiteSpace = false;
 $MODSxml->formatOutput = true;
 //Unparseable MODS: report it and skip this item
 if (!$MODSxml->loadXML($MODScontent)) {
     echo "Could not load XML for {$objectId} PID {$existingPID}";
     fwrite($logFile, "Could not load XML for {$objectId} PID {$existingPID}");
     continue;
 }
 //Keep a copy of the pre-change MODS, written only when no copy exists yet
 if (!file_exists($oldModsLocation . $objectId . '.xml')) {
     //Save here but reformatted for easier comparison
     file_put_contents($oldModsLocation . $objectId . '.xml', $MODSxml->saveXML());
 //Decode the initial Solr response; it is used here only for the total hit count
 $solrResponse = json_decode($solrResponse);
 if (!$solrResponse->response || $solrResponse->response->numFound == 0) {
     fwrite($logFile, 'No basic images found');
 } else {
     $totalRecords = $solrResponse->response->numFound;
     $startRecord = 0;
     $limit = 25;
     $numProcessed = 0;
     //Fetch the matches $limit at a time until every record has been counted.
     //NOTE(review): $startRecord is not advanced in the visible part of this
     //loop, and a page with no docs would leave $numProcessed unchanged --
     //confirm the remainder of the loop handles both
     while ($numProcessed < $totalRecords) {
         $solrResponse = file_get_contents($solrUrl . $solrQuery . "&rows={$limit}&start={$startRecord}", false);
         $solrResponse = json_decode($solrResponse);
         foreach ($solrResponse->response->docs as $record) {
             $pid = $record->PID;
             fwrite($logFile, "Processing {$pid}\r\n");
             //Counted before any work is done, so skipped records still advance the total
             $numProcessed += 1;
             $fedoraObject = $repository->getObject($pid);
             //Get a copy of MODS record for the object from Fedora
             $MODS = $fedoraObject->getDatastream('MODS');
             $MODScontent = $MODS->content;
             //Parse the MODS record with DOMDocument; whitespace-only nodes are
             //dropped and output formatting is enabled
             $MODSxml = new DOMDocument();
             $MODSxml->preserveWhiteSpace = false;
             $MODSxml->formatOutput = true;
             //Unparseable MODS: report it and go on to the next record
             if (!$MODSxml->loadXML($MODScontent)) {
                 echo "Could not load XML for {$pid}";
                 fwrite($logFile, "  Could not load XML for {$pid}\r\n");
                 continue;
             }
             //Prefer the migratedIdentifier element from the Marmot local MODS
             //extension namespace; fall back to the first element named 'identifier'
             $migratedIdentifierElement = $MODSxml->getElementsByTagNameNS('http://marmot.org/local_mods_extension', 'migratedIdentifier')->item(0);
             if ($migratedIdentifierElement == null) {
                 $migratedIdentifierElement = $MODSxml->getElementsByTagName('identifier')->item(0);