// establish a counter for how many objects we edit $objectsChanged = 0; // keep track of how many troublesome objects we had to skip $skippedObjects = array(); // main loop for ALL PDF OBJECTS in the collection drush_print("\n******Beginning main processing loop*****\n"); for ($counter = 0; $counter < $totalNumObjects; $counter++) { $theObject = $allPDFObjects[$counter]; $realCount = $counter + 1; drush_print("Processing record {$realCount} of {$totalNumObjects}"); //print $theObject['s']['value']; $objectPID = $theObject['s']['value']; # try to fetch PID from repo try { //drush_print("Attempting to access $objectPID from repository"); $object = $repository->getObject($objectPID); } catch (Exception $e) { drush_print("\n\n**********####### ERROR #######*********"); drush_print("***Could not get object {$objectPID} from repo***\n\n"); $skippedObjects[] = $objectPID; continue; } # grab the MODS data stream $modsDS = $object['MODS']; /****************MODS RECORD**********************/ // drush_print("Editing MODS record"); $modsDOMDoc = new DOMDocument(); $modsDOMDoc->preserveWhiteSpace = false; $modsDOMDoc->formatOutput = true; $modsDOMDoc->loadXML($modsDS->content); $modsXPath = new DOMXPath($modsDOMDoc);
/**
 * Finds the Fedora object whose MODS identifier matches $identifier, or
 * prepares a brand-new object in the "fortlewis" namespace when Solr has no
 * record of that identifier.
 *
 * Solr is queried on the mods_identifier_ms field (up to three attempts).
 * The script is terminated with die() when Solr cannot be reached or when
 * its answer is not the expected JSON structure; guessing in that situation
 * would create duplicate objects.
 *
 * NOTE(review): earlier revisions built a stream context carrying a Basic
 * Authorization header but never attached it to the cURL handle, so the Solr
 * request has always been sent unauthenticated. That dead code was removed;
 * add CURLOPT_USERPWD here if Solr really requires credentials.
 *
 * @param string           $identifier Value to look up in mods_identifier_ms.
 * @param FedoraRepository $repository Repository used to load or construct the object.
 *
 * @return array Two elements: [0] the AbstractFedoraObject (existing or newly
 *               constructed), [1] bool, true when the object was newly constructed.
 */
function getObjectForIdentifier($identifier, $repository)
{
    global $solrUrl;

    // Escape the characters that would otherwise terminate or alter the
    // quoted phrase. For identifiers containing only ':' the result is the
    // same as before.
    $escapedIdentifier = strtr($identifier, array(
        '\\' => '\\\\',
        '"' => '\\"',
        ':' => '\\:',
    ));

    // URL-encode the query value so spaces, '&', '#', quotes and backslashes
    // in an identifier cannot corrupt the request URL.
    $solrQuery = '?q=' . rawurlencode("mods_identifier_ms:\"{$escapedIdentifier}\"") . '&fl=PID,dc.title';

    $connectTimeout = 5;
    $timeout = 20;
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $solrUrl . $solrQuery);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $connectTimeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    // NOTE(review): certificate verification is switched off, as in the
    // original script. Acceptable only for a trusted, local Solr instance.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    // Retry transport-level failures a few times before giving up.
    $maxTries = 3;
    $solrResponse = false;
    for ($curTry = 0; $curTry < $maxTries && $solrResponse === false; $curTry++) {
        $solrResponse = curl_exec($ch);
    }
    curl_close($ch);

    if (!$solrResponse) {
        die("Solr is currently down");
    }

    // A body that is not the expected JSON (HTML error page, XML, truncated
    // output) must not be mistaken for "no match": that would construct a
    // duplicate object for an identifier that may already exist.
    $decoded = json_decode($solrResponse);
    if (!isset($decoded->response->numFound)) {
        die("Solr returned an unexpected response");
    }

    //Basic settings for this content type
    $namespace = 'fortlewis';

    $newObject = ((int) $decoded->response->numFound === 0);
    if ($newObject) {
        //Create an object (this will create a new PID)
        /** @var AbstractFedoraObject $object */
        $object = $repository->constructObject($namespace);
    } else {
        if (!isset($decoded->response->docs[0]->PID)) {
            die("Solr returned an unexpected response");
        }
        $object = $repository->getObject($decoded->response->docs[0]->PID);
    }

    return array($object, $newObject);
}
# Script prologue: connect to Fedora through Tuque, load one object and parse
# its DC and MODS datastreams into SimpleXML documents for later editing.

# repository connection parameters
$url = 'localhost:8080/fedora';
$username = '******';
$password = '******';

# set up connection and repository variables
$connection = new RepositoryConnection($url, $username, $password);
$api = new FedoraApi($connection);
$repository = new FedoraRepository($api, new SimpleCache());

# what is the pid you want?
$pid = 'islandora:3';

# what to change the name to?
$givenName = "Mickey";
$familyName = "Mouse";
$wholeName = $givenName . ' ' . $familyName;

# try to fetch PID from repo
try {
    $object = $repository->getObject($pid);
} catch (Exception $e) {
    drush_print('****************ERROR***************');
    // Say which object failed and why, and exit with a failure status.
    drush_print("Could not load {$pid}: " . $e->getMessage());
    exit(1);
}

# grab the data streams
$dublinCoreDS = $object['DC'];
$modsDS = $object['MODS'];

# stop early when either datastream is absent instead of failing later on a
# method call against a non-object
if (!$dublinCoreDS || !$modsDS) {
    drush_print('****************ERROR***************');
    drush_print("Object {$pid} is missing its DC or MODS datastream");
    exit(1);
}

# parse the datastreams into simplexml objects
# The content is read through the ->content property, as the other scripts in
# this file do; getContent() expects a target file argument, which is why the
# old call needed error suppression.
$modsXML = simplexml_load_string($modsDS->content);
$dcXML = simplexml_load_string($dublinCoreDS->content);

if ($modsXML === false || $dcXML === false) {
    drush_print('****************ERROR***************');
    drush_print("Could not parse the DC or MODS XML of {$pid}");
    exit(1);
}

//print_r($dcXML->getDocNamespaces());
// foreach ($dcXML->children("dc", TRUE) as $entry)
/**
 * Loads one object from the Fedora repository by its PID.
 *
 * Thin convenience wrapper: the lookup is delegated to
 * $repository->getObject() and its result is handed back untouched.
 *
 * @param FedoraRepository $repository Repository to read from.
 * @param string           $personPid  PID of the object to load.
 *
 * @return FedoraObject
 */
function getFedoraObjectByPid($repository, $personPid)
{
    return $repository->getObject($personPid);
}
$people = preg_split('/\\r\\n|\\r|\\n/', $exportedItem->people); foreach ($people as $person) { $person = trim($person); if (strlen($person) == 0) { continue; } if ($numPeopleLoaded < $maxPeopleToLoad || $maxPeopleToLoad == -1) { //Check to see if the entity exists already $new = false; $existingPID = doesEntityExist($person); if ($existingPID != false) { if (!$updateModsForExistingEntities) { continue; } //Load the object $entity = $repository->getObject($existingPID); } else { //Create an entity within Islandora $entity = $repository->constructObject('person'); $entity->models = array('islandora:personCModel'); $entity->relationships->add(FEDORA_RELS_EXT_URI, 'isMemberOfCollection', 'marmot:people'); $entity->relationships->add(FEDORA_RELS_EXT_URI, 'isMemberOfCollection', 'islandora:entity_collection'); $new = true; } echo "{$i}) Processing Person {$person} <br/>"; $entity->label = $person; //Add MADS data if ($entity->getDatastream('MODS') == null) { $modsDatastream = $entity->constructDatastream('MODS'); } else { $modsDatastream = $entity->getDatastream('MODS');
<?php //set fits.sh executable first... it could be at /opt/fits //usage //drush php-script regen-obj [root of book object] # repository connection parameters $url = 'localhost:8080/fedora'; $username = '******'; $password = '******'; # set up connection and repository variables $connection = new RepositoryConnection($url, $username, $password); $repository = new FedoraRepository(new FedoraApi($connection), new SimpleCache()); $root_pid = drush_shift(); $parent_obj = $repository->getObject($root_pid); $itql = 'select $page_itql from <#ri> where $page_itql <fedora-rels-ext:isMemberOf> <info:fedora/' . $root_pid . '> order by $page_itql'; $page_objects = $repository->ri->itqlQuery($itql, 'unlimited', '0'); foreach ($page_objects as $page) { $page_pid = $page['page_itql']['value']; $object = islandora_object_load($page_pid); if (!$object->getDataStream('JP2')) { echo "regenerating OBJ for {$page_pid}\n"; $obj_ds = $object['OBJ']; //url of image... http://fedora_repo_url:8080/objects/[pid]/datastreams/OBJ/content $file_url = $repo_url . '/objects/' . $page_pid . '/datastreams/OBJ/content'; $drupal_result = drupal_http_request($file_url); if (!empty($drupal_result->data)) { //create a temporary file $new_file = file_save_data($drupal_result->data, file_default_scheme() . '://'); $path = drupal_realpath($new_file->uri);
$newObject = false; $existingPID = $solrResponse->response->docs[0]->PID; } if ($processAllFiles == false) { continue; } } } //Basic settings for this content type $namespace = 'evld'; //Create an object (this will create a new PID) /** @var AbstractFedoraObject $newPhoto */ if ($newObject) { $newPhoto = $repository->constructObject($namespace); } else { $newPhoto = $repository->getObject($existingPID); } if (strtolower(substr($imageFilename, -3)) == 'jpg') { fwrite($basicImageNames, "{$imageFilename} \r\n"); $isLargeImage = false; //Copy basic image to another location for derivative creation } else { $isLargeImage = true; } if ($newObject) { //$newPhoto->relationships->add() //TODO: if we get a tiff this can be a large image, otherwise it should be a basic image if (strtolower(substr($imageFilename, -3)) == 'jpg') { $newPhoto->models = array('islandora:sp_basic_image'); } else { $newPhoto->models = array('islandora:sp_large_image_cmodel');
/**
 * Attaches a file to an existing Fedora object as a new datastream and
 * records the outcome as one CSV line in the supplied log handle.
 *
 * Log line format: "{existingPID},{objectId},{datastream},{status}\r\n" where
 * status is one of "file does not exist", "uploaded" or
 * "datastream already exists".
 *
 * @param string           $fileToLoad      Path of the file holding the datastream content.
 * @param string           $datastream      Datastream id to create (passed to constructDatastream()).
 * @param string           $datastreamLabel Label assigned to the new datastream.
 * @param string           $mimeType        MIME type assigned to the new datastream.
 * @param string           $existingPID     PID of the object that receives the datastream.
 * @param string           $objectId        Source-system identifier, used only in the log line.
 * @param resource         $datastreamFile  Open, writable handle of the CSV log.
 * @param FedoraRepository $repository      Repository used to load the object.
 */
function addDatastream($fileToLoad, $datastream, $datastreamLabel, $mimeType, $existingPID, $objectId, $datastreamFile, $repository)
{
    $logPrefix = "{$existingPID},{$objectId},{$datastream}";

    if (!file_exists($fileToLoad)) {
        fwrite($datastreamFile, "{$logPrefix},file does not exist\r\n");
        return;
    }

    $newPhoto = $repository->getObject($existingPID);
    $imageDatastream = $newPhoto->constructDatastream($datastream);
    $imageDatastream->label = $datastreamLabel;
    $imageDatastream->mimetype = $mimeType;

    // Large files can take a long time to transfer; extend the time limit first.
    set_time_limit(1600);
    $imageDatastream->setContentFromFile($fileToLoad);

    // ingestDatastream() reports a refused ingest by returning false
    // (NOTE(review): presumably because the id already exists on the object;
    // confirm against Tuque). Reporting "uploaded" in that case would hide
    // the fact that nothing was stored.
    if ($newPhoto->ingestDatastream($imageDatastream) === false) {
        fwrite($datastreamFile, "{$logPrefix},datastream already exists\r\n");
        return;
    }

    fwrite($datastreamFile, "{$logPrefix},uploaded\r\n");
}
} } if ($solrResponse->response->numFound > 1) { echo "<br/>\r\n--WARNING: Found more than one possible match within Islandora, not changing\r\n<br/>"; fwrite($logFile, "\r\n--WARNING: Found more than one possible match within Islandora, not changing\r\n"); foreach ($solrResponse->response->docs as $doc) { echo "--{$doc->PID}\r\n<br/>"; fwrite($logFile, "--{$doc->PID}\r\n"); } continue; } $existingPID = $solrResponse->response->docs[0]->PID; echo " ({$existingPID}) \r\n<br/>"; fwrite($logFile, " ({$existingPID}) \r\n"); //Find the same object in Fedora using Tuque $fedoraObject = $repository->getObject($existingPID); //Get a copy of MODS record for the object from Fedora $MODS = $fedoraObject->getDatastream('MODS'); $MODScontent = $MODS->content; //Parse the MODS record using simple XML $MODSxml = new DOMDocument(); $MODSxml->preserveWhiteSpace = false; $MODSxml->formatOutput = true; if (!$MODSxml->loadXML($MODScontent)) { echo "Could not load XML for {$objectId} PID {$existingPID}"; fwrite($logFile, "Could not load XML for {$objectId} PID {$existingPID}"); continue; } if (!file_exists($oldModsLocation . $objectId . '.xml')) { //Save here but reformatted for easier comparison file_put_contents($oldModsLocation . $objectId . '.xml', $MODSxml->saveXML());
$solrResponse = json_decode($solrResponse); if (!$solrResponse->response || $solrResponse->response->numFound == 0) { fwrite($logFile, 'No basic images found'); } else { $totalRecords = $solrResponse->response->numFound; $startRecord = 0; $limit = 25; $numProcessed = 0; while ($numProcessed < $totalRecords) { $solrResponse = file_get_contents($solrUrl . $solrQuery . "&rows={$limit}&start={$startRecord}", false); $solrResponse = json_decode($solrResponse); foreach ($solrResponse->response->docs as $record) { $pid = $record->PID; fwrite($logFile, "Processing {$pid}\r\n"); $numProcessed += 1; $fedoraObject = $repository->getObject($pid); //Get a copy of MODS record for the object from Fedora $MODS = $fedoraObject->getDatastream('MODS'); $MODScontent = $MODS->content; //Parse the MODS record using simple XML $MODSxml = new DOMDocument(); $MODSxml->preserveWhiteSpace = false; $MODSxml->formatOutput = true; if (!$MODSxml->loadXML($MODScontent)) { echo "Could not load XML for {$pid}"; fwrite($logFile, " Could not load XML for {$pid}\r\n"); continue; } $migratedIdentifierElement = $MODSxml->getElementsByTagNameNS('http://marmot.org/local_mods_extension', 'migratedIdentifier')->item(0); if ($migratedIdentifierElement == null) { $migratedIdentifierElement = $MODSxml->getElementsByTagName('identifier')->item(0);