/**
 * Ensure an XML string starts with a single UTF-8 XML declaration.
 *
 * If the input already contains the marker "<?xml" anywhere, whatever
 * declaration is there is stripped via removeXMLDeclaration() first (it might
 * be crud), and a fresh UTF-8 declaration is prepended.
 *
 * @param string $xml The XML text, with or without an existing declaration.
 * @return string The UTF-8 declaration, the NL constant, then the cleaned XML.
 */
function addXMLDeclarationUTF8($xml)
{
    // Keep stripping while the marker is present, exactly like the previous
    // recursive implementation did -- but stop as soon as a pass changes
    // nothing. Previously an occurrence of "<?xml" that removeXMLDeclaration()
    // does not remove made the function recurse without end.
    while (strpos($xml, '<?xml') !== false) {
        $stripped = removeXMLDeclaration($xml);
        if ($stripped === $xml) {
            break;
        }
        $xml = $stripped;
    }

    return '<?xml version="1.0" encoding="UTF-8"?>' . NL . $xml;
}
/**
 * Return this record's metadata in the requested format, one payload line per
 * output line, each prefixed with tab indentation.
 *
 * Supported formats, as dispatched below:
 *  - 'dci'    : produced by DCIMethod::ro_handle('dci')
 *  - 'oai_dc' : produced by $this->_rec->transformToDC(false)
 *  - 'rif'    : getRif() passed through wrapRegistryObjects(), XML declaration removed
 *  - 'extRif' : getExtRif(), XML declaration removed
 *  - anything else: getRecordDataInScheme(null, $format), XML declaration removed
 *
 * @param string $format  Metadata format identifier (see list above).
 * @param int    $nestlvl Nesting level used to build the tab prefix; 0 means no prefix.
 * @return string The indented payload (each line terminated by "\n"), or an
 *                empty string when the selected source produced nothing truthy.
 */
public function metadata($format, $nestlvl = 0)
{
    // NOTE(review): a nesting level of N yields N + 1 tabs. The previous code
    // looped over range(0, $nestlvl), which is inclusive at both ends. This
    // looks like an off-by-one, but it is preserved so emitted output does not
    // change -- confirm against consumers before "fixing" it.
    $lprefix = ($nestlvl > 0) ? str_repeat("\t", (int) $nestlvl + 1) : "";

    $output = "";
    $data = false;

    switch ($format) {
        case 'dci':
            require_once REGISTRY_APP_PATH . '/services/method_handlers/dci.php';
            $dci_handler = new DCIMethod();
            $dci_handler->ro = $this->_rec;
            $dci_handler->populate_resource($this->id);
            $data = $dci_handler->ro_handle('dci');
            break;

        case 'oai_dc':
            $data = $this->_rec->transformToDC(false);
            break;

        case 'rif':
            $data = removeXMLDeclaration(wrapRegistryObjects($this->_rec->getRif()));
            break;

        case 'extRif':
            $data = removeXMLDeclaration($this->_rec->getExtRif());
            break;

        default:
            $data = removeXMLDeclaration($this->_rec->getRecordDataInScheme(null, $format));
    }

    if ($data) {
        foreach (explode("\n", $data) as $line) {
            // Skip blank lines only. empty() was used here before, which is
            // also true for the string "0" and therefore silently dropped any
            // payload line consisting solely of a zero.
            if ($line === '') {
                continue;
            }
            $output .= $lprefix . $line . "\n";
        }
    }

    return $output;
}
/**
 * Transform this registry object's RIF into DCI XML and post-process the result.
 *
 * Steps performed on the XSLT output:
 *  1. Abstract elements are entity-decoded and stripped of markup.
 *  2. When more than two TimePeriod elements exist they are all removed and
 *     replaced by the earliest "Start" and the latest "End" value found.
 *  3. When the XSLT produced no Author, authors are built from related party
 *     objects (names, addresses, identifiers); failing that, a single Author
 *     named after the record's group is added.
 *  4. ParsedFunding entries are resolved against published registry objects;
 *     unresolved entries and empty funding containers are removed.
 *  5. The postproc="1" markers on FundingInfoList / CitationList are removed.
 *
 * @param bool $doDsCheck When true, the owning data source must have
 *                        export_dci set to 1 or 't' for anything to be produced.
 * @return string|null The DCI XML without XML declaration followed by NL; an
 *                     empty string when the data source is not exportable or
 *                     the XSLT produced nothing; null (after echoing an error
 *                     message) when an Exception was caught.
 */
function transformToDCI($doDsCheck = true)
{
    $this->_CI->load->helper('normalisation');

    $exportable = true;
    if ($doDsCheck) {
        $ds = $this->_CI->ds->getByID($this->ro->data_source_id);
        $exportable = $ds->export_dci == 1 || $ds->export_dci == 't';
    }

    if (!$exportable) {
        return "";
    }

    try {
        $xslt_processor = Transforms::get_extrif_to_dci_transformer();

        $dom = new DOMDocument();
        // NOTE(review): LIBXML_NOENT substitutes entities while parsing. If the
        // RIF can contain externally supplied markup this permits external
        // entity expansion (XXE) -- confirm whether the flag is really needed.
        $dom->loadXML($this->ro->getRif(), LIBXML_NOENT);

        $xslt_processor->setParameter('', 'dateHarvested', date("Y", $this->ro->created));
        $xslt_processor->setParameter('', 'dateRequested', date("Y-m-d"));
        $xml_output = $xslt_processor->transformToXML($dom);
        if ($xml_output == '') {
            return "";
        }

        $dom = new DOMDocument();
        $dom->loadXML($xml_output);
        $sxml = simplexml_import_dom($dom);

        // --- Abstracts: plain text only ---------------------------------
        $abstracts = $sxml->xpath('//Abstract');
        foreach ($abstracts as $abstract) {
            $abstract[0] = strip_tags(html_entity_decode((string) $abstract));
        }

        // --- Time periods: collapse to one Start / one End --------------
        $TimePeriods = $sxml->xpath('//TimeperiodList/TimePeriod');
        if (sizeof($TimePeriods) > 2) {
            // Sentinels; a value still equal to its sentinel afterwards means
            // no element of that kind was seen.
            $startYear = 99999;
            $endYear = 0;
            foreach ($TimePeriods as $tYear) {
                if ($tYear['TimeSpan'] == 'Start' && intval($tYear) < $startYear) {
                    $startYear = intval($tYear);
                }
                if ($tYear['TimeSpan'] == 'End' && intval($tYear) > $endYear) {
                    $endYear = intval($tYear);
                }
                // Remove the original element from the document.
                unset($tYear[0][0]);
            }

            $TimePeriodList = $sxml->xpath('//TimeperiodList')[0];
            if ($startYear != 99999) {
                $eStart = $TimePeriodList->addChild('TimePeriod', $startYear);
                $eStart['TimeSpan'] = 'Start';
            }
            if ($endYear != 0) {
                $eEnd = $TimePeriodList->addChild('TimePeriod', $endYear);
                $eEnd['TimeSpan'] = 'End';
            }
        }

        // --- Authors: derive from related parties when none were emitted -
        $eAuthorList = $sxml->xpath('//AuthorList')[0];
        if (sizeof($sxml->xpath('//AuthorList/Author')) == 0) {
            $relationshipTypeArray = array(
                'hasPrincipalInvestigator',
                'principalInvestigator',
                'author',
                'coInvestigator',
                'isOwnedBy',
                'hasCollector',
            );
            $classArray = array('party');
            $authorList = $this->ro->getRelatedObjectsByClassAndRelationshipType($classArray, $relationshipTypeArray);
            $seq = 1;

            if (sizeof($authorList) > 0) {
                foreach ($authorList as $author) {
                    // Include identifiers and addresses for this author (if they exist in the registry)
                    $researcher_object = $this->_CI->ro->getPublishedByKey($author['key']);
                    if ($researcher_object && ($researcher_sxml = $researcher_object->getSimpleXML(NULL, true))) {
                        // Reset per author so that a failure below can never
                        // attach identifiers to the previous author's element.
                        $eAuthor = null;

                        try {
                            $eAuthor = $eAuthorList->addChild('Author');
                            $eAuthor['seq'] = $seq++;

                            // NOTE(review): "dude" is a placeholder value. The
                            // original code carried a commented-out call,
                            // format_relationship("collection", (string) $author["relation_type"],
                            // (string) $author['origin'], 'party'), apparently meant to
                            // produce a human-readable role -- confirm and restore.
                            $eAuthor["AuthorRole"] = "dude";

                            $authorNames = $researcher_sxml->xpath('//extRif:displayTitle');
                            foreach ($authorNames as $authorName) {
                                $eAuthor->addChild('AuthorName', (string) $authorName);
                            }

                            // Do we have an address? (using the normalisation_helper.php)
                            $researcher_addresses = $researcher_sxml->xpath('//ro:location/ro:address');
                            $address_string = "";
                            if (is_array($researcher_addresses)) {
                                foreach ($researcher_addresses as $_addr) {
                                    if ($_addr->physical) {
                                        $address_string .= normalisePhysicalAddress($_addr->physical) . " ";
                                    } elseif ($_addr->electronic) {
                                        $address_string .= (string) $_addr->electronic->value . " ";
                                    }
                                }
                            }
                            if ($address_string) {
                                $eAuthor->AuthorAddress->AddressString = $address_string;
                            }
                        } catch (Exception $e) {
                            // Best effort: a failure while decorating one author
                            // must not abort the whole transform.
                        }

                        // Handle the researcher IDs
                        $researcher_ids = $researcher_sxml->xpath('//ro:party/ro:identifier');
                        if ($eAuthor !== null && is_array($researcher_ids)) {
                            // Values already emitted for this author (both the raw
                            // identifier and its shortened form), for de-duplication.
                            $idArray = array();

                            foreach ($researcher_ids as $researcher_id) {
                                $idValue = (string) $researcher_id;
                                // Strict comparison: loose in_array() would treat
                                // numeric-looking identifiers such as "0123" and
                                // "123" as duplicates.
                                if ($idValue == '' || in_array($idValue, $idArray, true)) {
                                    continue;
                                }

                                $idType = strtoupper((string) $researcher_id['type']);

                                // Pick the URL prefix to cut off, if any.
                                // The third test previously had strpos() arguments
                                // swapped (haystack/needle), so NLA URLs carried by
                                // identifiers of another type were never shortened.
                                if ($idType == 'DOI') {
                                    $delimiter = 'doi.org/';
                                } elseif ($idType == 'AU-ANL:PEAU' || strpos($idValue, 'nla.gov.au/') !== false) {
                                    $delimiter = 'nla.gov.au/';
                                } else {
                                    $delimiter = null;
                                }

                                if ($delimiter !== null) {
                                    $shortValue = $this->substringAfter($idValue, $delimiter);
                                    $eAuthorId = $eAuthor->addChild('AuthorID', $shortValue);
                                    $idArray[] = $shortValue;
                                } else {
                                    $eAuthorId = $eAuthor->addChild('AuthorID', $idValue);
                                }
                                $eAuthorId['type'] = $researcher_id['type'];
                                $idArray[] = $idValue;
                            }
                        }
                    }
                }
            }
        }

        // Still no author: fall back to the record's group.
        if (sizeof($sxml->xpath('//AuthorList/Author')) == 0) {
            $eAuthor = $eAuthorList->addChild('Author');
            $eAuthor['seq'] = '1';
            $eAuthor->addChild('AuthorName', $this->ro->group);
        }

        // --- Post-process the Grant and Funding info elements ------------
        $fundingInfoList = $sxml->xpath('//FundingInfoList[@postproc="1"]');
        foreach ($fundingInfoList as $fundingInfo) {
            unset($fundingInfo["postproc"]);
        }

        $grants = $sxml->xpath('//ParsedFunding');
        foreach ($grants as $grant) {
            $grantNumber = (string) $grant->GrantNumber;

            // The grant number is used as a registry key to find the grant record.
            $grant_object = $this->_CI->ro->getPublishedByKey($grantNumber);
            if ($grant_object && $grant_object->status == PUBLISHED) {
                $grant_sxml = $grant_object->getSimpleXML(NULL, true);

                $grant_id = $grant_sxml->xpath("//ro:identifier[@type='arc'] | //ro:identifier[@type='nhmrc'] | //ro:identifier[@type='purl']");
                $related_party = $grant_object->getRelatedObjectsByClassAndRelationshipType(array('party'), array('isFunderOf', 'isFundedBy'));

                if (is_array($grant_id)) {
                    $grant->GrantNumber = implode("\n", array_map('normaliseIdentifier', $grant_id));
                    if (is_array($related_party) && isset($related_party[0])) {
                        $grant->addChild("FundingOrganization", $related_party[0]['title']);
                    }
                } else {
                    unset($grant[0][0]);
                }
            } else {
                // No published grant record: drop the ParsedFunding element.
                unset($grant[0][0]);
            }
        }

        // Remove funding containers left without usable content.
        $blankFundingInfoList = $sxml->xpath('//FundingInfoList[ParsedFunding/GrantNumber/text() = ""] | //FundingInfoList[count(descendant::node()) < 3]');
        foreach ($blankFundingInfoList as $blankFundingInfo) {
            unset($blankFundingInfo[0][0]);
        }
        $blankFundingInfos = $sxml->xpath('//FundingInfo[not(FundingInfoList)]');
        foreach ($blankFundingInfos as $blankFundingInfo) {
            unset($blankFundingInfo[0][0]);
        }

        // --- Post-process the Citations element --------------------------
        // Remove the "to-process" marker from every CitationList. The previous
        // loop reused $citations as its own loop variable and indexed the
        // element by list position, so only the first marker was removed.
        $citations = $sxml->xpath('//CitationList[@postproc="1"]');
        foreach ($citations as $citation) {
            unset($citation["postproc"]);
        }

        return trim(removeXMLDeclaration($sxml->asXML())) . NL;
    } catch (Exception $e) {
        echo "UNABLE TO TRANSFORM" . BR;
        echo "<pre>" . nl2br($e->getMessage()) . "</pre>" . BR;
    }
    // Falls through (returning null) after a caught exception, as before.
}