/**
 * Populate this handler with the data it works from: the Solr index document
 * for the record, the parsed registry object XML and an XPath helper over the
 * raw RIF-CS.
 *
 * Sets $this->overrideExportable always, $this->index only when Solr returns
 * exactly one document, and $this->xml / $this->gXPath only when the registry
 * object XML could be parsed.
 *
 * @param mixed $id                 Registry object id, used in the Solr filter query.
 * @param bool  $overrideExportable Stored as-is on $this->overrideExportable.
 */
function populate_resource($id, $overrideExportable = false)
{
    // Local SOLR index for fast searching.
    $ci =& get_instance();
    $ci->load->library('solr');
    $ci->solr->clearOpt('fq');
    $ci->solr->setOpt('fq', '+id:' . $id);
    $this->overrideExportable = $overrideExportable;

    $result = $ci->solr->executeSearch(true);
    if (count($result['response']['docs']) == 1) {
        $this->index = $result['response']['docs'][0];
    }

    // Local XML resource: use the inner registryObject when the record is wrapped.
    $source = $this->ro->getSimpleXML();
    $serialised = $source->registryObject ? $source->registryObject->asXML() : $source->asXML();

    $xml = simplexml_load_string(addXMLDeclarationUTF8($serialised));
    if ($xml !== false) {
        // Only re-serialise when the first parse succeeded; calling asXML() on
        // the boolean false returned by a failed parse is a fatal error.
        $xml = simplexml_load_string(addXMLDeclarationUTF8($xml->asXML()));
    }

    if ($xml) {
        $this->xml = $xml;

        $rifDom = new DOMDocument();
        $rifDom->loadXML($this->ro->getRif());

        $gXPath = new DOMXpath($rifDom);
        $gXPath->registerNamespace('ro', 'http://ands.org.au/standards/rif-cs/registryObjects');
        $this->gXPath = $gXPath;
    }
}
/**
 * Return $xml prefixed with a UTF-8 XML declaration.
 *
 * Any declaration already present is stripped first with removeXMLDeclaration()
 * (it might be crud). Stripping is repeated while the declaration marker is
 * still found, but stops as soon as a pass leaves the string unchanged. The
 * earlier recursive form never terminated in that situation, for example when
 * the marker belongs to something removeXMLDeclaration() does not remove.
 *
 * @param string $xml XML text, with or without a declaration.
 * @return string The XML text with exactly one leading UTF-8 declaration added.
 */
function addXMLDeclarationUTF8($xml)
{
    while (strpos($xml, '<?xml') !== false) {
        $stripped = removeXMLDeclaration($xml);
        if ($stripped === $xml) {
            // No progress: nothing more can be cleaned, so stop instead of looping forever.
            break;
        }
        $xml = $stripped;
    }

    return '<?xml version="1.0" encoding="UTF-8"?>' . NL . $xml;
}
/**
 * Build the enriched (extRif) version of the current registry object's RIF-CS
 * and store it through $this->ro->updateXML(..., TRUE, 'extrif').
 *
 * An extRif:extendedMetadata element is appended to the record, carrying
 * identifiers, data source details, titles, the selected description,
 * subjects, rights, tags and theme pages. Links are processed afterwards for
 * PUBLISHED records.
 *
 * Text handed to addChild() has "&" escaped as "&amp;" wherever the original
 * code attempted an ampersand replacement; attribute values are passed through
 * unchanged.
 *
 * @param bool $runBenchMark When true, benchmark marks ro_enrich_s1_end to
 *                           ro_enrich_s6_end are recorded on the CI benchmark.
 * @throws Exception When the record cannot be parsed or does not contain
 *                   exactly one key element.
 */
function enrich($runBenchMark = false)
{
    $this->_CI->load->model('data_source/data_sources', 'ds');
    $this->_CI->load->library('purifier');

    $ds = $this->_CI->ds->getByID($this->ro->data_source_id);

    // Reset our namespace object (and go down one level from the wrapper if needed).
    $source = $this->ro->getSimpleXML();
    $rifcs = $source->registryObject ? $source->registryObject->asXML() : $source->asXML();

    // NOTE(review): LIBXML_NOENT substitutes entities while parsing; confirm the
    // RIF-CS handled here is trusted before relying on it.
    $xml = simplexml_load_string(addXMLDeclarationUTF8($rifcs), 'SimpleXMLElement', LIBXML_NOENT);
    if ($xml === false) {
        // A failed parse used to surface as a fatal member call on a boolean.
        throw new Exception("Unable to enrich RIFCS. Not valid RIFCS XML");
    }

    // Clone across the namespace (if applicable).
    $namespaces = $xml->getNamespaces(true);
    if (!in_array(RIFCS_NAMESPACE, $namespaces)) {
        $xml->addAttribute("xmlns", RIFCS_NAMESPACE);
    }
    $xml = simplexml_load_string(addXMLDeclarationUTF8($xml->asXML()), 'SimpleXMLElement', LIBXML_NOENT);
    if ($xml === false) {
        throw new Exception("Unable to enrich RIFCS. Not valid RIFCS XML");
    }

    $xml->addAttribute("extRif:enriched", "true", EXTRIF_NAMESPACE);

    if (count($xml->key) != 1) {
        throw new Exception("Unable to enrich RIFCS. Not valid RIFCS XML");
    }

    /* EXTENDED METADATA CONTAINER */
    $contributor = $this->getContributorExists($this->ro->id);
    $extendedMetadata = $xml->addChild("extRif:extendedMetadata", null, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:slug", $this->ro->slug, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:dataSourceKey", $ds->key, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:status", $this->ro->status, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:id", $this->ro->id, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:dataSourceTitle", $ds->title, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:dataSourceID", $this->ro->data_source_id, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:updateTimestamp", $this->ro->updated, EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:displayTitle", str_replace("&", "&amp;", $this->ro->title), EXTRIF_NAMESPACE);
    $extendedMetadata->addChild("extRif:listTitle", str_replace("&", "&amp;", $this->ro->list_title), EXTRIF_NAMESPACE);

    try {
        $simplifiedTitle = iconv('UTF-8', 'ASCII//TRANSLIT', str_replace("&", "&amp;", $this->ro->list_title));
        $extendedMetadata->addChild("extRif:simplifiedTitle", $simplifiedTitle, EXTRIF_NAMESPACE);
    } catch (Exception $e) {
        throw new Exception('iconv installation/configuration required for simplified title <br/>' . $e);
    }

    $is_contributor_page = false;
    if ($contributor) {
        $extendedMetadata->addChild("extRif:contributor", htmlspecialchars_decode($contributor[0]), EXTRIF_NAMESPACE);
        // Also mark whether this is a contributor page (used for boosting later).
        if ($contributor[0] == $this->ro->slug) {
            $is_contributor_page = true;
        }
    }

    if ($runBenchMark) {
        $this->_CI->benchmark->mark('ro_enrich_s1_end');
    }

    $theDescription = '';
    $theDescriptionType = '';
    if ($xml->{$this->ro->class}->description) {
        $logoAdded = false;
        foreach ($xml->{$this->ro->class}->description as $description) {
            $type = (string) $description['type'];
            $description_str = (string) $description;

            // Add the first logo to the extrif.
            if ($type == 'logo' && !$logoAdded) {
                $logoAdded = true;
                $logoRef = $this->getLogoUrl($description);
                $extendedMetadata->addChild("extrif:logo", $logoRef, EXTRIF_NAMESPACE);
                $this->ro->set_metadata('the_logo', $logoRef);
            }

            // Preference order: first 'brief', then first 'full', then the first
            // typed description, then whatever fills an empty slot. The 'full'
            // test previously used "||", which is always true and let a later
            // 'full' description replace an already selected 'brief' one.
            $replaceCurrent =
                ($type == 'brief' && $theDescriptionType != 'brief')
                || ($type == 'full' && $theDescriptionType != 'brief' && $theDescriptionType != 'full')
                || ($type != '' && $theDescriptionType == '')
                || $theDescription == '';

            if ($replaceCurrent) {
                $theDescription = $description_str;
                $theDescriptionType = $type;
            }
        }

        $theDescription = htmlentities(strip_tags(html_entity_decode($theDescription)));
        // htmlentities() output contains entity references, so the ampersands
        // must be escaped before the text is handed to addChild().
        $extendedMetadata->addChild("extRif:dci_description", str_replace("&", "&amp;", $theDescription), EXTRIF_NAMESPACE);
    }

    if ($runBenchMark) {
        $this->_CI->benchmark->mark('ro_enrich_s2_end');
    }

    $subjects = $extendedMetadata->addChild("extRif:subjects", null, EXTRIF_NAMESPACE);
    foreach ($this->ro->processSubjects() as $subject) {
        $subject_node = $subjects->addChild("extRif:subject", "", EXTRIF_NAMESPACE);
        $subject_node->addChild("extRif:subject_value", str_replace("&", "&amp;", $subject['value']), EXTRIF_NAMESPACE);
        $subject_node->addChild("extRif:subject_type", str_replace("&", "&amp;", $subject['type']), EXTRIF_NAMESPACE);
        $subject_node->addChild("extRif:subject_resolved", str_replace("&", "&amp;", $subject['resolved']), EXTRIF_NAMESPACE);
        $subject_node->addChild("extRif:subject_uri", str_replace("&", "&amp;", $subject['uri']), EXTRIF_NAMESPACE);
    }

    if ($runBenchMark) {
        $this->_CI->benchmark->mark('ro_enrich_s3_end');
    }

    foreach ($this->ro->processLicence() as $right) {
        $theright = $extendedMetadata->addChild("extRif:right", str_replace("&", "&amp;", $right['value']), EXTRIF_NAMESPACE);
        $theright->addAttribute("type", $right['type']);
        if (isset($right['accessRights_type'])) {
            $theright->addAttribute("accessRights_type", $right['accessRights_type']);
        }
        // Attribute values are passed through unchanged; the former "&" to "&"
        // replacement applied to them had no effect.
        if (isset($right['rightsUri'])) {
            $theright->addAttribute("rightsUri", $right['rightsUri']);
        }
        if (isset($right['licence_type'])) {
            $theright->addAttribute("licence_type", $right['licence_type']);
        }
        if (isset($right['licence_group'])) {
            $theright->addAttribute("licence_group", $right['licence_group']);
        }
    }

    // Include the count of any linked records based on identifier matches.
    if ($this->ro->class != 'collection') {
        $extendedMetadata->addChild("extRif:matching_identifier_count", count($this->ro->findMatchingRecords()), EXTRIF_NAMESPACE);
    }

    // ANNOTATIONS
    $annotations = $extendedMetadata->addChild("extRif:annotations", null, EXTRIF_NAMESPACE);

    // Tags
    if ($tags = $this->ro->getTags()) {
        $extRifTags = $annotations->addChild('extRif:tags', null, EXTRIF_NAMESPACE);
        foreach ($tags as $tag) {
            $tag_tag = $extRifTags->addChild('extRif:tag', str_replace("&", "&amp;", $tag['name']), EXTRIF_NAMESPACE);
            $tag_tag->addAttribute('type', $tag['type']);
        }
    }

    // Theme pages
    if ($own_themepages = $this->ro->getThemePages()) {
        foreach ($own_themepages as $t) {
            $extendedMetadata->addChild("extRif:theme_page", $t['slug'], EXTRIF_NAMESPACE);
        }
    }

    // No spatial or temporal data is written to the extrif; the remaining marks
    // are still recorded so benchmark stage names stay stable.
    if ($runBenchMark) {
        $this->_CI->benchmark->mark('ro_enrich_s4_end');
    }
    if ($runBenchMark) {
        $this->_CI->benchmark->mark('ro_enrich_s5_end');
    }
    if ($runBenchMark) {
        $this->_CI->benchmark->mark('ro_enrich_s6_end');
    }

    // Friendlify dates =)
    $xml = $this->ro->extractDatesForDisplay($xml);

    /* Boost contributor pages in search */
    if ($is_contributor_page) {
        $this->ro->search_boost = SEARCH_BOOST_CONTRIBUTOR_PAGE;
    }

    $this->ro->pruneExtrif();
    $this->ro->updateXML($xml->asXML(), TRUE, 'extrif');

    if ($this->ro->status == PUBLISHED) {
        $this->ro->processLinks();
    }
}
/**
 * Assemble the flat array describing the current registry object (the
 * variable is named $json in the code): core fields, timestamps, titles,
 * descriptions, licence class, identifiers, related/citation text, spatial and
 * temporal coverage, theme pages, tags, subjects and related objects.
 *
 * The result is passed through array_filter() before it is returned, so every
 * entry with a falsy value (empty string, empty array, 0, null, false) is
 * dropped, including a blank 'description'.
 *
 * @return array Field name to value (or list of values).
 * @throws Exception When the registry object XML cannot be parsed, or from the
 *                   simplified title conversion handler.
 */
function construct_payload()
{
    $source = $this->ro->getSimpleXML();
    $serialised = $source->registryObject ? $source->registryObject->asXML() : $source->asXML();

    $xml = simplexml_load_string(addXMLDeclarationUTF8($serialised));
    if ($xml === false) {
        // A failed parse used to surface as a fatal member call on a boolean.
        throw new Exception('Unable to construct payload. Not valid RIFCS XML');
    }
    $xml = simplexml_load_string(addXMLDeclarationUTF8($xml->asXML()));

    $json = array();

    $single_values = array('id', 'slug', 'key', 'status', 'data_source_id', 'data_source_key', 'display_title', 'list_title', 'group', 'class', 'type');
    foreach ($single_values as $s) {
        $json[$s] = $this->ro->{$s};
    }
    // display_title is taken from the registry object's title.
    $json['display_title'] = $this->ro->title;

    $json['record_modified_timestamp'] = gmdate('Y-m-d\\TH:i:s\\Z', $this->ro->updated ? $this->ro->updated : $this->ro->created);
    $json['record_created_timestamp'] = gmdate('Y-m-d\\TH:i:s\\Z', $this->ro->created);

    try {
        $json['simplified_title'] = iconv('UTF-8', 'ASCII//TRANSLIT', $this->ro->list_title);
    } catch (Exception $e) {
        throw new Exception('iconv installation/configuration required for simplified title');
    }

    // Matching identifier count
    $json['matching_identifier_count'] = count($this->ro->findMatchingRecords());

    // Contributor
    $contributor = $this->ro->getContributorExists($this->ro->id);
    if ($contributor) {
        $json['contributor_page'] = $contributor[0];
    }

    // Descriptions
    $this->load->library('purifier');
    $json['description_type'] = array();
    $json['description_value'] = array();

    $theDescription = '';
    $theDescriptionType = '';
    foreach ($xml->{$this->ro->class}->description as $description) {
        $type = (string) $description['type'];
        $description_str = html_entity_decode((string) $description);

        // Clean the HTML.
        $clean_html = $this->purifier->purify_html($description_str);

        // Only convert newlines to <br> when the text carries no markup of its own.
        $hasMarkup = strpos($description_str, "<br") !== false
            || strpos($description_str, "<p") !== false
            || strpos($description_str, "&#60;p") !== false;
        $encoded_html = $hasMarkup ? $clean_html : nl2br($clean_html);

        // The one and only THE description. Preference order: first 'brief',
        // then first 'full', then the first typed description, then whatever
        // fills an empty slot. The 'full' test previously used "||", which is
        // always true and let a later 'full' replace a selected 'brief'.
        $replaceCurrent =
            ($type == 'brief' && $theDescriptionType != 'brief')
            || ($type == 'full' && $theDescriptionType != 'brief' && $theDescriptionType != 'full')
            || ($type != '' && $theDescriptionType == '')
            || $theDescription == '';

        if ($replaceCurrent) {
            $theDescription = $encoded_html;
            $theDescriptionType = $type;
        }

        $json['description_value'][] = $encoded_html;
        $json['description_type'][] = $type;
    }

    // NOTE(review): a blank description is removed again by the array_filter()
    // at the end of this function.
    $json['description'] = htmlentities(strip_tags(html_entity_decode($theDescription), '<p></p><br><br />'));

    // Licence: the last right carrying a licence_group wins.
    if ($rights = $this->ro->processLicence()) {
        foreach ($rights as $right) {
            if (isset($right['licence_group'])) {
                $json['license_class'] = $right['licence_group'];
            }
        }
    }

    // Identifiers
    if ($identifiers = $this->ro->getIdentifiers()) {
        $json['identifier_value'] = array();
        $json['identifier_type'] = array();
        foreach ($identifiers as $identifier) {
            $json['identifier_value'][] = $identifier['identifier'];
            $json['identifier_type'][] = $identifier['identifier_type'];
        }
    }

    // Related info text for searching
    $json['related_info_search'] = '';
    foreach ($xml->{$this->ro->class}->relatedInfo as $relatedInfo) {
        $dom = new DOMDocument();
        $dom->loadXML($relatedInfo->saveXML());
        $xpt = new DOMXpath($dom);
        foreach ($xpt->query('//relatedInfo') as $node) {
            $json['related_info_search'] .= trim($node->nodeValue);
        }
    }

    // Citation metadata text
    $json['citation_info_search'] = '';
    foreach ($xml->{$this->ro->class}->citationInfo as $citationInfo) {
        $dom = new DOMDocument();
        $dom->loadXML($citationInfo->saveXML());
        $xpt = new DOMXpath($dom);
        foreach ($xpt->query('//citationInfo') as $node) {
            $json['citation_info_search'] .= trim($node->nodeValue);
        }
    }

    // Spatial
    if ($spatialLocations = $this->ro->getLocationAsLonLats()) {
        $json['spatial_coverage_extents'] = array();
        $json['spatial_coverage_polygons'] = array();
        $json['spatial_coverage_centres'] = array();

        $sumOfAllAreas = 0;
        foreach ($spatialLocations as $lonLat) {
            $json['spatial_coverage_polygons'][] = $lonLat;
            $extents = $this->ro->calcExtent($lonLat);
            $json['spatial_coverage_extents'][] = $extents['extent'];
            $sumOfAllAreas += $extents['area'];
            $json['spatial_coverage_centres'][] = $extents['center'];
        }
        $json['spatial_coverage_area_sum'] = $sumOfAllAreas;
    }

    // Temporal
    if ($temporalCoverageList = $this->ro->processTemporal()) {
        $json['date_from'] = array();
        $json['date_to'] = array();
        foreach ($temporalCoverageList as $temporal) {
            if ($temporal['type'] == 'dateFrom') {
                $json['date_from'][] = $temporal['value'];
            } elseif ($temporal['type'] == 'dateTo') {
                $json['date_to'][] = $temporal['value'];
            }
        }
        $json['earliest_year'] = $this->ro->getEarliestAsYear();
        $json['latest_year'] = $this->ro->getLatestAsYear();
    }

    // Theme pages
    if ($own_themepages = $this->ro->getThemePages()) {
        $json['theme_page'] = array();
        foreach ($own_themepages as $t) {
            $json['theme_page'][] = $t['slug'];
        }
    }

    // Tags
    if ($tags = $this->ro->getTags()) {
        $json['tag'] = array();
        $json['tag_type'] = array();
        foreach ($tags as $tag) {
            $json['tag'][] = $tag['name'];
            $json['tag_type'][] = $tag['type'];
        }
    }

    // Subjects
    $subjects = $this->ro->processSubjects();
    $json['subject_value_resolved'] = array();
    $json['subject_value_unresolved'] = array();
    $json['subject_type'] = array();
    $json['subject_vocab_uri'] = array();
    foreach ($subjects as $s) {
        $json['subject_value_unresolved'][] = $s['value'];
        $json['subject_value_resolved'][] = $s['resolved'];
        $json['subject_vocab_uri'][] = $s['uri'];
        $json['subject_type'][] = $s['type'];
    }

    // Related objects
    $related_objects = $this->ro->getAllRelatedObjects(false, true, true);
    $json['related_object_key'] = array();
    $json['related_object_id'] = array();
    $json['related_object_class'] = array();
    $json['related_object_display_title'] = array();
    $json['related_object_relation'] = array();
    foreach ($related_objects as $related_object) {
        $json['related_object_key'][] = $related_object['key'];
        $json['related_object_id'][] = $related_object['registry_object_id'];
        $json['related_object_class'][] = $related_object['class'];
        $json['related_object_display_title'][] = $related_object['title'];
        $json['related_object_relation'][] = $related_object['relation_type'];
    }

    // Drop every entry whose value is falsy.
    return array_filter($json);
}
/**
 * Produce the EndNote citation text for a registry object by feeding its XML,
 * an XPath helper over its RIF-CS and its Solr index document to the
 * `citations` handler.
 *
 * @param mixed  $registry_object_id Id used in the Solr filter query.
 * @param object $cite_ro            Registry object to cite; must provide
 *                                   getSimpleXML() and getRif().
 * @return mixed Whatever citations::getEndnoteText() returns.
 */
function use_citation_handle($registry_object_id, $cite_ro)
{
    require_once REGISTRY_APP_PATH . '/services/method_handlers/registry_object_handlers/citations.php';

    // Both values are only filled in conditionally below but are always handed
    // to the handler, so give them explicit defaults instead of relying on
    // undefined variables.
    $gXPath = null;
    $index = null;

    $source = $cite_ro->getSimpleXML();
    $serialised = $source->registryObject ? $source->registryObject->asXML() : $source->asXML();

    $xml = simplexml_load_string(addXMLDeclarationUTF8($serialised));
    if ($xml !== false) {
        // Only re-serialise when the first parse succeeded; calling asXML() on
        // the boolean false returned by a failed parse is a fatal error.
        $xml = simplexml_load_string(addXMLDeclarationUTF8($xml->asXML()));
    }

    if ($xml) {
        $rifDom = new DOMDocument();
        $rifDom->loadXML($cite_ro->getRif());
        $gXPath = new DOMXpath($rifDom);
        $gXPath->registerNamespace('ro', 'http://ands.org.au/standards/rif-cs/registryObjects');
    }

    $ci =& get_instance();
    $ci->load->library('solr');
    $ci->solr->clearOpt('fq');
    $ci->solr->setOpt('fq', '+id:' . $registry_object_id);
    $ci->solr->setOpt('fl', 'id,key,slug,title,class,type,data_source_id,group,created,status,subject_value_resolved');

    $result = $ci->solr->executeSearch(true);
    if (count($result['response']['docs']) == 1) {
        $index = $result['response']['docs'][0];
    }

    $resource = array(
        'index' => $index,
        'xml' => $xml,
        'gXPath' => $gXPath,
        'ro' => $cite_ro,
        'params' => '',
        'default_params' => '',
    );

    $citation_handler = new citations($resource);

    return $citation_handler->getEndnoteText();
}