/**
 * Translate Primo's XML into array of arrays.
 *
 * Starts from the parent's processed result and then, for every document in
 * the response, sets (or clears) the OpenURL, prefixes the record id with
 * "pci." and, when highlighting is enabled, rebuilds the description with
 * Primo's highlight markup intact and collects highlighted field values under
 * the 'highlightDetails' key.
 *
 * NOTE(review): assumes the parent returns 'documents' in the same order and
 * count as the DOC elements in the XML -- confirm against the parent class.
 *
 * @param string $data The raw xml from Primo
 *
 * @return array The processed response from Primo
 *
 * @throws \Exception If the XML cannot be parsed
 */
protected function process($data)
{
    $res = parent::process($data);

    // Load API content as XML objects. The constructor throws on malformed
    // input (it never returns false), so translate that into our own error.
    try {
        $sxe = new \SimpleXmlElement($data);
    } catch (\Exception $e) {
        throw new \Exception('Error while parsing the document', 0, $e);
    }

    // Register the 'sear' namespace at the top level to avoid problems:
    $sxe->registerXPathNamespace(
        'sear',
        'http://www.exlibrisgroup.com/xsd/jaguar/search'
    );

    // Get the available namespaces. The Primo API uses multiple namespaces.
    // Will be used to navigate the DOM for elements that have namespaces
    $namespaces = $sxe->getNameSpaces(true);
    // The fallback below handles responses where //sear:DOC matches nothing,
    // so do not assume the 'sear' prefix is declared.
    $searNs = isset($namespaces['sear']) ? $namespaces['sear'] : null;

    $docset = $sxe->xpath('//sear:DOC');
    if (empty($docset)) {
        $docset = isset($sxe->JAGROOT->RESULT->DOCSET->DOC)
            ? $sxe->JAGROOT->RESULT->DOCSET->DOC
            : [];
    }

    $start = '<span class="searchword">';
    $end = '</span>';
    $fieldList = [
        'title' => 'title',
        'creator' => 'author',
        'description' => 'description'
    ];

    $docCount = count($docset);
    for ($i = 0; $i < $docCount; $i++) {
        $doc = $docset[$i];

        // Set OpenURL
        $sear = $doc->children($searNs);
        if ($openUrl = $this->getOpenUrl($sear)) {
            $res['documents'][$i]['url'] = $openUrl;
        } else {
            unset($res['documents'][$i]['url']);
        }

        // Prefix records id's
        $res['documents'][$i]['recordid']
            = 'pci.' . $res['documents'][$i]['recordid'];

        // Everything below is highlighting-specific
        if (!$this->highlighting) {
            continue;
        }

        // VuFind strips Primo highlighting tags from the description,
        // so we need to re-read the field (preserving highlighting tags).
        $description = isset($doc->PrimoNMBib->record->display->description)
            ? (string) $doc->PrimoNMBib->record->display->description
            : (string) $doc->PrimoNMBib->record->search->description;
        $description = trim(mb_substr($description, 0, 2500, 'UTF-8'));

        // these may contain all kinds of metadata, and just stripping
        // tags mushes it all together confusingly.
        $description = str_replace("P>", "p>", $description);
        $paragraphs = [];
        foreach (explode("<p>", $description) as $paragraph) {
            $paragraph = trim($paragraph);
            // get rid of entries that would just have spaces
            if (trim(strip_tags($paragraph)) !== '') {
                $paragraphs[] = $paragraph;
            }
        }

        // now all paragraphs are converted to linebreaks
        $res['documents'][$i]['description'] = implode("<br>", $paragraphs);

        $hilited = [];
        foreach ($fieldList as $field => $hiliteField) {
            if (!isset($res['documents'][$i][$field])) {
                continue;
            }
            $fieldValue = $res['documents'][$i][$field];
            $isMultiValued = is_array($fieldValue);
            $values = $isMultiValued ? $fieldValue : [$fieldValue];
            $valuesHilited = [];
            foreach ($values as $idx => $val) {
                if (stripos($val, $start) === false
                    || stripos($val, $end) === false
                ) {
                    continue;
                }
                // Replace Primo hilite-tags
                $valuesHilited[] = str_replace(
                    [$start, $end],
                    ['{{{{START_HILITE}}}}', '{{{{END_HILITE}}}}'],
                    $val
                );
                // Strip Primo hilite-tags from record fields, keeping the
                // value in place so the other values are not lost
                $values[$idx] = str_replace([$start, $end], '', $val);
            }
            if (!empty($valuesHilited)) {
                $res['documents'][$i][$field]
                    = $isMultiValued ? $values : reset($values);
                $hilited[$hiliteField] = $valuesHilited;
            }
        }
        $res['documents'][$i]['highlightDetails'] = $hilited;
    }

    return $res;
}
/**
 * Detects the namespaces used in the feed.
 *
 * Reads the feed URL from $_POST['feed_source'], downloads it and ends the
 * request by printing either a JSON object of namespace prefix => URI pairs
 * (without the default and 'xml' namespaces) or a translated error message.
 *
 * @since 2.8
 */
public static function get_namespaces_from_feed() {
    // Get the feed source from POST data; anything that is not a string
    // (e.g. an array-valued field) is treated as missing
    $feed_source = ( isset( $_POST['feed_source'] ) && is_string( $_POST['feed_source'] ) )
        ? trim( $_POST['feed_source'] )
        : '';

    // If no feed source is given, or an empty feed source is given, print an error message
    if ( $feed_source === '' ) {
        die( __( 'Invalid feed source given.', WPRSS_TEXT_DOMAIN ) );
    }

    // The value comes straight from the request, so only allow remote
    // http(s) URLs: this keeps local paths and stream wrappers such as
    // file://, php:// or phar:// away from file_get_contents()
    $scheme = parse_url( $feed_source, PHP_URL_SCHEME );
    if ( ! is_string( $scheme ) || ! in_array( strtolower( $scheme ), array( 'http', 'https' ), true ) ) {
        die( __( 'Invalid feed source given.', WPRSS_TEXT_DOMAIN ) );
    }

    // Read the feed source. Warnings are suppressed on purpose: failure is
    // reported through the explicit check below instead of leaking into the
    // response body.
    $feed = @file_get_contents( $feed_source );

    // Show an error
    if ( $feed === FALSE ) {
        die( __( 'Failed to read feed source XML. Check that your URL is a valid feed source URL', WPRSS_TEXT_DOMAIN ) );
    }

    try {
        // Parse the XML
        $xml = new SimpleXmlElement( $feed );
        // Get the namespaces
        $namespaces = $xml->getNameSpaces( true );
    }
    catch( Exception $e ) {
        die( __( 'Failed to parse the RSS feed XML. The feed may contain errors or is not a valid feed source.', WPRSS_TEXT_DOMAIN ) );
    }

    // Unset the standard RSS and XML namespaces
    unset( $namespaces[''] );
    unset( $namespaces['xml'] );

    // Print the remaining namespaces as an encoded JSON string
    die( json_encode( $namespaces ) );
}
/**
 * Translate Primo's XML into array of arrays.
 *
 * @param string $data The raw xml from Primo
 *
 * @return array The processed response from Primo, with the keys
 * 'recordCount', 'documents', 'facets' and 'didYouMean'
 *
 * @throws \Exception If the response is empty, cannot be parsed, or carries
 * no TOTALHITS attribute
 */
protected function process($data)
{
    // make sure data exists
    if (strlen($data) == 0) {
        throw new \Exception('Primo did not return any data');
    }

    // Load API content as XML objects. The constructor throws on malformed
    // input (it never returns false), so translate that into our own error.
    try {
        $sxe = new \SimpleXmlElement($data);
    } catch (\Exception $e) {
        throw new \Exception('Error while parsing the document', 0, $e);
    }

    // some useful data about these results
    $totalhitsarray = $sxe->xpath("//@TOTALHITS");

    // if totalhits is missing but we have a message, this is an error
    // situation.
    if (!isset($totalhitsarray[0])) {
        $messages = $sxe->xpath("//@MESSAGE");
        $message = isset($messages[0])
            ? (string) $messages[0] : "TOTALHITS attribute missing.";
        throw new \Exception($message);
    }
    $totalhits = (int) $totalhitsarray[0];

    // TODO: would FIRSTHIT / LASTHIT be useful?

    // Register the 'sear' namespace at the top level to avoid problems:
    $sxe->registerXPathNamespace(
        'sear',
        'http://www.exlibrisgroup.com/xsd/jaguar/search'
    );

    // Get the available namespaces. The Primo API uses multiple namespaces.
    // Will be used to navigate the DOM for elements that have namespaces
    $namespaces = $sxe->getNameSpaces(true);
    // The fallbacks below handle responses where the sear: xpath queries
    // match nothing, so do not assume the 'sear' prefix is declared.
    $searNs = isset($namespaces['sear']) ? $namespaces['sear'] : null;

    // Get results set data and add to $items array
    // This foreach grabs all the child elements of sear:DOC,
    // except those with namespaces
    $items = [];

    $docset = $sxe->xpath('//sear:DOC');
    if (empty($docset)) {
        $docset = isset($sxe->JAGROOT->RESULT->DOCSET->DOC)
            ? $sxe->JAGROOT->RESULT->DOCSET->DOC
            : [];
    }

    foreach ($docset as $doc) {
        $item = [];
        // Due to a bug in the primo API, the first result has
        // a namespace (prim:) while the rest of the results do not.
        // Those child elements do not get added to $doc.
        // If the bib parent element (PrimoNMBib) is missing for a $doc,
        // that means it has the prim namespace prefix.
        // So first set the right prefix
        $prefix = $doc;
        if ($doc->PrimoNMBib != 'true' && isset($namespaces['prim'])) {
            // Use the namespace prefix to get those missing child
            // elements out of $doc.
            $prefix = $doc->children($namespaces['prim']);
        }

        // Now, navigate the DOM and set values to the array
        // cast to (string) to get the element's value not an XML object
        $record = $prefix->PrimoNMBib->record;
        $item['recordid'] = substr((string) $record->control->recordid, 3);
        $item['title'] = (string) $record->display->title;

        // format
        $item['format'] = ucwords(
            str_replace('_', ' ', (string) $record->display->type)
        );

        // creators
        $creator = trim((string) $record->display->creator);
        if (strlen($creator) > 0) {
            $item['creator'] = array_map('trim', explode(';', $creator));
        }

        // subjects
        $subject = trim((string) $record->display->subject);
        if (strlen($subject) > 0) {
            $item['subjects'] = explode(';', $subject);
        }
        $item['ispartof'] = (string) $record->display->ispartof;

        // description is sort of complicated
        // TODO: sometimes the entire article is in the description.
        $description = isset($record->display->description)
            ? (string) $record->display->description
            : (string) $record->search->description;
        $description = trim(mb_substr($description, 0, 2500, 'UTF-8'));

        // these may contain all kinds of metadata, and just stripping
        // tags mushes it all together confusingly.
        $description = str_replace("P>", "p>", $description);
        // strip tags and trim each paragraph so array_filter can get rid of
        // entries that would just have spaces
        $paragraphs = array_map(
            function ($paragraph) {
                return trim(strip_tags($paragraph));
            },
            explode("<p>", $description)
        );

        // now all paragraphs are converted to linebreaks
        $item['description'] = implode("<br>", array_filter($paragraphs));

        // and the rest!
        $item['language'] = (string) $record->display->language;
        $item['source'] = implode('; ', (array) $record->display->source);
        $item['identifier'] = (string) $record->display->identifier;
        $item['fulltext'] = (string) $record->delivery->fulltext;

        $item['issn'] = [];
        foreach ($record->search->issn as $issn) {
            $item['issn'][] = (string) $issn;
        }

        // Get the URL, which has a separate namespace. Prefer the OpenURL
        // link and fall back to the GetIt2 attribute; GETIT is only
        // dereferenced when it is actually present.
        $sear = $doc->children($searNs);
        if (!empty($sear->LINKS->openurl)) {
            $item['url'] = (string) $sear->LINKS->openurl;
        } elseif (isset($sear->GETIT)) {
            $item['url'] = (string) $sear->GETIT->attributes()->GetIt2;
        } else {
            $item['url'] = '';
        }

        // Container data
        $addata = $record->addata;
        $item['container_title'] = (string) $addata->jtitle;
        $item['container_volume'] = (string) $addata->volume;
        $item['container_issue'] = (string) $addata->issue;
        $item['container_start_page'] = (string) $addata->spage;
        $item['container_end_page'] = (string) $addata->epage;

        // Merge in eissn / issn values that are not listed yet. The
        // comparison is strict so numeric-looking strings such as
        // '01234567' and '1234567' are not treated as equal.
        foreach ([$addata->eissn, $addata->issn] as $candidates) {
            foreach ($candidates as $candidate) {
                if (!in_array((string) $candidate, $item['issn'], true)) {
                    $item['issn'][] = (string) $candidate;
                }
            }
        }

        // Remove dash-less ISSNs if there are corresponding dashed ones
        // (We could convert dash-less ISSNs to dashed ones, but try to stay
        // true to the metadata)
        $allIssns = $item['issn'];
        $item['issn'] = array_values(
            array_filter(
                $allIssns,
                function ($issn) use ($allIssns) {
                    $dashed = substr($issn, 0, 4) . '-' . substr($issn, 4);
                    return strlen($issn) != 8
                        || !in_array($dashed, $allIssns, true);
                }
            )
        );

        $item['fullrecord'] = $record->asXml();
        $items[] = $item;
    }

    // Get facet data and add to multidimensional $facets array
    // Start by getting XML for each FACET element,
    // which has the name of the facet as an attribute.
    // We only get the first level of elements
    // because child elements have a namespace prefix
    $facets = [];

    $facetSet = $sxe->xpath('//sear:FACET');
    if (empty($facetSet)) {
        $facetSet = !empty($sxe->JAGROOT->RESULT->FACETLIST)
            ? $sxe->JAGROOT->RESULT->FACETLIST->children($searNs)
            : [];
    }

    foreach ($facetSet as $facetlist) {
        // Set first level of array with the facet name
        $facet_name = (string) $facetlist->attributes()->NAME;

        // Use the namespace prefix to get second level child elements
        // (the facet values) out of $facetlist.
        foreach ($facetlist->children($searNs) as $facetvalues) {
            // Second level of the array is facet values and their counts
            $facet_key = (string) $facetvalues->attributes()->KEY;
            $facets[$facet_name][$facet_key]
                = (string) $facetvalues->attributes()->VALUE;
        }
    }

    $didYouMean = [];
    $suggestions = $sxe->xpath('//sear:QUERYTRANSFORMS');
    if (!empty($suggestions)) {
        foreach ($suggestions as $suggestion) {
            $didYouMean[] = (string) $suggestion->attributes()->QUERY;
        }
    }

    return [
        'recordCount' => $totalhits,
        'documents' => $items,
        'facets' => $facets,
        'didYouMean' => $didYouMean
    ];
}
/**
 * Download the XML at $this->_query_link and pass every 'dc'-prefixed child
 * of the root element to the matching _fetch* method.
 *
 * Child names without a handler are ignored.
 *
 * @return void
 *
 * @throws \RuntimeException If the document cannot be read
 * @throws \Exception        If the document is not well-formed XML (raised
 *                           by the SimpleXMLElement constructor)
 */
private function _loadXML()
{
    $feed = file_get_contents($this->_query_link);
    if ($feed === false) {
        // Fail with a clear message instead of handing `false` to the parser
        throw new \RuntimeException(
            'Unable to read XML from ' . $this->_query_link
        );
    }

    $xml = new SimpleXmlElement($feed);

    // Look the namespace URI up from the document so it is not hard-coded.
    // If the document declares no 'dc' prefix, fall back to null, which is
    // what the unguarded array read evaluated to (minus the PHP warning).
    $namespaces = $xml->getNameSpaces(true);
    $dcNamespace = isset($namespaces['dc']) ? $namespaces['dc'] : null;

    $dc = (array) $xml->children($dcNamespace);

    // load data to $this->this results
    foreach ($dc as $key => $value) {
        // The array cast can yield integer keys (e.g. 0 for text content);
        // with switch's loose comparison 0 would match 'creator' on PHP < 8.
        if (!is_string($key)) {
            continue;
        }

        switch ($key) {
            case 'creator':
                $this->_fetchAuthors($value);
                break;
            case 'identifier':
                $this->_fetchIds($value);
                break;
            case 'title':
                $this->_fetchTitle($value);
                break;
            case 'publisher':
                $this->_fetchPublisher($value);
                break;
            case 'language':
                $this->_fetchLanguage($value);
                break;
            case 'description':
                $this->_fetchDescription($value);
                break;
            case 'subject':
                $this->_fetchSubject($value);
                break;
            case 'format':
                $this->_fetchFormat($value);
                break;
        }
    }
}