Example #1
0
 /**
  * Post-process the parent's parsed Primo response.
  *
  * Starts from the array returned by parent::process() and, for every DOC
  * element found in the XML, sets or removes the document's 'url', prefixes
  * its 'recordid' with "pci." and, when $this->highlighting is enabled,
  * rebuilds 'description' and fills 'highlightDetails'.
  *
  * @param string $data The raw xml from Primo
  *
  * @return array      The processed response from Primo
  *
  * @throws \Exception If the XML cannot be parsed
  */
 protected function process($data)
 {
     $res = parent::process($data);

     // SimpleXMLElement's constructor throws on malformed XML (it never
     // returns false), so translate that failure into our own message.
     try {
         $sxe = new \SimpleXmlElement($data);
     } catch (\Exception $e) {
         throw new \Exception('Error while parsing the document', 0, $e);
     }

     // Register the 'sear' namespace at the top level to avoid problems:
     $sxe->registerXPathNamespace('sear', 'http://www.exlibrisgroup.com/xsd/jaguar/search');

     // The Primo API uses multiple namespaces; look up the one needed to
     // reach the children of each DOC. When the document does not declare it,
     // null selects the un-namespaced children (the previous implicit
     // behaviour) without triggering an undefined-index notice.
     $namespaces = $sxe->getNameSpaces(true);
     $searNs = isset($namespaces['sear']) ? $namespaces['sear'] : null;

     $docset = $sxe->xpath('//sear:DOC');
     if (empty($docset) && isset($sxe->JAGROOT->RESULT->DOCSET->DOC)) {
         $docset = $sxe->JAGROOT->RESULT->DOCSET->DOC;
     }
     if ($docset === false || $docset === null) {
         // xpath() signals failure with a non-countable value
         $docset = [];
     }

     $start = '<span class="searchword">';
     $end = '</span>';
     $fieldList = ['title' => 'title', 'creator' => 'author', 'description' => 'description'];

     $docCount = count($docset);
     for ($i = 0; $i < $docCount; $i++) {
         $doc = $docset[$i];

         // Set OpenURL
         $sear = $doc->children($searNs);
         $openUrl = $this->getOpenUrl($sear);
         if ($openUrl) {
             $res['documents'][$i]['url'] = $openUrl;
         } else {
             unset($res['documents'][$i]['url']);
         }

         // Prefix records id's
         $res['documents'][$i]['recordid'] = 'pci.' . $res['documents'][$i]['recordid'];

         if (!$this->highlighting) {
             continue;
         }

         // VuFind strips Primo highlighting tags from the description,
         // so we need to re-read the field (preserving highlighting tags).
         $description = isset($doc->PrimoNMBib->record->display->description)
             ? (string) $doc->PrimoNMBib->record->display->description
             : (string) $doc->PrimoNMBib->record->search->description;
         $description = trim(mb_substr($description, 0, 2500, 'UTF-8'));

         // These may contain all kinds of metadata, and just stripping
         // tags mushes it all together confusingly, so split on paragraphs
         // and drop the ones that contain nothing but markup/whitespace.
         $description = str_replace("P>", "p>", $description);
         $paragraphs = [];
         foreach (explode("<p>", $description) as $paragraph) {
             $paragraph = trim($paragraph);
             if (trim(strip_tags($paragraph)) !== '') {
                 $paragraphs[] = $paragraph;
             }
         }
         // now all paragraphs are converted to linebreaks
         $res['documents'][$i]['description'] = implode("<br>", $paragraphs);

         $hilited = [];
         foreach ($fieldList as $field => $hiliteField) {
             if (!isset($res['documents'][$i][$field])) {
                 continue;
             }
             $fieldValue = $res['documents'][$i][$field];
             $isMultiValued = is_array($fieldValue);
             $values = $isMultiValued ? $fieldValue : [$fieldValue];
             $valuesHilited = [];
             foreach ($values as $index => $val) {
                 if (stripos($val, $start) === false || stripos($val, $end) === false) {
                     continue;
                 }
                 // Replace Primo hilite-tags with VuFind's markers
                 $valuesHilited[] = str_replace(
                     [$start, $end],
                     ['{{{{START_HILITE}}}}', '{{{{END_HILITE}}}}'],
                     $val
                 );
                 // Strip Primo hilite-tags from this one value only, so the
                 // other values of a multi-valued field are preserved.
                 $values[$index] = str_replace([$start, $end], '', $val);
             }
             if (!empty($valuesHilited)) {
                 $res['documents'][$i][$field] = $isMultiValued ? $values : $values[0];
                 $hilited[$hiliteField] = $valuesHilited;
             }
         }
         $res['documents'][$i]['highlightDetails'] = $hilited;
     }

     return $res;
 }
	/**
	 * Detects the namespaces used in the feed.
	 *
	 * Reads the feed URL from $_POST['feed_source'], downloads and parses it,
	 * and always terminates the request: on success it prints the feed's
	 * namespaces (prefix => URI, minus the default and 'xml' ones) as a JSON
	 * string; on failure it prints a translated error message.
	 *
	 * @since 2.8
	 */
	public static function get_namespaces_from_feed() {
		// Get the feed source from POST data; anything that is not a string is treated as missing
		$feed_source = ( isset( $_POST['feed_source'] ) && is_string( $_POST['feed_source'] ) )
			? trim( $_POST['feed_source'] )
			: NULL;
		// If no feed source is given, or an empty feed source is given, print an error message
		if ( $feed_source === '' || $feed_source === NULL ) {
			die( __('Invalid feed source given.', WPRSS_TEXT_DOMAIN) );
		}

		// The value comes straight from the request, so only allow plain web URLs.
		// Without this check file_get_contents() would also accept local paths and
		// stream wrappers such as php:// or phar://.
		$scheme = strtolower( (string) parse_url( $feed_source, PHP_URL_SCHEME ) );
		if ( $scheme !== 'http' && $scheme !== 'https' ) {
			die( __('Invalid feed source given.', WPRSS_TEXT_DOMAIN) );
		}

		// Read the feed source. Warnings are suppressed on purpose so they cannot
		// leak into the response body; failure is detected via the return value.
		$feed = @file_get_contents( $feed_source );
		// Show an error
		if ( $feed === FALSE ) {
			die( __( 'Failed to read feed source XML. Check that your URL is a valid feed source URL', WPRSS_TEXT_DOMAIN ) );
		}

		try {
			// Parse the XML; LIBXML_NONET keeps the parser from fetching anything over the network
			$xml = new SimpleXmlElement( $feed, LIBXML_NONET );
			// Get the namespaces
			$namespaces = $xml->getNameSpaces(true);
			// Unset the standard RSS and XML namespaces
			unset( $namespaces[''] );
			unset( $namespaces['xml'] );
			// Print the remaining namespaces as an encoded JSON string
			die( json_encode( $namespaces ) );
		}
		catch( Exception $e ) {
			die( __( 'Failed to parse the RSS feed XML. The feed may contain errors or is not a valid feed source.', WPRSS_TEXT_DOMAIN ) );
		}
	}
Example #3
0
 /**
  * Translate Primo's XML into array of arrays.
  *
  * @param string $data The raw xml from Primo
  *
  * @return array      The processed response from Primo, with the keys
  *                    'recordCount', 'documents', 'facets' and 'didYouMean'
  *
  * @throws \Exception If $data is empty, cannot be parsed, or carries no
  *                    TOTALHITS attribute (the response's MESSAGE attribute
  *                    is used as the exception message when present)
  */
 protected function process($data)
 {
     // make sure data exists
     if (strlen($data) === 0) {
         throw new \Exception('Primo did not return any data');
     }

     // Load API content as XML objects. SimpleXMLElement's constructor throws
     // on malformed XML (it never returns false), so translate that failure
     // into our own message.
     try {
         $sxe = new \SimpleXmlElement($data);
     } catch (\Exception $e) {
         throw new \Exception('Error while parsing the document', 0, $e);
     }

     // if totalhits is missing but we have a message, this is an error
     // situation.
     $totalhitsarray = $sxe->xpath("//@TOTALHITS");
     if (!isset($totalhitsarray[0])) {
         $messages = $sxe->xpath("//@MESSAGE");
         $message = isset($messages[0]) ? (string) $messages[0] : "TOTALHITS attribute missing.";
         throw new \Exception($message);
     }
     $totalhits = (int) $totalhitsarray[0];
     // TODO: would the FIRSTHIT / LASTHIT attributes be useful as well?

     // Register the 'sear' namespace at the top level to avoid problems:
     $sxe->registerXPathNamespace('sear', 'http://www.exlibrisgroup.com/xsd/jaguar/search');

     // Get the available namespaces. The Primo API uses multiple namespaces.
     // Will be used to navigate the DOM for elements that have namespaces.
     // When 'sear' is not declared, null selects the un-namespaced children
     // (the previous implicit behaviour) without an undefined-index notice.
     $namespaces = $sxe->getNameSpaces(true);
     $searNs = isset($namespaces['sear']) ? $namespaces['sear'] : null;

     // Get results set data and add to $items array
     $items = [];
     $docset = $sxe->xpath('//sear:DOC');
     if (empty($docset) && isset($sxe->JAGROOT->RESULT->DOCSET->DOC)) {
         $docset = $sxe->JAGROOT->RESULT->DOCSET->DOC;
     }
     if ($docset === false || $docset === null) {
         // xpath() signals failure with a non-iterable value
         $docset = [];
     }

     foreach ($docset as $doc) {
         $item = [];

         // Due to a bug in the primo API, the first result has
         //   a namespace (prim:) while the rest of the results do not.
         //   Those child elements do not get added to $doc.
         //   If the bib parent element (PrimoNMBib) is missing for a $doc,
         //   that means it has the prim namespace prefix.
         // So first set the right prefix
         // NOTE(review): the comparison against 'true' is kept exactly as it
         // was; confirm it really expresses "PrimoNMBib is missing".
         $prefix = $doc;
         if ($doc->PrimoNMBib != 'true' && isset($namespaces['prim'])) {
             // Use the namespace prefix to get those missing child
             //   elements out of $doc.
             $prefix = $doc->children($namespaces['prim']);
         }

         // Now, navigate the DOM and set values to the array
         // cast to (string) to get the element's value not an XML object
         $record = $prefix->PrimoNMBib->record;
         $display = $record->display;

         $item['recordid'] = substr((string) $record->control->recordid, 3);
         $item['title'] = (string) $display->title;
         // format
         $item['format'] = ucwords(str_replace('_', ' ', (string) $display->type));
         // creators
         $creator = trim((string) $display->creator);
         if (strlen($creator) > 0) {
             $item['creator'] = array_map('trim', explode(';', $creator));
         }
         // subjects
         $subject = trim((string) $display->subject);
         if (strlen($subject) > 0) {
             $item['subjects'] = explode(';', $subject);
         }
         $item['ispartof'] = (string) $display->ispartof;

         // description is sort of complicated
         // TODO: sometimes the entire article is in the description.
         $description = isset($display->description)
             ? (string) $display->description
             : (string) $record->search->description;
         $description = trim(mb_substr($description, 0, 2500, 'UTF-8'));
         // these may contain all kinds of metadata, and just stripping
         //   tags mushes it all together confusingly.
         $description = str_replace("P>", "p>", $description);
         $paragraphs = [];
         foreach (explode("<p>", $description) as $paragraph) {
             // strip tags and trim, then keep everything that is not blank
             // (an explicit test, so a paragraph that is just "0" survives)
             $paragraph = trim(strip_tags($paragraph));
             if ($paragraph !== '') {
                 $paragraphs[] = $paragraph;
             }
         }
         // now all paragraphs are converted to linebreaks
         $item['description'] = implode("<br>", $paragraphs);

         // and the rest!
         $item['language'] = (string) $display->language;
         $item['source'] = implode('; ', (array) $display->source);
         $item['identifier'] = (string) $display->identifier;
         $item['fulltext'] = (string) $record->delivery->fulltext;
         $item['issn'] = [];
         foreach ($record->search->issn as $issn) {
             $item['issn'][] = (string) $issn;
         }
         // TODO: are display->publisher and display->lds50 (peer reviewed)
         // needed as well?

         // Get the URL, which has a separate namespace. Prefer the OpenURL
         // link and fall back to the GetIt2 attribute of GETIT when that
         // element exists at all.
         $sear = $doc->children($searNs);
         if (!empty($sear->LINKS->openurl)) {
             $item['url'] = (string) $sear->LINKS->openurl;
         } elseif (isset($sear->GETIT)) {
             $item['url'] = (string) $sear->GETIT->attributes()->GetIt2;
         } else {
             $item['url'] = '';
         }

         // Container data
         $addata = $record->addata;
         $item['container_title'] = (string) $addata->jtitle;
         $item['container_volume'] = (string) $addata->volume;
         $item['container_issue'] = (string) $addata->issue;
         $item['container_start_page'] = (string) $addata->spage;
         $item['container_end_page'] = (string) $addata->epage;
         foreach ($addata->eissn as $eissn) {
             if (!in_array((string) $eissn, $item['issn'], true)) {
                 $item['issn'][] = (string) $eissn;
             }
         }
         foreach ($addata->issn as $issn) {
             if (!in_array((string) $issn, $item['issn'], true)) {
                 $item['issn'][] = (string) $issn;
             }
         }

         // Remove dash-less ISSNs if there are corresponding dashed ones
         // (We could convert dash-less ISSNs to dashed ones, but try to stay
         // true to the metadata)
         $allIssns = $item['issn'];
         $keepIssn = function ($issn) use ($allIssns) {
             if (strlen($issn) != 8) {
                 return true;
             }
             $dashed = substr($issn, 0, 4) . '-' . substr($issn, 4);
             return !in_array($dashed, $allIssns, true);
         };
         $item['issn'] = array_values(array_filter($allIssns, $keepIssn));

         $item['fullrecord'] = $record->asXml();
         $items[] = $item;
     }

     // Get facet data and add to multidimensional $facets array
     // Start by getting XML for each FACET element,
     //  which has the name of the facet as an attribute.
     // We only get the first level of elements
     //   because child elements have a namespace prefix
     $facets = [];
     $facetSet = $sxe->xpath('//sear:FACET');
     if (empty($facetSet)) {
         $facetSet = [];
         if (!empty($sxe->JAGROOT->RESULT->FACETLIST)) {
             $facetSet = $sxe->JAGROOT->RESULT->FACETLIST->children($searNs);
         }
     }
     foreach ($facetSet as $facetlist) {
         // Set first level of array with the facet name
         $facet_name = (string) $facetlist->attributes()->NAME;
         // Use the namespace prefix to get second level child elements
         //   (the facet values) out of $facetlist.
         foreach ($facetlist->children($searNs) as $facetvalues) {
             // Second level of the array is facet values and their counts
             $facet_key = (string) $facetvalues->attributes()->KEY;
             $facets[$facet_name][$facet_key] = (string) $facetvalues->attributes()->VALUE;
         }
     }

     $didYouMean = [];
     $suggestions = $sxe->xpath('//sear:QUERYTRANSFORMS');
     // "?: []" keeps the loop safe when xpath() reports failure
     foreach ($suggestions ?: [] as $suggestion) {
         $didYouMean[] = (string) $suggestion->attributes()->QUERY;
     }

     return [
         'recordCount' => $totalhits,
         'documents' => $items,
         'facets' => $facets,
         'didYouMean' => $didYouMean,
     ];
 }
Example #4
0
 /**
  * Download the XML document at $this->_query_link and pass each of its
  * top-level Dublin Core ("dc") elements to the matching _fetch* method.
  *
  * Elements without a registered handler are ignored.
  *
  * @return void
  *
  * @throws \RuntimeException If the document cannot be downloaded
  * @throws \Exception        If the downloaded content is not valid XML
  *                           (raised by SimpleXMLElement's constructor)
  */
 private function _loadXML()
 {
     $feed = file_get_contents($this->_query_link);
     if ($feed === false) {
         // Fail with a clear cause instead of letting the XML parser choke
         // on the boolean returned for a failed download.
         throw new \RuntimeException('Unable to retrieve the XML document');
     }
     $xml = new SimpleXmlElement($feed);

     // Look the namespace URI up in the document so it is not hard-coded.
     // When no 'dc' prefix is declared, null selects the un-namespaced
     // children (the previous implicit behaviour) without a notice.
     $namespaces = $xml->getNameSpaces(true);
     $dcNamespace = isset($namespaces['dc']) ? $namespaces['dc'] : null;
     $dc = (array) $xml->children($dcNamespace);

     // Element name => method that stores its value(s) on this object
     $handlers = array(
         'creator' => '_fetchAuthors',
         'identifier' => '_fetchIds',
         'title' => '_fetchTitle',
         'publisher' => '_fetchPublisher',
         'language' => '_fetchLanguage',
         'description' => '_fetchDescription',
         'subject' => '_fetchSubject',
         'format' => '_fetchFormat',
     );

     foreach ($dc as $key => $value) {
         if (isset($handlers[$key])) {
             $this->{$handlers[$key]}($value);
         }
     }
 }