Exemple #1
0
 /**
  * Split the text of the first MODS topic element on a delimiter and emit
  * one MODS topic element per non-empty piece.
  *
  * The original topic element is removed and the replacements are appended
  * to the first element named by $this->topLevelNodeName.
  *
  * @param string $xmlsnippet The XML snippet containing the MODS topic element.
  *
  * @param string $breakOnCharacter The delimiter used to break the topic text.
  *     The default character is the semicolon ';'.
  *
  * @return string
  *     An XML string containing one or more MODS topic elements, or the
  *     unmodified input when there is no topic element, no usable delimiter,
  *     no non-empty topic text, or no element named $this->topLevelNodeName.
  */
 public function breakTopicMetadaOnCharacter($xmlsnippet, $breakOnCharacter = ';')
 {
     // explode() cannot split on an empty delimiter and there is nothing to
     // parse in an empty snippet, so hand the input back untouched.
     if (trim((string) $xmlsnippet) === '' || strlen((string) $breakOnCharacter) === 0) {
         return $xmlsnippet;
     }

     $xml = new \DOMDocument();
     $xml->loadXML($xmlsnippet, LIBXML_NSCLEAN);
     $topicNode = $xml->getElementsByTagName('topic')->item(0);
     if (!is_object($topicNode)) {
         return $xmlsnippet;
     }

     // Collect the trimmed pieces, dropping empty ones so that trailing or
     // doubled delimiters ("a; b;") do not produce empty topic elements.
     $topics = array();
     foreach (explode($breakOnCharacter, $topicNode->nodeValue) as $piece) {
         $piece = trim($piece);
         if ($piece !== '') {
             $topics[] = $piece;
         }
     }
     if (count($topics) === 0) {
         return $xmlsnippet;
     }

     // Remove the old topic node before looking up the container, which is
     // the same order of operations the lookup has always had.
     $topicNode->parentNode->removeChild($topicNode);
     $subjectNode = $xml->getElementsByTagName($this->topLevelNodeName)->item(0);
     if (!is_object($subjectNode)) {
         // No container to append to; return the caller's original string
         // rather than failing on a null node.
         return $xmlsnippet;
     }

     foreach ($topics as $topic) {
         $newtopicElement = $xml->createElement('topic');
         // createTextNode() escapes markup characters in the topic text.
         $newtopicElement->appendChild($xml->createTextNode($topic));
         $subjectNode->appendChild($newtopicElement);
     }

     return $xml->saveXML($subjectNode);
 }
Exemple #2
0
 /**
  * General manipulate wrapper method.
  *
  * Sends the text of the single <abstract> element to the service at
  * $this->arrpiUrl and replaces the element's text with the translation
  * found under the 'translation' => 'pirate' keys of the JSON response.
  *
  * @param string $input An XML snippet to be manipulated. We are only interested
  *    in <abstract> snippets.
  *
  * @return string
  *     The manipulated snippet, or the original input if it does not contain
  *     exactly one <abstract> element, the HTTP request fails, or the
  *     response does not contain a translation.
  */
 public function manipulate($input)
 {
     $dom = new \DOMDocument();
     $dom->loadXML($input, LIBXML_NSCLEAN);
     $abstracts = $dom->getElementsByTagName('abstract');
     if ($abstracts->length !== 1) {
         return $input;
     }

     $abstract = $abstracts->item(0);
     $source_text = $abstract->nodeValue;

     // Use Guzzle to hit the API.
     $client = new Client();
     try {
         $query = '?text=' . urlencode($source_text) . '&format=json';
         $response = $client->get($this->arrpiUrl . $query);
     } catch (\Exception $e) {
         // Fully qualified so the catch matches the global Exception class
         // even when this file declares a namespace. Log the error and
         // return the original snippet.
         $this->log->addWarning("PiratizeAbstract", array('HTTP request error' => $e->getMessage()));
         return $input;
     }

     $translation = json_decode((string) $response->getBody(), true);
     // A malformed or unexpected response must not wipe out the abstract.
     if (!is_array($translation)
         || !isset($translation['translation']['pirate'])
         || !is_string($translation['translation']['pirate'])
     ) {
         $this->log->addWarning(
             "PiratizeAbstract",
             array('Record key' => $this->record_key, 'Message' => 'Response did not contain a translation')
         );
         return $input;
     }
     $pirate_text = urldecode($translation['translation']['pirate']);

     // Replace the element's content with a text node so that characters
     // such as '&' in the translation are escaped instead of being parsed
     // as entity references.
     while ($abstract->firstChild) {
         $abstract->removeChild($abstract->firstChild);
     }
     $abstract->appendChild($dom->createTextNode($pirate_text));

     // Log any instances where the translation differs from the original text.
     if ($source_text !== $pirate_text) {
         $this->log->addInfo(
             "PiratizeAbstract",
             array(
                 'Record key' => $this->record_key,
                 'Source abstract text' => $source_text,
                 'Piratized abstract text' => $pirate_text,
             )
         );
     }

     // We're done, so return the modified snippet.
     return $dom->saveXML($dom->documentElement);
 }
Exemple #3
0
 /**
  * General manipulate wrapper method.
  *
  * Adds an <id_in_csv> element and a <CSVRecord> element (holding the
  * record's metadata as JSON inside a CDATA section) to the
  * <extension><CSVData> fragment.
  *
  * @param string $input The XML fragment to be manipulated. We are only
  *     interested in the <extension><CSVData> fragment added in the
  *     MIK mappings file.
  *
  * @return string
  *     One of: the manipulated XML fragment; the original input XML if the
  *     input is not the fragment we are interested in; or an empty string,
  *     which has the effect of removing the empty <extension><CSVData>
  *     fragment from our MODS (if there was an error, for example, we don't
  *     want empty extension elements in our MODS documents).
  */
 public function manipulate($input)
 {
     $dom = new \DOMDocument();
     $dom->loadXML($input, LIBXML_NSCLEAN);
     // Test to see if the current fragment is <extension><CSVData>.
     $xpath = new \DOMXPath($dom);
     $csvdatas = $xpath->query("//extension/CSVData");
     // There should only be one <CSVData> fragment in the incoming
     // XML. If there is 0 or more than 1, return the original.
     if ($csvdatas->length !== 1) {
         return $input;
     }
     $csvdata = $csvdatas->item(0);

     // createElement() does not escape its value argument, so the record
     // key is added as a text node instead.
     $csvid = $dom->createElement('id_in_csv');
     $csvid->appendChild($dom->createTextNode((string) $this->record_key));
     $csvdata->appendChild($csvid);

     // Build the <CSVRecord> element.
     $csvrecord = $dom->createElement('CSVRecord');
     $csvrecord->setAttribute('timestamp', date("Y-m-d H:i:s"));
     $csvrecord->setAttribute('mimetype', 'application/json');

     try {
         $metadata_path = $this->settings['FETCHER']['temp_directory'] . DIRECTORY_SEPARATOR . $this->record_key . '.metadata';
         // file_get_contents() signals failure by returning false, not by
         // throwing, so turn that into an exception for the handler below.
         $serialized = file_get_contents($metadata_path);
         if ($serialized === false) {
             throw new \RuntimeException('Unable to read ' . $metadata_path);
         }
         // NOTE(review): unserialize() is only safe on trusted data; this
         // assumes the .metadata files are written by this application.
         $record = unserialize($serialized);
         if ($record === false && $serialized !== serialize(false)) {
             throw new \RuntimeException('Unable to unserialize ' . $metadata_path);
         }
         $metadata_contents = json_encode($record);
         if ($metadata_contents === false) {
             throw new \RuntimeException('Unable to encode metadata as JSON: ' . json_last_error_msg());
         }
     } catch (\Exception $e) {
         $message = "Problem creating <CSVRecord> element for object " . $this->record_key . ":" . $e->getMessage();
         $this->log->addInfo("AddCsvData", array('CSV metadata warning' => $message));
         return '';
     }

     // If the metadata contains the CDATA end delimiter, log and return.
     if (strpos($metadata_contents, ']]>') !== false) {
         $message = "CSV metadata for object " . $this->record_key . ' contains the CDATA end delimiter ]]>';
         $this->log->addInfo("AddCsvData", array('CSV metadata warning' => $message));
         return '';
     }

     // If we've made it this far, add the metadata to <CSVData> as
     // CDATA and return the modified XML fragment.
     if (strlen($metadata_contents)) {
         $csvrecord->appendChild($dom->createCDATASection($metadata_contents));
         $csvdata->appendChild($csvrecord);
     }
     return $dom->saveXML($dom->documentElement);
 }
Exemple #4
0
 /**
  * General manipulate wrapper method.
  *
  * If the snippet is /originInfo/<destDateElement> and the source date value
  * matches one of the recognised patterns, the element's value is replaced
  * with the date rewritten as YYYY-MM-DD.
  *
  * Recognised source patterns:
  *   - DD-MM-YYYY (two digits, two digits, four digits, hyphen-separated)
  *   - YYYY MM DD (whitespace-separated)
  *
  * @param string $input An XML snippet to be manipulated.
  *
  * @return string
  *     The manipulated snippet, or the original input if the snippet is not
  *     the configured date element or the source value cannot be normalized.
  */
 public function manipulate($input)
 {
     $dom = new \DOMDocument();
     $dom->loadXML($input, LIBXML_NSCLEAN);
     // Test to see if the current fragment is the one identified in the config file.
     $xpath = new \DOMXPath($dom);
     $date_elements = $xpath->query('/originInfo/' . $this->destDateElement);
     // There should only be one target date element.
     if ($date_elements->length !== 1) {
         // If current fragment does not match our XPath expression, return it.
         return $input;
     }

     $date_element = $date_elements->item(0);
     // Keep hold of the parent so it can be serialized and sent back to the
     // metadata parser.
     $origin_info_element = $date_element->parentNode;
     $this->sourceDateFieldValue = $this->getSourceDateFieldValue();
     $source_value = $this->sourceDateFieldValue;

     // Validate the type before handing the value to preg_match(), which
     // must only ever see a string.
     if (!is_string($source_value) || !strlen($source_value)) {
         $this->log->addWarning("NormalizeDate", array('Record key' => $this->record_key, 'Message' => 'Source date value is empty or not a string'));
         return $input;
     }

     // @todo: When 'ca.' is present, add 'qualifier' attribute with values 'approximate',
     // 'inferred', 'questionable'. Set a default (maybe configurable) date in this case?
     if (preg_match('/^(\d\d)-(\d\d)-(\d{4})$/', $source_value, $matches)) {
         // DD-MM-YYYY: reverse the parts.
         $normalized = $matches[3] . '-' . $matches[2] . '-' . $matches[1];
     } elseif (preg_match('/^(\d{4})\s+(\d\d)\s+(\d\d)$/', $source_value, $matches)) {
         // YYYY MM DD: parts are already in order, only the separator changes.
         $normalized = $matches[1] . '-' . $matches[2] . '-' . $matches[3];
     } else {
         $this->log->addWarning("NormalizeDate", array('Record key' => $this->record_key, 'Source date value does not match any pattern' => $source_value));
         return $input;
     }

     $date_element->nodeValue = $normalized;
     // Re-append the date element to its parent, as before. Because the node
     // is already a child, this moves it to the end of the parent's children.
     $origin_info_element->appendChild($date_element);
     $this->logNormalization($source_value, $origin_info_element, $dom);
     // Convert the parent back to a snippet and return it.
     return $dom->saveXML($origin_info_element);
 }
Exemple #5
0
 /**
  * General manipulate wrapper method.
  *
  * Populates the <identifier type="uuid"> fragment with a newly generated
  * version 4 UUID unless it already holds one.
  *
  * @param string $input The XML fragment to be manipulated. We are only
  *     interested in the <identifier type="uuid"> fragment added in the
  *     MIK mappings file.
  *
  * @return string
  *     Either the manipulated XML fragment, or the original input XML if the
  *     input is not the fragment we are interested in, already contains a
  *     valid UUID v4, or a UUID could not be generated.
  */
 public function manipulate($input)
 {
     $dom = new \DOMDocument();
     $dom->loadXML($input, LIBXML_NSCLEAN);
     // Test to see if the current fragment is <identifier type="uuid">.
     $xpath = new \DOMXPath($dom);
     $uuid_identifiers = $xpath->query("//identifier[@type='uuid']");
     // There should only be one <identifier type="uuid"/> fragment in the
     // incoming XML, defined in the mappings file. Anything else is returned
     // unmodified.
     if ($uuid_identifiers->length !== 1) {
         return $input;
     }
     $uuid_identifier = $uuid_identifiers->item(0);

     // If our incoming fragment is already a valid UUID v4, return it as
     // is. Note that if an identifier with type "uuid" already exists, this
     // manipulator will add a new one, since we are processing the MODS
     // on an element by element basis, not the entire MODS document.
     $existing_value = $uuid_identifier->nodeValue;
     $uuid4_pattern = '/^[0-9A-F]{8}-[0-9A-F]{4}-4[0-9A-F]{3}-[89AB][0-9A-F]{3}-[0-9A-F]{12}$/i';
     if (strlen($existing_value) && preg_match($uuid4_pattern, $existing_value)) {
         $this->log->addError("AddUuidToMods", array('UUID already present' => $existing_value));
         return $input;
     }

     try {
         $uuid4_string = Uuid::uuid4()->toString();
     } catch (UnsatisfiedDependencyException $e) {
         // Log the error and return $input; without a UUID there is nothing
         // valid to write into the element.
         $this->log->addError("AddUuidToMods", array('UUID generation error' => $e->getMessage()));
         return $input;
     }

     $uuid_identifier->nodeValue = $uuid4_string;
     return $dom->saveXML($dom->documentElement);
 }
	</item>
	<item date="3247283732">
		<caption>Testing...</caption>
		<content>
		<![CDATA[
		dies ist ein zweiter test...
		]]>
		</content>
	</item>
</news>';
$xsl2str = '<xsl:stylesheet version = "1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
	<xsl:apply-templates />
</xsl:template>

<xsl:template match="content">
	<b><xsl:value-of select="." /></b>
</xsl:template>
';
// Parse the XML held in $xmlstr (presumably the string literal that ends
// just above; its start is not visible in this fragment).
$xml = new DomDocument();
$xml->loadxml($xmlstr);
// Load two stylesheets from files on disk.
// NOTE(review): the $xsl2str literal defined above is not used in the
// visible lines; confirm whether it was meant to be loaded instead.
$xsl = new DomDocument();
$xsl->load("test1.xsl");
$xsl2 = new DomDocument();
$xsl2->load("test1-2.xsl");
// Import both stylesheets into a single processor, $xsl2 first and $xsl second.
// NOTE(review): verify how XSLTProcessor treats a second importStylesheet()
// call; the second import may replace the first rather than combine with it.
$proc = new xsltprocessor();
$proc->importStyleSheet($xsl2);
$proc->importStyleSheet($xsl);
// Run the transformation and print the result.
echo $proc->transformToXML($xml);
echo '<br><br>';
// Print the MD5 hex digest of a fixed string and the length of an MD5 digest.
printf("%s<br>Count: %d", md5("29o3_DEFAULT_THEME"), strlen(md5("test")));
Exemple #7
0
 /**
  * General manipulate wrapper method.
  *
  * Adds <alias> and <pointer> elements to the <extension><CONTENTdmData>
  * fragment, followed by the raw output of the dmGetItemInfo,
  * dmGetCompoundObjectInfo and GetParent web service calls, each wrapped in
  * a CDATA section inside an element named after the call.
  *
  * @param string $input The XML fragment to be manipulated. We are only
  *     interested in the <extension><CONTENTdmData> fragment added in the
  *     MIK mappings file.
  *
  * @return string
  *     One of: the manipulated XML fragment; the original input XML if the
  *     input is not the fragment we are interested in; or an empty string,
  *     which has the effect of removing the empty <extension><CONTENTdmData>
  *     fragment from our MODS (if there was an error, for example, we don't
  *     want empty extension elements in our MODS documents).
  */
 public function manipulate($input)
 {
     $dom = new \DOMDocument();
     $dom->loadXML($input, LIBXML_NSCLEAN);
     // Test to see if the current fragment is <extension><CONTENTdmData>.
     $xpath = new \DOMXPath($dom);
     $cdmdatas = $xpath->query("//extension/CONTENTdmData");
     // There should only be one <CONTENTdmData> fragment in the incoming
     // XML. If there is 0 or more than 1, return the original.
     if ($cdmdatas->length !== 1) {
         return $input;
     }
     $contentdmdata = $cdmdatas->item(0);

     // createElement() does not escape its value argument, so the alias and
     // pointer values are added as text nodes instead.
     $alias = $dom->createElement('alias');
     $alias->appendChild($dom->createTextNode((string) $this->alias));
     $contentdmdata->appendChild($alias);
     $pointer = $dom->createElement('pointer');
     $pointer->appendChild($dom->createTextNode((string) $this->record_key));
     $contentdmdata->appendChild($pointer);

     // All three service elements share the same timestamp.
     $timestamp = date("Y-m-d H:i:s");

     // Add the <dmGetItemInfo> element.
     $item_info = $this->getCdmData($this->alias, $this->record_key, 'dmGetItemInfo', 'json');
     // CONTENTdm returns a 200 OK with its error messages, so the HTTP
     // status cannot be used to detect failures. Instead, we check to see
     // if the string 'dmcreated' (one of the metadata fields returned for
     // every object) is in the response body. If it's not, assume
     // CONTENTdm has returned an error of some sort, log it, and return.
     if (!is_string($item_info) || strpos($item_info, 'dmcreated') === false) {
         $this->log->addInfo("AddContentdmData", array('CONTENTdm internal error' => $item_info));
         return '';
     }
     // If the CONTENTdm metadata contains the CDATA end delimiter, log and return.
     if (strpos($item_info, ']]>') !== false) {
         $message = "CONTENTdm metadata for object " . $this->settings['METADATA_PARSER']['alias'] . '/' . $this->record_key . ' contains the CDATA end delimiter ]]>';
         $this->log->addInfo("AddContentdmData", array('CONTENTdm metadata warning' => $message));
         return '';
     }
     $contentdmdata->appendChild(
         $this->createCdmServiceElement($dom, 'dmGetItemInfo', $timestamp, 'application/json', 'json', $item_info)
     );

     // Only add the <dmGetCompoundObjectInfo> element if the object is compound.
     $compound_object_info = $this->getCdmData($this->alias, $this->record_key, 'dmGetCompoundObjectInfo', 'xml');
     if (is_string($compound_object_info) && strpos($compound_object_info, '<cpd>') !== false) {
         $contentdmdata->appendChild(
             $this->createCdmServiceElement($dom, 'dmGetCompoundObjectInfo', $timestamp, 'text/xml', 'xml', $compound_object_info)
         );
     }

     // Only add the <GetParent> element if the response is non-empty and
     // does not contain '-1' (the value returned when there is no parent).
     $parent_info = $this->getCdmData($this->alias, $this->record_key, 'GetParent', 'xml');
     if (is_string($parent_info) && strlen($parent_info) && strpos($parent_info, '-1') === false) {
         $contentdmdata->appendChild(
             $this->createCdmServiceElement($dom, 'GetParent', $timestamp, 'text/xml', 'xml', $parent_info)
         );
     }

     return $dom->saveXML($dom->documentElement);
 }

 /**
  * Build the element that records the output of one CONTENTdm web service call.
  *
  * @param \DOMDocument $dom The document the element is created in.
  * @param string $function The web service function name; also used as the element name.
  * @param string $timestamp Value for the 'timestamp' attribute.
  * @param string $mimetype Value for the 'mimetype' attribute.
  * @param string $format Response format segment of the source URL ('json' or 'xml').
  * @param string $payload Raw response body, stored in a CDATA section.
  *
  * @return \DOMElement
  *     The new element carrying timestamp, mimetype and source attributes
  *     (in that order) and the payload as its only child.
  */
 private function createCdmServiceElement(\DOMDocument $dom, $function, $timestamp, $mimetype, $format, $payload)
 {
     $source_url = $this->settings['METADATA_PARSER']['ws_url'] . $function . '/' . $this->alias . '/' . $this->record_key . '/' . $format;
     $element = $dom->createElement($function);
     $element->setAttribute('timestamp', $timestamp);
     $element->setAttribute('mimetype', $mimetype);
     $element->setAttribute('source', $source_url);
     $element->appendChild($dom->createCDATASection($payload));
     return $element;
 }