/**
 * Break the MODS topic element text-node metadata on the specified
 * character and put the pieces into separate MODS topic elements.
 *
 * Only the first <topic> element found in the snippet is split. The new
 * <topic> elements are appended to the first element named
 * $this->topLevelNodeName; if no such element exists, the original topic's
 * parent element is used instead.
 *
 * @param string $xmlsnippet
 *    The initial MODS topic element.
 * @param string $breakOnCharacter
 *    The character to break the string on. The default character is the
 *    semicolon ';'.
 *
 * @return string
 *    An XML string containing one or more MODS topic elements. The original
 *    snippet is returned unchanged when it contains no <topic> element, when
 *    the delimiter is empty, when the topic holds no non-empty text, or when
 *    there is no element to attach the new topics to.
 */
public function breakTopicMetadaOnCharacter($xmlsnippet, $breakOnCharacter = ';')
{
    // explode() cannot split on an empty delimiter, and loadXML() cannot
    // parse an empty string, so there is nothing to do in either case.
    if (!is_string($breakOnCharacter) || $breakOnCharacter === '') {
        return $xmlsnippet;
    }
    if (!is_string($xmlsnippet) || $xmlsnippet === '') {
        return $xmlsnippet;
    }

    $xml = new \DomDocument();
    if (!$xml->loadXML($xmlsnippet, LIBXML_NSCLEAN)) {
        return $xmlsnippet;
    }

    $topicNode = $xml->getElementsByTagName('topic')->item(0);
    if (!is_object($topicNode)) {
        return $xmlsnippet;
    }

    // Collect the trimmed pieces, dropping empty ones so that input such as
    // "a; b;" or "a;;b" does not produce empty <topic/> elements.
    $topics = array();
    foreach (explode($breakOnCharacter, $topicNode->nodeValue) as $piece) {
        $piece = trim($piece);
        if ($piece !== '') {
            $topics[] = $piece;
        }
    }
    if (count($topics) === 0) {
        return $xmlsnippet;
    }

    // Remove the old topic node, remembering where it was attached.
    $topicNodeParent = $topicNode->parentNode;
    $topicNodeParent->removeChild($topicNode);

    // Prefer the configured top-level element; fall back to the element the
    // topic was removed from so a missing top-level node is not fatal.
    $subjectNode = $xml->getElementsByTagName($this->topLevelNodeName)->item(0);
    if (!($subjectNode instanceof \DOMElement)) {
        $subjectNode = $topicNodeParent;
    }
    if (!($subjectNode instanceof \DOMElement)) {
        // The topic was the document root; there is no element to hold
        // several replacement topics.
        return $xmlsnippet;
    }

    foreach ($topics as $topic) {
        $newtopicElement = $xml->createElement('topic');
        // A text node keeps characters such as '&' and '<' properly escaped.
        $newtopicElement->appendChild($xml->createTextNode($topic));
        $subjectNode->appendChild($newtopicElement);
    }

    return $xml->saveXML($subjectNode);
}
/**
 * General manipulate wrapper method.
 *
 * Sends the text of the snippet's single <abstract> element to the service
 * at $this->arrpiUrl and replaces the element's text with the
 * 'translation' => 'pirate' value of the JSON response.
 *
 * @param string $input
 *    An XML snippet to be manipulated. We are only interested in <abstract>
 *    snippets.
 *
 * @return string
 *    The manipulated snippet, or the original input if it does not contain
 *    exactly one <abstract> element, if the HTTP request fails, or if the
 *    response does not contain a usable translation.
 */
public function manipulate($input)
{
    $dom = new \DomDocument();
    $dom->loadxml($input, LIBXML_NSCLEAN);

    $abstracts = $dom->getElementsByTagName('abstract');
    if ($abstracts->length != 1) {
        return $input;
    }

    $abstract = $abstracts->item(0);
    $source_text = $abstract->nodeValue;

    // Use Guzzle to hit the API.
    $client = new Client();
    try {
        $query = '?text=' . urlencode($source_text) . '&format=json';
        $response = $client->get($this->arrpiUrl . $query);
    } catch (\Exception $e) {
        // Fully qualified so the catch also works inside a namespace, where
        // a bare "Exception" would resolve to a class in that namespace.
        // If there is a Guzzle error, log it and return the original snippet.
        $this->log->addWarning("PiratizeAbstract", array('HTTP request error' => $e->getMessage()));
        return $input;
    }

    $body = (string) $response->getBody();
    $translation = json_decode($body, true);

    // Do not wipe the abstract when the body is not the JSON we expect.
    if (!is_array($translation)
        || !isset($translation['translation']['pirate'])
        || !is_string($translation['translation']['pirate'])
    ) {
        $this->log->addWarning(
            "PiratizeAbstract",
            array('Record key' => $this->record_key, 'Unexpected API response' => $body)
        );
        return $input;
    }

    $pirate_text = urldecode($translation['translation']['pirate']);

    // Replace the element's content with a text node so that characters
    // such as '&' in the translation are escaped rather than parsed.
    while ($abstract->firstChild) {
        $abstract->removeChild($abstract->firstChild);
    }
    $abstract->appendChild($dom->createTextNode($pirate_text));

    // Log any instances where the translation differs from the original text.
    if ($source_text !== $abstract->nodeValue) {
        $this->log->addInfo(
            "PiratizeAbstract",
            array(
                'Record key' => $this->record_key,
                'Source abstract text' => $source_text,
                'Piratized abstract text' => $abstract->nodeValue,
            )
        );
    }

    // We're done, so return the modified snippet.
    return $dom->saveXML($dom->documentElement);
}
/**
 * General manipulate wrapper method.
 *
 * Adds an <id_in_csv> element and a <CSVRecord> element to the
 * <extension><CSVData> fragment. The <CSVRecord> holds, as CDATA, the JSON
 * encoding of the unserialized contents of the record's '.metadata' file in
 * the fetcher's temp directory.
 *
 * @param string $input
 *    The XML fragment to be manipulated. We are only interested in the
 *    <extension><CSVData> fragment added in the MIK mappings file.
 *
 * @return string
 *    One of the manipulated XML fragment, the original input XML if the
 *    input is not the fragment we are interested in, or an empty string,
 *    which has the effect of removing the empty <extension><CSVData>
 *    fragment from our MODS (if there was an error, for example, we don't
 *    want empty extension elements in our MODS documents).
 */
public function manipulate($input)
{
    $dom = new \DomDocument();
    $dom->loadxml($input, LIBXML_NSCLEAN);

    // Test to see if the current fragment is <extension><CSVData>.
    $xpath = new \DOMXPath($dom);
    $csvdatas = $xpath->query("//extension/CSVData");

    // There should only be one <CSVData> fragment in the incoming
    // XML. If there is 0 or more than 1, return the original.
    if ($csvdatas->length !== 1) {
        return $input;
    }

    $csvdata = $csvdatas->item(0);

    // Build the value as a text node so characters such as '&' in the
    // record key are escaped instead of being parsed as entity references.
    $csvid = $dom->createElement('id_in_csv');
    $csvid->appendChild($dom->createTextNode((string) $this->record_key));
    $csvdata->appendChild($csvid);

    $timestamp = date("Y-m-d H:i:s");

    // Add the <CSVRecord> element.
    $csvrecord = $dom->createElement('CSVRecord');
    $now = $dom->createAttribute('timestamp');
    $now->value = $timestamp;
    $csvrecord->appendChild($now);
    $mimetype = $dom->createAttribute('mimetype');
    $mimetype->value = 'application/json';
    $csvrecord->appendChild($mimetype);

    try {
        $metadata_path = $this->settings['FETCHER']['temp_directory'] . DIRECTORY_SEPARATOR .
            $this->record_key . '.metadata';

        // These functions report failure by returning false rather than by
        // throwing, so each result is checked explicitly.
        $serialized = file_get_contents($metadata_path);
        if ($serialized === false) {
            throw new \RuntimeException('unable to read ' . $metadata_path);
        }

        // NOTE(review): unserialize() is only safe on trusted data; this
        // assumes the .metadata file was written by this application.
        $metadata = unserialize($serialized);
        if ($metadata === false && $serialized !== serialize(false)) {
            throw new \RuntimeException('unable to unserialize ' . $metadata_path);
        }

        $metadata_contents = json_encode($metadata);
        if ($metadata_contents === false) {
            throw new \RuntimeException('unable to encode metadata as JSON, error code ' . json_last_error());
        }
    } catch (\Exception $e) {
        $message = "Problem creating <CSVRecord> element for object " . $this->record_key . ":" .
            $e->getMessage();
        $this->log->addInfo("AddCsvData", array('CSV metadata warning' => $message));
        return '';
    }

    // If the metadata contains the CDATA end delimiter, log and return.
    if (strpos($metadata_contents, ']]>') !== false) {
        $message = "CSV metadata for object " . $this->record_key .
            ' contains the CDATA end delimiter ]]>';
        $this->log->addInfo("AddCsvData", array('CSV metadata warning' => $message));
        return '';
    }

    // If we've made it this far, add the metadata to <CSVData> as
    // CDATA and return the modified XML fragment.
    if (strlen($metadata_contents)) {
        $cdata = $dom->createCDATASection($metadata_contents);
        $csvrecord->appendChild($cdata);
        $csvdata->appendChild($csvrecord);
    }

    return $dom->saveXML($dom->documentElement);
}
/**
 * General manipulate wrapper method.
 *
 * Replaces the value of the /originInfo/<destDateElement> element with a
 * YYYY-MM-DD version of the source date value returned by
 * getSourceDateFieldValue(). Two source formats are recognised:
 * "dd-dd-dddd" (the first group is used as the day, the second as the
 * month) and "dddd dd dd" (year, month, day separated by whitespace).
 *
 * @todo: When 'ca.' is present, add 'qualifier' attribute with values
 * 'approximate', 'inferred', 'questionable'. Set a default (maybe
 * configurable) date in this case?
 *
 * @param string $input
 *    An XML snippet to be manipulated.
 *
 * @return string
 *    The serialized <originInfo> element with the normalized date, or the
 *    original input if the snippet is not the configured fragment or the
 *    source date is empty, unrecognised, or not a real calendar date.
 */
public function manipulate($input)
{
    $dom = new \DomDocument();
    $dom->loadxml($input, LIBXML_NSCLEAN);

    // Test to see if the current fragment is the one identified in the config file.
    $xpath = new \DOMXPath($dom);
    $date_elements = $xpath->query('/originInfo/' . $this->destDateElement);

    // There should only be one target date element. query() returns false
    // when the configured element name makes the expression invalid.
    if ($date_elements === false || $date_elements->length !== 1) {
        // If current fragment does not match our XPath expression, return it.
        return $input;
    }

    // Get the child node, which we will repopulate below if its value
    // matches our regex, and its parent so we can send it back to the
    // metadata parser.
    $date_element = $date_elements->item(0);
    $origin_info_element = $date_element->parentNode;

    $this->sourceDateFieldValue = $this->getSourceDateFieldValue();
    $source_value = $this->sourceDateFieldValue;

    // Reject unusable values before handing them to preg_match().
    if (!is_string($source_value) || !strlen($source_value)) {
        $this->log->addWarning(
            "NormalizeDate",
            array('Record key' => $this->record_key, 'Message' => 'Source date value is empty or not a string')
        );
        return $input;
    }

    if (preg_match('/^(\\d\\d)\\-(\\d\\d)\\-(\\d\\d\\d\\d)$/', $source_value, $matches)) {
        // Dates in \d\d-\d\d-\d\d\d\d.
        $year = $matches[3];
        $month = $matches[2];
        $day = $matches[1];
    } elseif (preg_match('/^(\\d\\d\\d\\d)\\s+(\\d\\d)\\s+(\\d\\d)$/', $source_value, $matches)) {
        // Dates in \d\d\d\d \d\d \d\d.
        $year = $matches[1];
        $month = $matches[2];
        $day = $matches[3];
    } else {
        $this->log->addWarning(
            "NormalizeDate",
            array('Record key' => $this->record_key, 'Source date value does not match any pattern' => $source_value)
        );
        return $input;
    }

    // The patterns only check digit counts, so make sure the result is a
    // real date before writing it into the MODS.
    if (!checkdate((int) $month, (int) $day, (int) $year)) {
        $this->log->addWarning(
            "NormalizeDate",
            array(
                'Record key' => $this->record_key,
                'Source date value is not a valid calendar date' => $source_value,
            )
        );
        return $input;
    }

    $date_element->nodeValue = $year . '-' . $month . '-' . $day;

    // Re-appending an attached node moves it to the end of its parent; this
    // is kept so the serialized output is unchanged.
    $origin_info_element->appendChild($date_element);

    // Convert back to the snippet and return it.
    $this->logNormalization($source_value, $origin_info_element, $dom);
    return $dom->saveXML($origin_info_element);
}
/**
 * General manipulate wrapper method.
 *
 * Fills the <identifier type="uuid"> fragment with a newly generated
 * version 4 UUID unless it already holds a valid one.
 *
 * @param string $input
 *    The XML fragment to be manipulated. We are only interested in the
 *    <identifier type="uuid"> fragment added in the MIK mappings file.
 *
 * @return string
 *    The manipulated XML fragment, or the original input XML if the input
 *    is not the fragment we are interested in, already contains a valid
 *    UUID v4, or a UUID could not be generated.
 */
public function manipulate($input)
{
    $dom = new \DomDocument();
    $dom->loadxml($input, LIBXML_NSCLEAN);

    // Test to see if the current fragment is <identifier type="uuid">.
    $xpath = new \DOMXPath($dom);
    $uuid_identifiers = $xpath->query("//identifier[@type='uuid']");

    // There should only be one <identifier type="uuid"/> fragment in the
    // incoming XML, defined in the mappings file. Otherwise return the
    // fragment unmodified.
    if ($uuid_identifiers->length !== 1) {
        return $input;
    }

    $uuid_identifier = $uuid_identifiers->item(0);

    // If our incoming fragment is already a valid UUID v4, return it as is.
    // An empty value can never match, so no separate length check is needed.
    $uuid_v4_pattern = '/^[0-9A-F]{8}-[0-9A-F]{4}-4[0-9A-F]{3}-[89AB][0-9A-F]{3}-[0-9A-F]{12}$/i';
    if (preg_match($uuid_v4_pattern, $uuid_identifier->nodeValue)) {
        $this->log->addError("AddUuidToMods", array('UUID already present' => $uuid_identifier->nodeValue));
        return $input;
    }

    try {
        $uuid4_string = Uuid::uuid4()->toString();
    } catch (UnsatisfiedDependencyException $e) {
        // Log the error and return the input untouched; without this return
        // the identifier would be overwritten with an undefined value.
        $this->log->addError("AddUuidToMods", array('UUID generation error' => $e->getMessage()));
        return $input;
    }

    $uuid_identifier->nodeValue = $uuid4_string;
    return $dom->saveXML($dom->documentElement);
}
</item> <item date="3247283732"> <caption>Testing...</caption> <content> <![CDATA[ dies ist ein zweiter test... ]]> </content> </item> </news>'; $xsl2str = '<xsl:stylesheet version = "1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="/"> <xsl:apply-templates /> </xsl:template> <xsl:template match="content"> <b><xsl:value-of select="." /></b> </xsl:template> '; $xml = new DomDocument(); $xml->loadxml($xmlstr); $xsl = new DomDocument(); $xsl->load("test1.xsl"); $xsl2 = new DomDocument(); $xsl2->load("test1-2.xsl"); $proc = new xsltprocessor(); $proc->importStyleSheet($xsl2); $proc->importStyleSheet($xsl); echo $proc->transformToXML($xml); echo '<br><br>'; printf("%s<br>Count: %d", md5("29o3_DEFAULT_THEME"), strlen(md5("test")));
/**
 * General manipulate wrapper method.
 *
 * Adds <alias> and <pointer> elements to the <extension><CONTENTdmData>
 * fragment, followed by up to three elements (<dmGetItemInfo>,
 * <dmGetCompoundObjectInfo>, <GetParent>) that each carry the matching
 * getCdmData() response as CDATA plus 'timestamp', 'mimetype' and 'source'
 * attributes.
 *
 * @param string $input
 *    The XML fragment to be manipulated. We are only interested in the
 *    <extension><CONTENTdmData> fragment added in the MIK mappings file.
 *
 * @return string
 *    One of the manipulated XML fragment, the original input XML if the
 *    input is not the fragment we are interested in, or an empty string,
 *    which has the effect of removing the empty <extension><CONTENTdmData>
 *    fragment from our MODS (if there was an error, for example, we don't
 *    want empty extension elements in our MODS documents).
 */
public function manipulate($input)
{
    $dom = new \DomDocument();
    $dom->loadxml($input, LIBXML_NSCLEAN);

    // Is the current fragment <extension><CONTENTdmData>?
    $xpath = new \DOMXPath($dom);
    $cdmdatas = $xpath->query("//extension/CONTENTdmData");

    // Anything other than exactly one <CONTENTdmData> is passed through
    // unmodified.
    if ($cdmdatas->length !== 1) {
        return $input;
    }

    $contentdmdata = $cdmdatas->item(0);
    $contentdmdata->appendChild($dom->createElement('alias', $this->alias));
    $contentdmdata->appendChild($dom->createElement('pointer', $this->record_key));

    $timestamp = date("Y-m-d H:i:s");

    // Creates an element carrying the timestamp, mimetype and source
    // attributes, in that order.
    $buildElement = function ($tag_name, $mime, $url) use ($dom, $timestamp) {
        $element = $dom->createElement($tag_name);
        $attributes = array('timestamp' => $timestamp, 'mimetype' => $mime, 'source' => $url);
        foreach ($attributes as $attribute_name => $attribute_value) {
            $attribute = $dom->createAttribute($attribute_name);
            $attribute->value = $attribute_value;
            $element->appendChild($attribute);
        }
        return $element;
    };

    // Wraps a payload in CDATA inside the element and attaches the element
    // to <CONTENTdmData>.
    $attachWithCdata = function ($element, $payload) use ($dom, $contentdmdata) {
        $element->appendChild($dom->createCDATASection($payload));
        $contentdmdata->appendChild($element);
    };

    // The <dmGetItemInfo> element.
    $dmGetItemInfo = $buildElement(
        'dmGetItemInfo',
        'application/json',
        $this->settings['METADATA_PARSER']['ws_url'] . 'dmGetItemInfo/' .
            $this->alias . '/' . $this->record_key . '/json'
    );
    $item_info = $this->getCdmData($this->alias, $this->record_key, 'dmGetItemInfo', 'json');

    // CONTENTdm returns a 200 OK with its error messages, so the HTTP
    // status cannot be relied on to reveal every failure. Instead, we check
    // whether the string 'dmcreated' (one of the metadata fields returned
    // for every object) is in the response body. If it's not, assume
    // CONTENTdm has returned an error of some sort, log it, and return.
    if (!preg_match('/dmcreated/', $item_info)) {
        $this->log->addInfo("AddContentdmData", array('CONTENTdm internal error' => $item_info));
        return '';
    }

    // If the CONTENTdm metadata contains the CDATA end delimiter, log and return.
    if (preg_match('/\\]\\]>/', $item_info)) {
        $message = "CONTENTdm metadata for object " .
            $this->settings['METADATA_PARSER']['alias'] . '/' . $this->record_key .
            ' contains the CDATA end delimiter ]]>';
        $this->log->addInfo("AddContentdmData", array('CONTENTdm metadata warning' => $message));
        return '';
    }

    // Having made it this far, add the output of dmGetItemInfo to
    // <CONTENTdmData> as CDATA.
    if (strlen($item_info)) {
        $attachWithCdata($dmGetItemInfo, $item_info);
    }

    // The <dmGetCompoundObjectInfo> element.
    $dmGetCompoundObjectInfo = $buildElement(
        'dmGetCompoundObjectInfo',
        'text/xml',
        $this->settings['METADATA_PARSER']['ws_url'] . 'dmGetCompoundObjectInfo/' .
            $this->alias . '/' . $this->record_key . '/xml'
    );
    $compound_object_info = $this->getCdmData(
        $this->alias,
        $this->record_key,
        'dmGetCompoundObjectInfo',
        'xml'
    );

    // Only add the <dmGetCompoundObjectInfo> element if the object is compound.
    if (strlen($compound_object_info) && preg_match('/<cpd>/', $compound_object_info)) {
        $attachWithCdata($dmGetCompoundObjectInfo, $compound_object_info);
    }

    // The <GetParent> element.
    $GetParent = $buildElement(
        'GetParent',
        'text/xml',
        $this->settings['METADATA_PARSER']['ws_url'] . 'GetParent/' .
            $this->alias . '/' . $this->record_key . '/xml'
    );
    $parent_info = $this->getCdmData($this->alias, $this->record_key, 'GetParent', 'xml');

    // Only add the <GetParent> element if the object has a parent
    // pointer of not -1.
    if (strlen($parent_info) && !preg_match('/\\-1/', $parent_info)) {
        $attachWithCdata($GetParent, $parent_info);
    }

    return $dom->saveXML($dom->documentElement);
}