/**
 * Create a sfSkosPlugin instance for a SKOS document and load its concepts.
 *
 * Concepts are selected with an XPath query that accepts both the
 * skos:Concept element form and the expanded rdf:Description/rdf:type form.
 * Each concept element is first passed to addTerm(); in a second pass the
 * skos:broader, skos:narrower and skos:related children of each concept
 * trigger setParent(), setChildren() and addTermRelations() respectively.
 *
 * @param DOMDocument $doc     document the XPath queries run against
 * @param array       $options optional 'taxonomy' and 'parent' values that
 *                             replace the default subject taxonomy / root term
 *
 * @return sfSkosPlugin the plugin instance that processed the document
 */
public static function parse($doc, $options = array())
{
  libxml_use_internal_errors(true);

  // Report XML errors
  if (!$doc)
  {
    foreach (libxml_get_errors() as $libxmlError)
    {
      //TODO echo errors in template. Use custom validator?
      var_dump($libxmlError);
    }
  }

  $skos = new sfSkosPlugin();

  // Create the XPath object and register the namespaces used by the queries
  $skos->xpath = new DOMXPath($doc);
  $namespaces = array(
    'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'skos' => 'http://www.w3.org/2004/02/skos/core#',
    'dc' => 'http://purl.org/dc/elements/1.1/'
  );
  foreach ($namespaces as $prefix => $uri)
  {
    $skos->xpath->registerNamespace($prefix, $uri);
  }

  // Defaults: subject taxonomy and root term, unless the caller overrides them
  $skos->taxonomy = QubitTaxonomy::getById(QubitTaxonomy::SUBJECT_ID);
  $skos->parent = QubitTerm::getById(QubitTerm::ROOT_ID);
  foreach (array('taxonomy', 'parent') as $key)
  {
    if (isset($options[$key]))
    {
      $skos->$key = $options[$key];
    }
  }

  // XPath selector for expanded RDF syntax
  $expandedRdfSelector = "rdf:Description[rdf:type[@rdf:resource='http://www.w3.org/2004/02/skos/core#Concept']]";

  // Get all concepts
  $concepts = $skos->xpath->query('skos:Concept | ' . $expandedRdfSelector);

  // First pass: create a term for every concept element
  foreach ($concepts as $concept)
  {
    if ($concept instanceof DOMElement)
    {
      $skos->addTerm($concept);
    }
  }

  // Second pass: build term associations (including hierarchy). The table
  // order matters: parent first, then children, then relations.
  $linkHandlers = array(
    './skos:broader' => 'setParent',
    './skos:narrower' => 'setChildren',
    './skos:related' => 'addTermRelations'
  );

  foreach ($concepts as $concept)
  {
    if ($concept instanceof DOMElement)
    {
      foreach ($linkHandlers as $linkPath => $handler)
      {
        if ($skos->xpath->query($linkPath, $concept)->length > 0)
        {
          $skos->$handler($concept);
        }
      }
    }
  }

  return $skos;
}
/**
 * Show the SKOS import form and, on a valid POST, parse the uploaded file.
 *
 * The target taxonomy and parent term are taken from the route's resource:
 * a QubitTaxonomy resource selects the taxonomy directly, any other resource
 * is treated as the parent term and supplies its own taxonomy. Without a
 * taxonomy the request is forwarded to a 404; without 'create' permission on
 * the parent term it is forwarded as unauthorized.
 *
 * @param sfWebRequest $request current request (type presumed from the
 *                              methods called on it — TODO confirm)
 */
public function execute($request)
{
  $this->form = new sfForm();
  $this->timer = new QubitTimer();
  $this->terms = array();
  $this->termsPerPage = (int) sfConfig::get('app_hits_per_page');

  $this->taxonomy = null;
  $this->parent = QubitTerm::getById(QubitTerm::ROOT_ID);

  $route = $this->getRoute();
  if (isset($route->resource))
  {
    $resource = $route->resource;

    if ('QubitTaxonomy' != $resource->className)
    {
      // Any non-taxonomy resource is used as the parent term
      $this->parent = QubitTerm::getById($resource->id);
      $this->taxonomy = $this->parent->taxonomy;
    }
    else
    {
      $this->taxonomy = QubitTaxonomy::getById($resource->id);
    }
  }

  if (!isset($this->taxonomy))
  {
    $this->forward404();
  }

  // Check user authorization
  if (!QubitAcl::check($this->parent, 'create'))
  {
    QubitAcl::forwardUnauthorized();
  }

  $this->form->setWidget('file', new sfWidgetFormInputFile());
  $this->form->setValidator('file', new sfValidatorFile());

  // Anything but a POST just renders the file selection template
  if (!$request->isMethod('post'))
  {
    $this->setTemplate('importSelect');

    return;
  }

  $this->form->bind($request->getPostParameters(), $request->getFiles());
  if (!$this->form->isValid())
  {
    return;
  }

  $file = $this->form->getValue('file');
  if (null === $file)
  {
    return;
  }

  // NOTE(review): entity substitution is enabled for an uploaded document;
  // confirm that external entities cannot be resolved in this environment.
  $doc = new DOMDocument();
  $doc->substituteEntities = true;
  $doc->load($file->getTempName());

  $parseOptions = array('taxonomy' => $this->taxonomy, 'parent' => $this->parent);
  $this->skos = sfSkosPlugin::parse($doc, $parseOptions);
}
/**
 * Import an XML file by mapping its nodes onto Qubit objects.
 *
 * Steps, in order:
 *  - load the file through $this->loadXML() and collect any libxml errors
 *    into $this->errors;
 *  - hand 'eac-cpf' documents to sfEacPlugin and return;
 *  - detect the schema from the root tag name, the namespaces and the
 *    doctype (the last matching descriptor wins);
 *  - validate EAD documents, or hand SKOS documents to sfSkosPlugin and
 *    return;
 *  - load the YAML import mapping for the schema, run any XSLT pre-filters
 *    it lists, and for EAD switch the user culture to the first supported
 *    language found in the header;
 *  - for every mapping entry, create one object per matched XML node, call
 *    the mapped methods with values taken from the XML, save the object and
 *    write its id back onto the node as 'xml:id'.
 *
 * Local variable names in this method are part of its contract: mapping
 * parameters are eval()'d in this scope and may refer to them.
 *
 * @param mixed $xmlFile passed unchanged to $this->loadXML()
 * @param array $options passed unchanged to $this->loadXML()
 *
 * @return $this
 *
 * @throws Exception   when the document cannot be parsed, or when no import
 *                     mapping file exists for the detected schema
 * @throws sfException when a SKOS document is imported while sfSkosPlugin is
 *                     not listed in the 'plugins' setting
 */
public function import($xmlFile, $options = array())
{
  // load the XML document into a DOMXML object
  $importDOM = $this->loadXML($xmlFile, $options);

  // if we were unable to parse the XML file at all
  if (empty($importDOM->documentElement))
  {
    $errorMsg = sfContext::getInstance()->i18n->__('Unable to parse XML file: malformed or unresolvable entities');

    throw new Exception($errorMsg);
  }

  // if libxml threw errors, populate them to show in the template
  if ($importDOM->libxmlerrors)
  {
    // warning condition, XML file has errors (perhaps not well-formed or invalid?)
    $xmlerrors = array();
    foreach ($importDOM->libxmlerrors as $libxmlerror)
    {
      $xmlerrors[] = sfContext::getInstance()->i18n->__('libxml error %code% on line %line% in input file: %message%', array('%code%' => $libxmlerror->code, '%message%' => $libxmlerror->message, '%line%' => $libxmlerror->line));
    }

    $this->errors = array_merge((array) $this->errors, $xmlerrors);
  }

  // EAC-CPF documents are handled entirely by the EAC plugin
  if ('eac-cpf' == $importDOM->documentElement->tagName)
  {
    $this->rootObject = new QubitActor();
    $this->rootObject->parentId = QubitActor::ROOT_ID;

    $eac = new sfEacPlugin($this->rootObject);
    $eac->parse($importDOM);

    $this->rootObject->save();

    return $this;
  }

  // FIXME hardcoded until we decide how these will be developed
  $validSchemas = array(
    '+//ISBN 1-931666-00-8//DTD ead.dtd Encoded Archival Description (EAD) Version 2002//EN' => 'ead',
    '-//Society of American Archivists//DTD ead.dtd (Encoded Archival Description (EAD) Version 1.0)//EN' => 'ead1',
    'http://www.loc.gov/METS/' => 'mets',
    'http://www.loc.gov/mods/' => 'mods',
    'http://www.loc.gov/MARC21/slim' => 'marc',
    'record' => 'oai_dc_record',
    'dc' => 'dc',
    'oai_dc:dc' => 'dc',
    'dublinCore' => 'dc',
    'metadata' => 'dc',
    'ead' => 'ead',
    'add' => 'alouette',
    'http://www.w3.org/2004/02/skos/core#' => 'skos'
  );

  // determine what kind of schema we're trying to import
  $schemaDescriptors = array($importDOM->documentElement->tagName);
  if (!empty($importDOM->namespaces))
  {
    krsort($importDOM->namespaces);
    $schemaDescriptors = array_merge($schemaDescriptors, $importDOM->namespaces);
  }

  if (!empty($importDOM->doctype))
  {
    $schemaDescriptors = array_merge($schemaDescriptors, array($importDOM->doctype->name, $importDOM->doctype->systemId, $importDOM->doctype->publicId));
  }

  // Stays null when nothing matches; the mapping-file check below then
  // reports the unknown schema instead of tripping on an undefined variable.
  $importSchema = null;
  foreach ($schemaDescriptors as $descriptor)
  {
    if (array_key_exists($descriptor, $validSchemas))
    {
      $importSchema = $validSchemas[$descriptor];
    }
  }

  switch ($importSchema)
  {
    case 'ead':
      // just validate EAD import for now until we can get StrictXMLParsing working for all schemas in the self::LoadXML function. Having problems right now loading schemas.
      $importDOM->validate();

      // if libxml threw errors, populate them to show in the template
      foreach (libxml_get_errors() as $libxmlerror)
      {
        $this->errors[] = sfContext::getInstance()->i18n->__('libxml error %code% on line %line% in input file: %message%', array('%code%' => $libxmlerror->code, '%message%' => $libxmlerror->message, '%line%' => $libxmlerror->line));
      }

      break;

    case 'skos':
      $criteria = new Criteria();
      $criteria->add(QubitSetting::NAME, 'plugins');
      $setting = QubitSetting::getOne($criteria);
      if (null === $setting || !in_array('sfSkosPlugin', unserialize($setting->getValue(array('sourceCulture' => true)))))
      {
        throw new sfException(sfContext::getInstance()->i18n->__('The SKOS plugin is not enabled'));
      }

      $importTerms = sfSkosPlugin::parse($importDOM);
      $this->rootObject = QubitTaxonomy::getById(QubitTaxonomy::SUBJECT_ID);

      // NOTE(review): count() needs an array or Countable; confirm what
      // sfSkosPlugin::parse() returns.
      $this->count = count($importTerms);

      return $this;
  }

  $importMap = sfConfig::get('sf_app_module_dir') . DIRECTORY_SEPARATOR . 'object' . DIRECTORY_SEPARATOR . 'config' . DIRECTORY_SEPARATOR . 'import' . DIRECTORY_SEPARATOR . $importSchema . '.yml';
  if (!file_exists($importMap))
  {
    // error condition, unknown schema or no import filter
    $errorMsg = sfContext::getInstance()->i18n->__('Unknown schema or import format: "%format%"', array('%format%' => $importSchema));

    throw new Exception($errorMsg);
  }

  $this->schemaMap = sfYaml::load($importMap);

  // if XSLs are specified in the mapping, process them
  if (!empty($this->schemaMap['processXSLT']))
  {
    // pre-filter through XSLs in order
    foreach ((array) $this->schemaMap['processXSLT'] as $importXSL)
    {
      $importXSL = sfConfig::get('sf_data_dir') . DIRECTORY_SEPARATOR . 'xslt' . DIRECTORY_SEPARATOR . $importXSL;

      if (file_exists($importXSL))
      {
        // instantiate an XSLT parser
        $xslDOM = new DOMDocument();
        $xslDOM->load($importXSL);

        // Configure the transformer
        $xsltProc = new XSLTProcessor();
        $xsltProc->registerPHPFunctions();
        $xsltProc->importStyleSheet($xslDOM);

        $importDOM->loadXML($xsltProc->transformToXML($importDOM));
        unset($xslDOM);
        unset($xsltProc);
      }
      else
      {
        $this->errors[] = sfContext::getInstance()->i18n->__('Unable to load import XSL filter: "%importXSL%"', array('%importXSL%' => $importXSL));
      }
    }

    // re-initialize xpath on the new XML
    $importDOM->xpath = new DOMXPath($importDOM);
  }

  // switch source culture if langusage is set in an EAD document
  if ($importSchema == 'ead')
  {
    if (is_object($langusage = $importDOM->xpath->query('//eadheader/profiledesc/langusage/language/@langcode')))
    {
      $sf_user = sfContext::getInstance()->user;
      $currentCulture = $sf_user->getCulture();
      $langCodeConvertor = new fbISO639_Map();

      foreach ($langusage as $language)
      {
        $isocode = trim(preg_replace('/[\\n\\r\\s]+/', ' ', $language->nodeValue));

        // convert to Symfony culture code
        if (!($twoCharCode = strtolower($langCodeConvertor->getID2($isocode))))
        {
          $twoCharCode = $isocode;
        }

        // Check to make sure that the selected language is supported with a Symfony i18n data file.
        // If not it will cause a fatal error in the Language List component on every response.
        ProjectConfiguration::getActive()->loadHelpers('I18N');

        try
        {
          format_language($twoCharCode, $twoCharCode);
        }
        catch (Exception $e)
        {
          $this->errors[] = sfContext::getInstance()->i18n->__('EAD "langmaterial" is set to') . ': "' . $isocode . '". ' . sfContext::getInstance()->i18n->__('This language is currently not supported.');

          continue;
        }

        if ($currentCulture !== $twoCharCode)
        {
          $this->errors[] = sfContext::getInstance()->i18n->__('EAD "langmaterial" is set to') . ': "' . $isocode . '" (' . format_language($twoCharCode, 'en') . '). ' . sfContext::getInstance()->i18n->__('Your XML document has been saved in this language and your user interface has just been switched to this language.');
        }

        $sf_user->setCulture($twoCharCode);

        // can only set to one language, so have to break once the first valid language is encountered
        break;
      }
    }
  }

  // processXSLT is configuration, not an object mapping
  unset($this->schemaMap['processXSLT']);

  // go through schema map and populate objects/properties
  foreach ($this->schemaMap as $name => $mapping)
  {
    // if object is not defined or a valid class, we can't process this mapping
    if (empty($mapping['Object']) || !class_exists('Qubit' . $mapping['Object']))
    {
      $this->errors[] = sfContext::getInstance()->i18n->__('Non-existent class defined in import mapping: "%class%"', array('%class%' => 'Qubit' . $mapping['Object']));

      continue;
    }

    // get a list of XML nodes to process
    $nodeList = $importDOM->xpath->query($mapping['XPath']);

    foreach ($nodeList as $domNode)
    {
      // create a new object
      $class = 'Qubit' . $mapping['Object'];
      $currentObject = new $class();

      // set the rootObject to use for initial display in successful import
      if (!$this->rootObject)
      {
        $this->rootObject = $currentObject;
      }

      // if a parent path is specified, try to parent the node
      if (empty($mapping['Parent']))
      {
        $parentNodes = new DOMNodeList();
      }
      else
      {
        $parentNodes = $importDOM->xpath->query('(' . $mapping['Parent'] . ')', $domNode);
      }

      if ($parentNodes->length > 0)
      {
        // parent ID comes from last node in the list because XPath forces forward document order
        $parentId = $parentNodes->item($parentNodes->length - 1)->getAttribute('xml:id');
        unset($parentNodes);

        if (!empty($parentId) && is_callable(array($currentObject, 'setParentId')))
        {
          $currentObject->parentId = $parentId;
        }
      }
      else
      {
        // orphaned object, set root if possible
        if (is_callable(array($currentObject, 'setRoot')))
        {
          $currentObject->setRoot();
        }
      }

      // go through methods and populate properties
      foreach ($mapping['Methods'] as $name => $methodMap)
      {
        // if method is not defined, we can't process this mapping
        if (empty($methodMap['Method']) || !is_callable(array($currentObject, $methodMap['Method'])))
        {
          $this->errors[] = sfContext::getInstance()->i18n->__('Non-existent method defined in import mapping: "%method%"', array('%method%' => $methodMap['Method']));

          continue;
        }

        // get a list of XML nodes to process
        $nodeList2 = $importDOM->xpath->query($methodMap['XPath'], $domNode);

        if (is_object($nodeList2))
        {
          switch ($name)
          {
            // hack: some multi-value elements (e.g. 'languages') need to get passed as one array instead of individual nodes values
            case 'languages':
              $langCodeConvertor = new fbISO639_Map();
              $value = array();
              foreach ($nodeList2 as $nodeee)
              {
                if ($twoCharCode = $langCodeConvertor->getID2($nodeee->nodeValue))
                {
                  $value[] = strtolower($twoCharCode);
                }
                else
                {
                  $value[] = $nodeee->nodeValue;
                }
              }
              $currentObject->language = $value;

              break;

            case 'flocat':
              $resources = array();
              foreach ($nodeList2 as $nodeee)
              {
                $resources[] = $nodeee->nodeValue;
              }

              if (0 < count($resources))
              {
                $currentObject->importDigitalObjectFromUri($resources);
              }

              break;

            default:
              foreach ($nodeList2 as $domNode2)
              {
                // normalize the node text (trim whitespace manually); NB: this will strip any child elements, eg. HTML tags
                $nodeValue = trim(preg_replace('/[\\n\\r\\s]+/', ' ', $domNode2->nodeValue));

                // if you want the full XML from the node, use this
                // (not read below; kept in scope for eval()'d mapping parameters)
                $nodeXML = $domNode2->ownerDocument->saveXML($domNode2);

                // set the parameters for the method call
                if (empty($methodMap['Parameters']))
                {
                  $parameters = array($nodeValue);
                }
                else
                {
                  $parameters = array();
                  foreach ((array) $methodMap['Parameters'] as $parameter)
                  {
                    // if the parameter begins with %, evaluate it as an XPath expression relative to the current node
                    if ('%' == substr($parameter, 0, 1))
                    {
                      // evaluate the XPath expression
                      $xPath = substr($parameter, 1);
                      $result = $importDOM->xpath->query($xPath, $domNode2);

                      if ($result instanceof DOMNodeList && $result->length > 1)
                      {
                        // convert nodelist into an array; start empty every
                        // time so values from earlier parameters or nodes
                        // do not leak into this one
                        $resultArray = array();
                        foreach ($result as $element)
                        {
                          $resultArray[] = $element->nodeValue;
                        }

                        $parameters[] = $resultArray;
                      }
                      else
                      {
                        // pass the node value unaltered; this provides an alternative to $nodeValue above.
                        // An empty or failed query yields null instead of dereferencing a missing node.
                        $firstNode = ($result instanceof DOMNodeList) ? $result->item(0) : null;
                        $parameters[] = (null !== $firstNode) ? $firstNode->nodeValue : null;
                      }
                    }
                    else
                    {
                      // SECURITY NOTE(review): the parameter expression comes from the
                      // import mapping and is eval()'d below. Only values read from the
                      // imported XML are escaped here; the mapping itself must be trusted.

                      // Confirm DOMXML node exists to avoid warnings at run-time
                      if (false !== preg_match_all('/\\$importDOM->xpath->query\\(\'@\\w+\', \\$domNode2\\)->item\\(0\\)->nodeValue/', $parameter, $matches))
                      {
                        foreach ($matches[0] as $match)
                        {
                          $str = str_replace('->nodeValue', '', $match);

                          if (null !== ($node = eval('return ' . $str . ';')))
                          {
                            // Substitute node value for search string. var_export() emits a
                            // correctly escaped PHP string literal, so quotes or backslashes
                            // in the XML cannot break out of the literal in the eval below.
                            $parameter = str_replace($match, var_export((string) $node->nodeValue, true), $parameter);
                          }
                          else
                          {
                            // Replace empty nodes with null in parameter string
                            $parameter = str_replace($match, 'null', $parameter);
                          }
                        }
                      }

                      eval('$parameters[] = ' . $parameter . ';');
                    }
                  }
                }

                // invoke the object and method defined in the schema map
                call_user_func_array(array(&$currentObject, $methodMap['Method']), $parameters);
              }
          }

          unset($nodeList2);
        }
      }

      // make sure we have a publication status set before indexing
      if ($currentObject instanceof QubitInformationObject && count($currentObject->statuss) == 0)
      {
        $currentObject->setPublicationStatus(sfConfig::get('app_defaultPubStatus', QubitTerm::PUBLICATION_STATUS_DRAFT_ID));
      }

      // save the object after it's fully-populated
      $currentObject->save();

      // write the ID onto the current XML node for tracking
      $domNode->setAttribute('xml:id', $currentObject->id);
    }
  }

  return $this;
}