/**
 * Converts an RDF structure to a Solr document.
 *
 * Reads timestamps and status from the `rdf:Description` node, merges them with
 * the caller supplied extra data (caller values win), and then copies the SKOS,
 * DC and DC Terms child elements into document fields.
 *
 * @param DOMNode $Description The `rdf:Description` element to convert.
 * @param array $extradata Extra field values; these override the auto-detected ones.
 * @param DOMXPath $xpath Optional XPath object. When omitted one is created for the
 *                        owner document with every prefix of self::$namespaces registered.
 * @param string $fallbackStatus The status which will be used if no other status is detected.
 * @return OpenSKOS_Solr_Document|null Null when the rdf:type resource does not start
 *                                     with the SKOS namespace.
 * @throws OpenSKOS_Rdf_Parser_Exception When the node is not `rdf:Description`, or when
 *                                       `rdf:about` (without an explicit uri) or `rdf:type` is missing.
 */
public static function DomNode2SolrDocument(DOMNode $Description, array $extradata = array(), DOMXPath $xpath = null, $fallbackStatus = '')
{
    if ($Description->nodeName !== 'rdf:Description') {
        throw new OpenSKOS_Rdf_Parser_Exception(
            'wrong nodeName, expected `rdf:Description`, got `' . $Description->nodeName . '`'
        );
    }

    if (null === $xpath) {
        $xpath = new DOMXPath($Description->ownerDocument);
        // Only the namespaces listed in self::$namespaces are supported.
        foreach (self::$namespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }
    }

    // Sets created_timestamp, modified_timestamp and approved_timestamp.
    // NOTE(review): strtotime() returns false for unparsable input and that value is
    // passed straight to date() - confirm whether such dates should be rejected instead.
    $autoExtraData = array();

    $dateSubmittedNodes = $xpath->query('dcterms:dateSubmitted', $Description);
    if ($dateSubmittedNodes->length > 0) {
        $autoExtraData['created_timestamp'] = date(
            self::SOLR_DATETIME_FORMAT,
            strtotime($dateSubmittedNodes->item(0)->nodeValue)
        );
    } else {
        // No submission date in the RDF: use the current time.
        $autoExtraData['created_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
    }

    $dateModifiedNodes = $xpath->query('dcterms:modified', $Description);
    if ($dateModifiedNodes->length > 0) {
        $autoExtraData['modified_timestamp'] = date(
            self::SOLR_DATETIME_FORMAT,
            strtotime($dateModifiedNodes->item(0)->nodeValue)
        );
    }

    $dateAcceptedNodes = $xpath->query('dcterms:dateAccepted', $Description);
    if ($dateAcceptedNodes->length > 0) {
        $autoExtraData['approved_timestamp'] = date(
            self::SOLR_DATETIME_FORMAT,
            strtotime($dateAcceptedNodes->item(0)->nodeValue)
        );
    }

    // Sets status. A date accepted means "approved", otherwise a date submitted
    // means "candidate", otherwise the (non-empty) fallback status is used.
    if ($dateAcceptedNodes->length > 0) {
        $autoExtraData['status'] = 'approved';
    } elseif ($dateSubmittedNodes->length > 0) {
        $autoExtraData['status'] = 'candidate';
    } elseif (!empty($fallbackStatus)) {
        $autoExtraData['status'] = $fallbackStatus;
    }

    // Merges the incoming extra data with the auto detected extra data.
    $extradata = array_merge($autoExtraData, $extradata);

    // Set deleted timestamp if status is expired (or the deleted flag is set)
    // and deleted timestamp is not already set.
    $isExpired = isset($extradata['status']) && $extradata['status'] == 'expired';
    $isDeleted = isset($extradata['deleted']) && $extradata['deleted'];
    if (!isset($extradata['deleted_timestamp']) && ($isExpired || $isDeleted)) {
        $extradata['deleted_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
    }

    // Fix empty values: these fields are dropped rather than stored empty.
    foreach (array('approved_timestamp', 'approved_by', 'deleted_timestamp', 'deleted_by') as $optionalKey) {
        if (empty($extradata[$optionalKey])) {
            unset($extradata[$optionalKey]);
        }
    }

    // Creates the solr document from the description and the extra data.
    $document = new OpenSKOS_Solr_Document();
    foreach ($extradata as $key => $var) {
        if (is_bool($var)) {
            // Booleans are stored as the strings 'true' / 'false'.
            $var = $var ? 'true' : 'false';
        }
        $document->{$key} = $var;
    }

    if (!isset($extradata['uri'])) {
        $about = $Description->getAttributeNS(self::$namespaces['rdf'], 'about');
        if (!$about) {
            throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:about');
        }
        $document->uri = $about;
    }

    if (!isset($extradata['uuid'])) {
        $document->uuid = OpenSKOS_Utils::uuid();
    }

    $type = $xpath->query('./rdf:type', $Description)->item(0);
    if (!$type) {
        throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:type');
    }
    $resource = $type->getAttributeNS(self::$namespaces['rdf'], 'resource');
    if (0 !== strpos($resource, self::$namespaces['skos'])) {
        // Types outside the SKOS namespace are not converted.
        return null;
    }
    $document->class = parse_url($resource, PHP_URL_FRAGMENT);

    $skosElements = $xpath->query('./skos:*', $Description);
    foreach ($skosElements as $skosElement) {
        $fieldname = str_replace('skos:', '', $skosElement->nodeName);
        if (in_array($fieldname, self::$langMapping)) {
            if ($xml_lang = $skosElement->getAttribute('xml:lang')) {
                $fieldname = $fieldname . '@' . $xml_lang;
            }
        }

        // Use the text content when there is any, otherwise the rdf:resource attribute.
        // Compared against '' explicitly so that a literal "0" (e.g. a notation) is kept.
        $text = trim($skosElement->nodeValue);
        $document->{$fieldname} = ('' !== $text)
            ? $text
            : $skosElement->getAttributeNS(self::$namespaces['rdf'], 'resource');

        // Store every first prefLabel/notation in a sortable field.
        if (0 === strpos($fieldname, 'prefLabel') || 0 === strpos($fieldname, 'notation')) {
            $sortFieldNames = array(
                str_replace(array('prefLabel', 'notation'), array('prefLabelSort', 'notationSort'), $fieldname),
            );
            // Also store the first language in a generic (language-less) field.
            if (false !== strpos($fieldname, '@')) {
                $sortFieldNames[] = preg_replace('/@.+/', 'Sort', $fieldname);
            }
            foreach ($sortFieldNames as $sortFieldName) {
                if (!$document->offsetExists($sortFieldName)) {
                    $values = $document->offsetGet($fieldname);
                    $document->{$sortFieldName} = array_shift($values);
                }
            }
        }
    }

    // Both dc:* and dcterms:* elements end up in dcterms_* fields.
    foreach (array('dc', 'dcterms') as $ns) {
        foreach ($xpath->query('./' . $ns . ':*', $Description) as $element) {
            $fieldname = str_replace($ns . ':', 'dcterms_', $element->nodeName);
            $document->{$fieldname} = trim($element->nodeValue);
        }
    }

    // Some XML files use rdfs:label/rdfs:comment,
    // let's map those to dcterms:title/dcterms:description.
    // NOTE(review): the query selects dcterms:description rather than rdfs:comment and the
    // resulting field names contain ':' instead of '_' - looks unintended, confirm before changing.
    foreach ($xpath->query('./rdfs:label | ./dcterms:description', $Description) as $element) {
        $fieldname = str_replace(
            array('rdfs:label', 'rdfs:comment'),
            array('dcterms:title', 'dcterms:description'),
            $element->nodeName
        );
        $document->{$fieldname} = trim($element->nodeValue);
    }

    // Infer dcterms:title from skos:prefLabel if not already present, using the first
    // prefLabel found matching one of the following criteria, checked in this order:
    // 1. with xml:lang=XY where XY is lang option (if set)
    // 2. without an xml:lang attribute
    // 3. any prefLabel
    if (!isset($document->dcterms_title)) {
        $prefLabelXpathQueries = array('./skos:prefLabel[not(@xml:lang)]', './skos:prefLabel');
        // A lang value containing a single quote cannot be embedded in the quoted XPath
        // literal (the query would be malformed), so the language specific lookup is skipped.
        if (!empty($extradata['lang'])
            && is_string($extradata['lang'])
            && false === strpos($extradata['lang'], "'")
        ) {
            array_unshift($prefLabelXpathQueries, "./skos:prefLabel[@xml:lang='" . $extradata['lang'] . "']");
        }
        foreach ($prefLabelXpathQueries as $xpathQuery) {
            if ($prefLabelElement = $xpath->query($xpathQuery, $Description)->item(0)) {
                $document->dcterms_title = trim($prefLabelElement->nodeValue);
                break;
            }
        }
    }

    $document->xml = $Description->ownerDocument->saveXML($Description);

    // Store the distinct namespace prefixes used by the child elements.
    $availableNamespaces = array();
    foreach ($Description->childNodes as $childNode) {
        if ($childNode->nodeType === XML_ELEMENT_NODE) {
            $prefix = preg_replace('/^([a-z0-9\\-\\_]+)\\:.+$/', '$1', $childNode->nodeName);
            if (!in_array($prefix, $availableNamespaces, true)) {
                $availableNamespaces[] = $prefix;
            }
        }
    }
    if ($availableNamespaces) {
        $document->xmlns = $availableNamespaces;
    }

    return $document;
}
/**
 * Produces a new unique identifier.
 *
 * Delegates to OpenSKOS_Utils::uuid() and returns its result unchanged.
 *
 * @return mixed Whatever OpenSKOS_Utils::uuid() returns.
 */
protected function generateUUID()
{
    $identifier = OpenSKOS_Utils::uuid();

    return $identifier;
}
/**
 * Converts an RDF structure to a Solr document.
 *
 * Reads timestamps and status from the `rdf:Description` node, merges them with
 * the caller supplied extra data (caller values win), validates the status and
 * then copies the SKOS, DC and DC Terms child elements into document fields.
 *
 * @param DOMNode $Description The `rdf:Description` element to convert.
 * @param array $extradata Extra field values; these override the auto-detected ones.
 * @param DOMXPath $xpath Optional XPath object. When omitted one is created for the
 *                        owner document with every prefix of self::$namespaces registered.
 * @param string $fallbackStatus The status which will be used if no other status is detected.
 * @param bool $autoGenerateUri If the script should auto generate uri and notation
 * @param OpenSKOS_Db_Table_Row_Collection $collection Used for the tenant status setting and
 *                                                     handed to the uri auto generation.
 * @return OpenSKOS_Solr_Document|null Null when the rdf:type resource does not start
 *                                     with the SKOS namespace.
 * @throws OpenSKOS_Rdf_Parser_Exception When the node is not `rdf:Description`, the status is
 *                                       not recognized, `rdf:type` is missing, or no uri is
 *                                       available and auto generation is off.
 */
public static function DomNode2SolrDocument(DOMNode $Description, array $extradata = array(), DOMXPath $xpath = null, $fallbackStatus = '', $autoGenerateUri = false, $collection = null)
{
    if ($Description->nodeName !== 'rdf:Description') {
        throw new OpenSKOS_Rdf_Parser_Exception(
            'wrong nodeName, expected `rdf:Description`, got `' . $Description->nodeName . '`'
        );
    }

    if (null === $xpath) {
        $xpath = new DOMXPath($Description->ownerDocument);
        // Only the namespaces listed in self::$namespaces are supported.
        foreach (self::$namespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }
    }

    // Sets created_timestamp, modified_timestamp and approved_timestamp.
    // NOTE(review): strtotime() returns false for unparsable input and that value is
    // passed straight to date() - confirm whether such dates should be rejected instead.
    $autoExtraData = array();

    $dateSubmittedNodes = $xpath->query('dcterms:dateSubmitted', $Description);
    if ($dateSubmittedNodes->length > 0) {
        $autoExtraData['created_timestamp'] = date(
            self::SOLR_DATETIME_FORMAT,
            strtotime($dateSubmittedNodes->item(0)->nodeValue)
        );
    } else {
        // No submission date in the RDF: use the current time.
        $autoExtraData['created_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
    }

    $dateModifiedNodes = $xpath->query('dcterms:modified', $Description);
    if ($dateModifiedNodes->length > 0) {
        $autoExtraData['modified_timestamp'] = date(
            self::SOLR_DATETIME_FORMAT,
            strtotime($dateModifiedNodes->item(0)->nodeValue)
        );
    }

    $dateAcceptedNodes = $xpath->query('dcterms:dateAccepted', $Description);
    if ($dateAcceptedNodes->length > 0) {
        $autoExtraData['approved_timestamp'] = date(
            self::SOLR_DATETIME_FORMAT,
            strtotime($dateAcceptedNodes->item(0)->nodeValue)
        );
    }

    // Sets status, in this order of preference: the openskos:status element, the
    // fallback status, "approved" when the collection's tenant has the statuses
    // system switched off, and "candidate" otherwise.
    $openskosStatusNodes = $xpath->query('openskos:status', $Description);
    if ($openskosStatusNodes->length > 0) {
        $autoExtraData['status'] = $openskosStatusNodes->item(0)->nodeValue;
    } elseif (!empty($fallbackStatus)) {
        $autoExtraData['status'] = $fallbackStatus;
    } elseif ($collection !== null && !$collection->getTenant()['enableStatusesSystem']) {
        $autoExtraData['status'] = OpenSKOS_Concept_Status::APPROVED;
    } else {
        $autoExtraData['status'] = OpenSKOS_Concept_Status::CANDIDATE;
    }

    // Merges the incoming extra data with the auto detected extra data.
    $extradata = array_merge($autoExtraData, $extradata);

    // Validates status.
    if (!empty($extradata['status'])
        && !in_array($extradata['status'], OpenSKOS_Concept_Status::getStatuses())
    ) {
        throw new OpenSKOS_Rdf_Parser_Exception('Status "' . $extradata['status'] . '" not recognized.');
    }

    // Status deleted equals soft deletion and soft deleting equals status deleted.
    if (isset($extradata['status']) && $extradata['status'] == OpenSKOS_Concept_Status::DELETED) {
        $extradata['deleted'] = true;
    } elseif (isset($extradata['deleted']) && $extradata['deleted']) {
        $extradata['status'] = OpenSKOS_Concept_Status::DELETED;
    }

    // Set deleted timestamp if the status counts as deleted (or the deleted flag is set)
    // and deleted timestamp is not already set.
    $hasDeletedLikeStatus = isset($extradata['status'])
        && OpenSKOS_Concept_Status::isStatusLikeDeleted($extradata['status']);
    $isDeleted = isset($extradata['deleted']) && $extradata['deleted'];
    if (empty($extradata['deleted_timestamp']) && ($hasDeletedLikeStatus || $isDeleted)) {
        $extradata['deleted_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
    }

    // Fix empty values: these fields are dropped rather than stored empty.
    foreach (array('approved_timestamp', 'approved_by', 'deleted_timestamp', 'deleted_by') as $optionalKey) {
        if (empty($extradata[$optionalKey])) {
            unset($extradata[$optionalKey]);
        }
    }

    // Creates the solr document from the description and the extra data.
    $document = new OpenSKOS_Solr_Document();
    foreach ($extradata as $key => $var) {
        if (is_bool($var)) {
            // Booleans are stored as the strings 'true' / 'false'.
            $var = $var ? 'true' : 'false';
        }
        $document->{$key} = $var;
    }

    if (!isset($extradata['uuid'])) {
        $document->uuid = OpenSKOS_Utils::uuid();
    }

    $type = $xpath->query('./rdf:type', $Description)->item(0);
    if (!$type) {
        throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:type');
    }
    $resource = $type->getAttributeNS(self::$namespaces['rdf'], 'resource');
    if (0 !== strpos($resource, self::$namespaces['skos'])) {
        // Types outside the SKOS namespace are not converted.
        return null;
    }
    $document->class = parse_url($resource, PHP_URL_FRAGMENT);

    $skosElements = $xpath->query('./skos:*', $Description);
    foreach ($skosElements as $skosElement) {
        $fieldname = str_replace('skos:', '', $skosElement->nodeName);
        if (in_array($fieldname, self::$langMapping)) {
            if ($xml_lang = $skosElement->getAttribute('xml:lang')) {
                $fieldname = $fieldname . '@' . $xml_lang;
            }
        }

        // Use the text content when there is any, otherwise the rdf:resource attribute.
        // Compared against '' explicitly so that a literal "0" (e.g. a notation) is kept.
        $text = trim($skosElement->nodeValue);
        $document->{$fieldname} = ('' !== $text)
            ? $text
            : $skosElement->getAttributeNS(self::$namespaces['rdf'], 'resource');

        // Store every first prefLabel/notation in a sortable field.
        if (0 === strpos($fieldname, 'prefLabel') || 0 === strpos($fieldname, 'notation')) {
            $sortFieldNames = array(
                str_replace(array('prefLabel', 'notation'), array('prefLabelSort', 'notationSort'), $fieldname),
            );
            // Also store the first language in a generic (language-less) field.
            if (false !== strpos($fieldname, '@')) {
                $sortFieldNames[] = preg_replace('/@.+/', 'Sort', $fieldname);
            }
            foreach ($sortFieldNames as $sortFieldName) {
                if (!$document->offsetExists($sortFieldName)) {
                    $values = $document->offsetGet($fieldname);
                    $document->{$sortFieldName} = array_shift($values);
                }
            }
        }
    }

    // Both dc:* and dcterms:* elements end up in dcterms_* fields.
    foreach (array('dc', 'dcterms') as $ns) {
        foreach ($xpath->query('./' . $ns . ':*', $Description) as $element) {
            $fieldname = str_replace($ns . ':', 'dcterms_', $element->nodeName);
            $document->{$fieldname} = trim($element->nodeValue);
        }
    }

    // Some XML files use rdfs:label/rdfs:comment,
    // let's map those to dcterms:title/dcterms:description.
    // NOTE(review): the query selects dcterms:description rather than rdfs:comment and the
    // resulting field names contain ':' instead of '_' - looks unintended, confirm before changing.
    foreach ($xpath->query('./rdfs:label | ./dcterms:description', $Description) as $element) {
        $fieldname = str_replace(
            array('rdfs:label', 'rdfs:comment'),
            array('dcterms:title', 'dcterms:description'),
            $element->nodeName
        );
        $document->{$fieldname} = trim($element->nodeValue);
    }

    $document->xml = $Description->ownerDocument->saveXML($Description);

    // Checks or generates the uri: an explicit uri field wins, then rdf:about,
    // then auto generation when that is enabled.
    if (!$document->offsetExists('uri')) {
        $about = $Description->getAttributeNS(self::$namespaces['rdf'], 'about');
        if ($about) {
            $document->uri = $about;
        } elseif ($autoGenerateUri) {
            $document->autoGenerateUri($collection);
        } else {
            throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:about');
        }
    }

    // Puts status in the Document.
    $document->updateStatusInGeneratedXml();

    // Store the distinct namespace prefixes used by the child elements.
    $availableNamespaces = array();
    foreach ($Description->childNodes as $childNode) {
        if ($childNode->nodeType === XML_ELEMENT_NODE) {
            $prefix = preg_replace('/^([a-z0-9\\-\\_]+)\\:.+$/', '$1', $childNode->nodeName);
            if (!in_array($prefix, $availableNamespaces, true)) {
                $availableNamespaces[] = $prefix;
            }
        }
    }
    if ($availableNamespaces) {
        $document->xmlns = $availableNamespaces;
    }

    return $document;
}