Example #1
0
 /**
  * Converts a RDF structure to a Solr Document
  * 
  * @param DOMNode $Description
  * @param array $extradata
  * @param DOMXPath $xpath
  * @param string $fallbackStatus The status which will be used if no other status is detected.
  * @return OpenSKOS_Solr_Document|null Null when the rdf:type resource is not in the SKOS namespace.
  */
 public static function DomNode2SolrDocument(DOMNode $Description, array $extradata = array(), DOMXPath $xpath = null, $fallbackStatus = '')
 {
     // Builds an OpenSKOS_Solr_Document from one rdf:Description element.
     //
     // $Description    must be an element named `rdf:Description`.
     // $extradata      caller supplied fields; they override the auto detected ones.
     // $xpath          optional XPath object; when null one is created and the
     //                 prefixes from self::$namespaces are registered on it.
     // $fallbackStatus status used when neither dcterms:dateAccepted nor
     //                 dcterms:dateSubmitted is present.
     //
     // Returns the document, or null when the rdf:type resource does not start
     // with the SKOS namespace.
     // Throws OpenSKOS_Rdf_Parser_Exception on a wrong node name, a missing
     // rdf:about (when no uri is passed in $extradata) or a missing rdf:type.
     if ($Description->nodeName != 'rdf:Description') {
         throw new OpenSKOS_Rdf_Parser_Exception('wrong nodeName, expected `rdf:Description`, got `' . $Description->nodeName . '`');
     }

     if (null === $xpath) {
         $xpath = new DOMXPath($Description->ownerDocument);
         // Only the namespaces known to the parser are supported.
         foreach (self::$namespaces as $prefix => $uri) {
             $xpath->registerNamespace($prefix, $uri);
         }
     }

     // Sets created_timestamp, modified_timestamp and approved_timestamp.
     // NOTE(review): strtotime() returns false for an unparsable date and that
     // value is passed straight to date() - confirm whether such input should
     // be rejected instead.
     $autoExtraData = array();

     $dateSubmittedNodes = $xpath->query('dcterms:dateSubmitted', $Description);
     if ($dateSubmittedNodes->length > 0) {
         $autoExtraData['created_timestamp'] = date(self::SOLR_DATETIME_FORMAT, strtotime($dateSubmittedNodes->item(0)->nodeValue));
     } else {
         // No submission date in the RDF: use the current time.
         $autoExtraData['created_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
     }

     $dateModifiedNodes = $xpath->query('dcterms:modified', $Description);
     if ($dateModifiedNodes->length > 0) {
         $autoExtraData['modified_timestamp'] = date(self::SOLR_DATETIME_FORMAT, strtotime($dateModifiedNodes->item(0)->nodeValue));
     }

     $dateAcceptedNodes = $xpath->query('dcterms:dateAccepted', $Description);
     if ($dateAcceptedNodes->length > 0) {
         $autoExtraData['approved_timestamp'] = date(self::SOLR_DATETIME_FORMAT, strtotime($dateAcceptedNodes->item(0)->nodeValue));
     }

     // Sets status: an accepted date means "approved", otherwise a submitted
     // date means "candidate", otherwise the fallback status (if any).
     if ($dateAcceptedNodes->length > 0) {
         $autoExtraData['status'] = 'approved';
     } elseif ($dateSubmittedNodes->length > 0) {
         $autoExtraData['status'] = 'candidate';
     } elseif (!empty($fallbackStatus)) {
         $autoExtraData['status'] = $fallbackStatus;
     }

     // Merges the incoming extra data with the auto detected extra data;
     // values passed by the caller win.
     $extradata = array_merge($autoExtraData, $extradata);

     // Set deleted timestamp if the status is expired (or the deleted flag is
     // set) and a deleted timestamp is not already present.
     $isExpired = isset($extradata['status']) && $extradata['status'] == 'expired';
     $isDeleted = isset($extradata['deleted']) && $extradata['deleted'];
     if (!isset($extradata['deleted_timestamp']) && ($isExpired || $isDeleted)) {
         $extradata['deleted_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
     }

     // Drop empty values so they are not copied to the document.
     foreach (array('approved_timestamp', 'approved_by', 'deleted_timestamp', 'deleted_by') as $optionalKey) {
         if (empty($extradata[$optionalKey])) {
             unset($extradata[$optionalKey]);
         }
     }

     // Creates the solr document from the description and the extra data.
     // Booleans are stored as the strings 'true' / 'false'.
     $document = new OpenSKOS_Solr_Document();
     foreach ($extradata as $key => $var) {
         if (is_bool($var)) {
             $var = $var ? 'true' : 'false';
         }
         $document->{$key} = $var;
     }

     // When the caller did not pass a uri, rdf:about is mandatory.
     if (!isset($extradata['uri'])) {
         $about = $Description->getAttributeNS(self::$namespaces['rdf'], 'about');
         if (!$about) {
             throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:about');
         }
         $document->uri = $about;
     }

     if (!isset($extradata['uuid'])) {
         $document->uuid = OpenSKOS_Utils::uuid();
     }

     $type = $xpath->query('./rdf:type', $Description)->item(0);
     if (!$type) {
         throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:type');
     }
     $resource = $type->getAttributeNS(self::$namespaces['rdf'], 'resource');
     if (0 !== strpos($resource, self::$namespaces['skos'])) {
         // Not a SKOS type: nothing to index.
         return null;
     }
     $document->class = parse_url($resource, PHP_URL_FRAGMENT);

     foreach ($xpath->query('./skos:*', $Description) as $skosElement) {
         $fieldname = str_replace('skos:', '', $skosElement->nodeName);
         if (in_array($fieldname, self::$langMapping)) {
             if ($xml_lang = $skosElement->getAttribute('xml:lang')) {
                 $fieldname = $fieldname . '@' . $xml_lang;
             }
         }

         // Use the text content; fall back to rdf:resource only when the text
         // is really empty. A plain truthiness test would also discard the
         // literal value "0" (e.g. a skos:notation of 0).
         $value = trim($skosElement->nodeValue);
         if ('' === $value) {
             $value = $skosElement->getAttributeNS(self::$namespaces['rdf'], 'resource');
         }
         $document->{$fieldname} = $value;

         //store every first preflabel/notation in a sortable field:
         if (0 === strpos($fieldname, 'prefLabel') || 0 === strpos($fieldname, 'notation')) {
             $sortFieldName = str_replace(array('prefLabel', 'notation'), array('prefLabelSort', 'notationSort'), $fieldname);
             if (!$document->offsetExists($sortFieldName)) {
                 // presumably offsetGet() returns the list of values stored
                 // for the field - verify against OpenSKOS_Solr_Document.
                 $offset = $document->offsetGet($fieldname);
                 $document->{$sortFieldName} = array_shift($offset);
             }
             //also store the first language in a generic field:
             if (strpos($fieldname, '@')) {
                 $sortFieldName = preg_replace('/@.+/', 'Sort', $fieldname);
                 if (!$document->offsetExists($sortFieldName)) {
                     $offset = $document->offsetGet($fieldname);
                     $document->{$sortFieldName} = array_shift($offset);
                 }
             }
         }
     }

     // dc:* and dcterms:* elements both end up in dcterms_* fields.
     foreach (array('dc', 'dcterms') as $ns) {
         foreach ($xpath->query('./' . $ns . ':*', $Description) as $element) {
             $fieldname = str_replace($ns . ':', 'dcterms_', $element->nodeName);
             $document->{$fieldname} = trim($element->nodeValue);
         }
     }

     //some XML files use rdfs:label/rdfs:comment
     // let's map those to dcterms:title/dcterms:description
     // NOTE(review): the query below selects dcterms:description, not
     // rdfs:comment, and the target names use a colon while the loop above
     // writes `dcterms_` fields. Left unchanged because the expected Solr
     // field names are not visible from here - confirm and fix.
     foreach ($xpath->query('./rdfs:label | ./dcterms:description', $Description) as $element) {
         $fieldname = str_replace(array('rdfs:label', 'rdfs:comment'), array('dcterms:title', 'dcterms:description'), $element->nodeName);
         $document->{$fieldname} = trim($element->nodeValue);
     }

     //infer dcterms:title from skos:prefLabel if not already present, using the first
     // prefLabel found matching one of the following criteria, checked in this order:
     // 1. with xml:lang=XY where XY is lang option (if set)
     // 2. without an xml:lang attribute
     // 3. any prefLabel
     if (!isset($document->dcterms_title)) {
         $prefLabelXpathQueries = array('./skos:prefLabel[not(@xml:lang)]', './skos:prefLabel');
         if (!empty($extradata['lang'])) {
             // XPath 1.0 string literals have no escape sequence, so pick a
             // delimiter that does not occur in the value. A value holding both
             // quote characters cannot be a language tag; the language specific
             // query is skipped for it instead of building a malformed
             // expression.
             $lang = (string) $extradata['lang'];
             $langLiteral = null;
             if (false === strpos($lang, "'")) {
                 $langLiteral = "'" . $lang . "'";
             } elseif (false === strpos($lang, '"')) {
                 $langLiteral = '"' . $lang . '"';
             }
             if (null !== $langLiteral) {
                 array_unshift($prefLabelXpathQueries, './skos:prefLabel[@xml:lang=' . $langLiteral . ']');
             }
         }
         foreach ($prefLabelXpathQueries as $xpathQuery) {
             if ($prefLabelElement = $xpath->query($xpathQuery, $Description)->item(0)) {
                 $document->dcterms_title = trim($prefLabelElement->nodeValue);
                 break;
             }
         }
     }

     // Keep the serialized source element with the document.
     $document->xml = $Description->ownerDocument->saveXML($Description);

     //store namespaces: collect the distinct prefixes of the child elements.
     $availableNamespaces = array();
     foreach ($Description->childNodes as $childNode) {
         if ($childNode->nodeType === XML_ELEMENT_NODE) {
             $prefix = preg_replace('/^([a-z0-9\\-\\_]+)\\:.+$/', '$1', $childNode->nodeName);
             if (!in_array($prefix, $availableNamespaces, true)) {
                 $availableNamespaces[] = $prefix;
             }
         }
     }
     if ($availableNamespaces) {
         $document->xmlns = $availableNamespaces;
     }

     return $document;
 }
Example #2
0
 /**
  * Returns a new identifier obtained from OpenSKOS_Utils::uuid().
  */
 protected function generateUUID()
 {
     // Delegate to the OpenSKOS utility class; the value is returned unchanged.
     $uuid = OpenSKOS_Utils::uuid();

     return $uuid;
 }
Example #3
0
 /**
  * Converts a RDF structure to a Solr Document
  * 
  * @param DOMNode $Description
  * @param array $extradata
  * @param DOMXPath $xpath
  * @param string $fallbackStatus The status which will be used if no other status is detected.
  * @param bool $autoGenerateUri If the script should auto generate uri and notation
  * @param OpenSKOS_Db_Table_Row_Collection $collection
  * @return OpenSKOS_Solr_Document|null Null when the rdf:type resource is not in the SKOS namespace.
  */
 public static function DomNode2SolrDocument(DOMNode $Description, array $extradata = array(), DOMXPath $xpath = null, $fallbackStatus = '', $autoGenerateUri = false, $collection = null)
 {
     // Builds an OpenSKOS_Solr_Document from one rdf:Description element.
     //
     // $Description     must be an element named `rdf:Description`.
     // $extradata       caller supplied fields; they override the auto detected ones.
     // $xpath           optional XPath object; when null one is created and the
     //                  prefixes from self::$namespaces are registered on it.
     // $fallbackStatus  status used when the element has no openskos:status.
     // $autoGenerateUri when true and rdf:about is missing, the document is
     //                  asked to generate its uri instead of throwing.
     // $collection      used for the default status and passed to
     //                  autoGenerateUri().
     //
     // Returns the document, or null when the rdf:type resource does not start
     // with the SKOS namespace.
     // Throws OpenSKOS_Rdf_Parser_Exception on a wrong node name, an
     // unrecognized status, a missing rdf:type, or a missing rdf:about when
     // no uri is known and $autoGenerateUri is false.
     if ($Description->nodeName != 'rdf:Description') {
         throw new OpenSKOS_Rdf_Parser_Exception('wrong nodeName, expected `rdf:Description`, got `' . $Description->nodeName . '`');
     }

     if (null === $xpath) {
         $xpath = new DOMXPath($Description->ownerDocument);
         // Only the namespaces known to the parser are supported.
         foreach (self::$namespaces as $prefix => $uri) {
             $xpath->registerNamespace($prefix, $uri);
         }
     }

     // Sets created_timestamp, modified_timestamp and approved_timestamp.
     // NOTE(review): strtotime() returns false for an unparsable date and that
     // value is passed straight to date() - confirm whether such input should
     // be rejected instead.
     $autoExtraData = array();

     $dateSubmittedNodes = $xpath->query('dcterms:dateSubmitted', $Description);
     if ($dateSubmittedNodes->length > 0) {
         $autoExtraData['created_timestamp'] = date(self::SOLR_DATETIME_FORMAT, strtotime($dateSubmittedNodes->item(0)->nodeValue));
     } else {
         // No submission date in the RDF: use the current time.
         $autoExtraData['created_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
     }

     $dateModifiedNodes = $xpath->query('dcterms:modified', $Description);
     if ($dateModifiedNodes->length > 0) {
         $autoExtraData['modified_timestamp'] = date(self::SOLR_DATETIME_FORMAT, strtotime($dateModifiedNodes->item(0)->nodeValue));
     }

     $dateAcceptedNodes = $xpath->query('dcterms:dateAccepted', $Description);
     if ($dateAcceptedNodes->length > 0) {
         $autoExtraData['approved_timestamp'] = date(self::SOLR_DATETIME_FORMAT, strtotime($dateAcceptedNodes->item(0)->nodeValue));
     }

     // Sets status, in order of precedence: the openskos:status element, the
     // fallback status, APPROVED when the collection's tenant has the statuses
     // system disabled, and CANDIDATE otherwise.
     $openskosStatusNodes = $xpath->query('openskos:status', $Description);
     if ($openskosStatusNodes->length > 0) {
         $autoExtraData['status'] = $openskosStatusNodes->item(0)->nodeValue;
     } elseif (!empty($fallbackStatus)) {
         $autoExtraData['status'] = $fallbackStatus;
     } elseif ($collection !== null && !$collection->getTenant()['enableStatusesSystem']) {
         $autoExtraData['status'] = OpenSKOS_Concept_Status::APPROVED;
     } else {
         $autoExtraData['status'] = OpenSKOS_Concept_Status::CANDIDATE;
     }

     // Merges the incoming extra data with the auto detected extra data;
     // values passed by the caller win.
     $extradata = array_merge($autoExtraData, $extradata);

     // Validates status
     if (!empty($extradata['status']) && !in_array($extradata['status'], OpenSKOS_Concept_Status::getStatuses())) {
         throw new OpenSKOS_Rdf_Parser_Exception('Status "' . $extradata['status'] . '" not recognized.');
     }

     // Status deleted equals soft deletion and soft deleting equals status deleted.
     if (isset($extradata['status']) && $extradata['status'] == OpenSKOS_Concept_Status::DELETED) {
         $extradata['deleted'] = true;
     } elseif (isset($extradata['deleted']) && $extradata['deleted']) {
         $extradata['status'] = OpenSKOS_Concept_Status::DELETED;
     }

     // Set deleted timestamp when none is present and either the status is
     // reported as deleted-like by OpenSKOS_Concept_Status or the deleted flag
     // is set.
     $statusLikeDeleted = isset($extradata['status']) && OpenSKOS_Concept_Status::isStatusLikeDeleted($extradata['status']);
     $flaggedDeleted = isset($extradata['deleted']) && $extradata['deleted'];
     if (empty($extradata['deleted_timestamp']) && ($statusLikeDeleted || $flaggedDeleted)) {
         $extradata['deleted_timestamp'] = date(self::SOLR_DATETIME_FORMAT);
     }

     // Drop empty values so they are not copied to the document.
     foreach (array('approved_timestamp', 'approved_by', 'deleted_timestamp', 'deleted_by') as $optionalKey) {
         if (empty($extradata[$optionalKey])) {
             unset($extradata[$optionalKey]);
         }
     }

     // Creates the solr document from the description and the extra data.
     // Booleans are stored as the strings 'true' / 'false'.
     $document = new OpenSKOS_Solr_Document();
     foreach ($extradata as $key => $var) {
         if (is_bool($var)) {
             $var = $var ? 'true' : 'false';
         }
         $document->{$key} = $var;
     }

     if (!isset($extradata['uuid'])) {
         $document->uuid = OpenSKOS_Utils::uuid();
     }

     $type = $xpath->query('./rdf:type', $Description)->item(0);
     if (!$type) {
         throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:type');
     }
     $resource = $type->getAttributeNS(self::$namespaces['rdf'], 'resource');
     if (0 !== strpos($resource, self::$namespaces['skos'])) {
         // Not a SKOS type: nothing to index.
         return null;
     }
     $document->class = parse_url($resource, PHP_URL_FRAGMENT);

     foreach ($xpath->query('./skos:*', $Description) as $skosElement) {
         $fieldname = str_replace('skos:', '', $skosElement->nodeName);
         if (in_array($fieldname, self::$langMapping)) {
             if ($xml_lang = $skosElement->getAttribute('xml:lang')) {
                 $fieldname = $fieldname . '@' . $xml_lang;
             }
         }

         // Use the text content; fall back to rdf:resource only when the text
         // is really empty. A plain truthiness test would also discard the
         // literal value "0" (e.g. a skos:notation of 0).
         $value = trim($skosElement->nodeValue);
         if ('' === $value) {
             $value = $skosElement->getAttributeNS(self::$namespaces['rdf'], 'resource');
         }
         $document->{$fieldname} = $value;

         //store every first preflabel/notation in a sortable field:
         if (0 === strpos($fieldname, 'prefLabel') || 0 === strpos($fieldname, 'notation')) {
             $sortFieldName = str_replace(array('prefLabel', 'notation'), array('prefLabelSort', 'notationSort'), $fieldname);
             if (!$document->offsetExists($sortFieldName)) {
                 // presumably offsetGet() returns the list of values stored
                 // for the field - verify against OpenSKOS_Solr_Document.
                 $offset = $document->offsetGet($fieldname);
                 $document->{$sortFieldName} = array_shift($offset);
             }
             //also store the first language in a generic field:
             if (strpos($fieldname, '@')) {
                 $sortFieldName = preg_replace('/@.+/', 'Sort', $fieldname);
                 if (!$document->offsetExists($sortFieldName)) {
                     $offset = $document->offsetGet($fieldname);
                     $document->{$sortFieldName} = array_shift($offset);
                 }
             }
         }
     }

     // dc:* and dcterms:* elements both end up in dcterms_* fields.
     foreach (array('dc', 'dcterms') as $ns) {
         foreach ($xpath->query('./' . $ns . ':*', $Description) as $element) {
             $fieldname = str_replace($ns . ':', 'dcterms_', $element->nodeName);
             $document->{$fieldname} = trim($element->nodeValue);
         }
     }

     //some XML files use rdfs:label/rdfs:comment
     // let's map those to dcterms:title/dcterms:description
     // NOTE(review): the query below selects dcterms:description, not
     // rdfs:comment, and the target names use a colon while the loop above
     // writes `dcterms_` fields. Left unchanged because the expected Solr
     // field names are not visible from here - confirm and fix.
     foreach ($xpath->query('./rdfs:label | ./dcterms:description', $Description) as $element) {
         $fieldname = str_replace(array('rdfs:label', 'rdfs:comment'), array('dcterms:title', 'dcterms:description'), $element->nodeName);
         $document->{$fieldname} = trim($element->nodeValue);
     }

     // Keep the serialized source element with the document.
     $document->xml = $Description->ownerDocument->saveXML($Description);

     // Checks or generate uri: a uri from $extradata wins, then rdf:about,
     // then (only when allowed) an auto generated one.
     if (!$document->offsetExists('uri')) {
         $about = $Description->getAttributeNS(self::$namespaces['rdf'], 'about');
         if ($about) {
             $document->uri = $about;
         } elseif ($autoGenerateUri) {
             $document->autoGenerateUri($collection);
         } else {
             throw new OpenSKOS_Rdf_Parser_Exception('missing required attribute rdf:about');
         }
     }

     // Puts status in the Document
     $document->updateStatusInGeneratedXml();

     //store namespaces: collect the distinct prefixes of the child elements.
     $availableNamespaces = array();
     foreach ($Description->childNodes as $childNode) {
         if ($childNode->nodeType === XML_ELEMENT_NODE) {
             $prefix = preg_replace('/^([a-z0-9\\-\\_]+)\\:.+$/', '$1', $childNode->nodeName);
             if (!in_array($prefix, $availableNamespaces, true)) {
                 $availableNamespaces[] = $prefix;
             }
         }
     }
     if ($availableNamespaces) {
         $document->xmlns = $availableNamespaces;
     }

     return $document;
 }