Example #1
0
 /**
  * Read an RDF document and return its triples wrapped in a Data object.
  *
  * @param array $source_definition Must contain 'uri' (location of the RDF document)
  *                                 and 'format' ('turtle' or 'xml', case-insensitive).
  * @param array $rest_parameters   Not used by this reader.
  *
  * @return Data Data object flagged as semantic, with the parsed graph in its data property.
  *
  * Aborts through \App::abort(500, ...) when the document cannot be fetched
  * or when the format is not supported.
  */
 public function readData($source_definition, $rest_parameters = array())
 {
     // Fetch the URI of the rdf file
     $uri = $source_definition['uri'];
     $format = strtolower($source_definition['format']);

     // file_get_contents() returns false on failure; never hand that to the parser
     $content = file_get_contents($uri);
     if ($content === false) {
         \App::abort(500, "The RDF document located at {$uri} could not be retrieved.");
     }

     // Select the parser that matches the configured format
     $parser = null;
     if ($format === 'turtle') {
         $parser = new Turtle();
     } elseif ($format === 'xml') {
         // EasyRdf identifies rdfxml with rdf, not with xml as a format
         $format = 'rdfxml';
         $parser = new RdfXml();
     } else {
         \App::abort(500, "The format you added, {$format}, is not supported. The supported formats are turtle and xml.");
     }

     // Parse the contents of the rdf file into a fresh graph
     $graph = new Graph();
     $parser->parse($graph, $content, $format, '');

     $data = new Data();
     $data->data = $graph;
     $data->is_semantic = true;
     $data->preferred_formats = $this->getPreferredFormats();

     return $data;
 }
Example #2
0
 /**
  * Load the source XML document, transform it with the XSL stylesheet and
  * collect every dcat:Dataset resource of the resulting graph in $this->datasets.
  *
  * Any failure while loading the documents or running the transformation is
  * logged through $this->log() and terminates the script, which is the error
  * policy this method already applied to load errors.
  */
 protected function open()
 {
     // Perform XSLT; judging by its name the stylesheet maps ISO 19139 metadata to DCAT-AP
     $xsl_url = "https://webgate.ec.europa.eu/CITnet/stash/projects/ODCKAN/repos/iso-19139-to-dcat-ap/browse/iso-19139-to-dcat-ap.xsl?raw";
     $proc = null;
     try {
         // load() and importStyleSheet() signal failure through their return value,
         // so turn a false result into the exception the catch block handles
         $xml = new DOMDocument();
         if ($xml->load($this->extractor->uri) === false) {
             throw new \ErrorException('Unable to load the XML document located at ' . $this->extractor->uri);
         }
         $xsl = new DOMDocument();
         if ($xsl->load($xsl_url) === false) {
             throw new \ErrorException('Unable to load the XSL stylesheet located at ' . $xsl_url);
         }
         $proc = new XSLTProcessor();
         if ($proc->importStyleSheet($xsl) === false) {
             throw new \ErrorException('Unable to import the XSL stylesheet located at ' . $xsl_url);
         }
     } catch (\ErrorException $ex) {
         $this->log('Something went wrong: ' . $ex->getMessage());
         die;
     }
     $dcat_document = $proc->transformToXML($xml);
     // A failed transformation does not yield a string; stop before parsing garbage
     if (!is_string($dcat_document)) {
         $this->log('Something went wrong: the XSL transformation did not produce a document.');
         die;
     }
     \EasyRdf\RdfNamespace::set('locn', 'http://www.w3.org/ns/locn#');
     // Parse the dcat graph
     $graph = new Graph();
     $rdf_parser = new RdfXml();
     $rdf_parser->parse($graph, $dcat_document, 'rdfxml', 'http://foo');
     $this->datasets = $graph->allOfType('dcat:Dataset');
 }
Example #3
0
 /**
  * Execute the configured SPARQL query against the endpoint and wrap the result in a Data object.
  *
  * SELECT queries are requested as SPARQL JSON results and returned as a decoded array;
  * CONSTRUCT queries are requested as RDF/XML and returned as a graph. When the query
  * does not contain the word "limit", a count query is executed first and the
  * limit/offset supplied by the Pager class are appended to the query.
  *
  * @param array $source_definition Must contain 'endpoint', 'endpoint_user',
  *                                 'endpoint_password' and 'query'.
  * @param array $rest_parameters   Not used by this reader.
  *
  * @return Data Data object with the result, paging information (when this method
  *              paged the query) and the names of the ${...} placeholders in the query.
  *
  * Aborts through \App::abort(500, ...) when no SELECT/CONSTRUCT keyword or where
  * clause can be found, or when the endpoint does not return a usable response.
  */
 public function readData($source_definition, $rest_parameters = array())
 {
     // Retrieve the necessary variables to read from a SPARQL endpoint
     $endpoint = $source_definition['endpoint'];
     $endpoint_user = $source_definition['endpoint_user'];
     $endpoint_password = $source_definition['endpoint_password'];
     $query = $source_definition['query'];

     // Check if the query is already paged.
     // stripos() returns 0 for a hit at the very start of the string, so the
     // result has to be compared against false explicitly.
     $limitInQuery = stripos($source_definition['query'], 'limit') !== false;

     if (!$limitInQuery) {
         list($limit, $offset) = Pager::calculateLimitAndOffset();
         // Sparql endpoints often have a built in limit on the amount of rows that they return
         // Avoid problems by capping the given limit by the Pager class
         if ($limit > self::$MAX_LIMIT) {
             $limit = self::$MAX_LIMIT;
         }
     }

     // Process the if and ifnot-statements in the query
     $query = $this->processLogicalStatements($query);
     // Process the parameters in the uri (to catch hashtag values for example)
     $query = $this->processParameters($query);

     // If a select statement has been passed, we ask for JSON results
     // If a construct statement has been passed, we ask for RDF/XML
     // This piece of code can be removed in later versions as the query_type will be determined
     // upon saving/editing a query
     $keyword = "";
     if (stripos($query, "select") !== false) {
         // SELECT query
         $keyword = "select";
     } elseif (stripos($query, "construct") !== false) {
         // CONSTRUCT query
         $keyword = "construct";
     } else {
         // No valid SPARQL keyword has been found, is checked during validation
         \App::abort(500, "No CONSTRUCT or SELECT statement has been found in the given query: {$query}");
     }

     // Create a count query for paging purposes, this assumes that a where clause is included in the query
     // Note that the where "clause" is obligatory but it's not mandatory it is preceded by a WHERE keyword
     // This implies the removal of the select or construct statement
     // and only using the where statement
     // Make a distinction between select and construct since
     // construct will be followed by a {} sequence, whereas a select statement will not
     // Covers FROM <...> FROM <...> WHERE{ } , FROM <...> FROM <...> { }, WHERE { }, { }
     $where_clause = '(.+((FROM.*<.+>)+.*{.+})|((GRAPH.*<.+>)+.*{.+})|.*?(WHERE.*{.+})|.*?({.+}))[a-zA-Z0-9]*?';
     $matches = array();
     if ($keyword == 'select') {
         $regex = $keyword . $where_clause;
         preg_match_all("/(.*?){$regex}/msi", $query, $matches);
     } else {
         preg_match_all("/(.*?){$keyword}(\\s*\\{[^{]+\\}){$where_clause}/mis", $query, $matches);
     }

     // Everything in front of the keyword (e.g. PREFIX declarations); stays empty
     // when the pattern did not match so the explicit abort below is reached
     $prefix = isset($matches[1][0]) ? $matches[1][0] : '';

     // Pick the first non-empty candidate for the where clause, falling back
     // to the last capture group
     $filter = "";
     if (!empty($matches[3][0])) {
         $filter = $matches[3][0];
     }
     if (!empty($matches[4][0]) && empty($filter)) {
         $filter = $matches[4][0];
     }
     if (!empty($matches[5][0]) && empty($filter)) {
         $filter = $matches[5][0];
     }
     $last_element = end($matches);
     if (!empty($last_element[0]) && empty($filter)) {
         $filter = $last_element[0];
     }
     if (empty($filter)) {
         \App::abort(500, "Failed to retrieve the where clause from the query: {$query}");
     }

     if (!$limitInQuery) {
         // Prepare the query to count results
         $count_query = $prefix . ' SELECT (count(*) AS ?count) ' . $filter;
         $count_query = urlencode($count_query);
         $count_query = str_replace("+", "%20", $count_query);
         $count_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");
         $response = $this->executeUri($count_uri, $endpoint_user, $endpoint_password);
         $response = json_decode($response);
         // If something goes wrong, the response will either be null or false
         if (!$response) {
             \App::abort(500, "Something went wrong while executing the count query. The assembled URI was: {$count_uri}");
         }
         // A decodable response without a count binding cannot be used for paging either
         if (!isset($response->results->bindings[0]->count->value)) {
             \App::abort(500, "Something went wrong while executing the count query. The assembled URI was: {$count_uri}");
         }
         $count = $response->results->bindings[0]->count->value;
         // Calculate page link headers, previous, next and last based on the count from the previous query
         $paging = Pager::calculatePagingHeaders($limit, $offset, $count);
     }

     // Rebuild the query from the stored definition before adding paging.
     // NOTE(review): this repeats the processing done above; kept because the
     // helpers are opaque from here.
     $query = $source_definition['query'];
     $query = $this->processLogicalStatements($query);
     $query = $this->processParameters($query);
     if (!$limitInQuery) {
         if (!empty($offset)) {
             $query = $query . " OFFSET {$offset} ";
         }
         if (!empty($limit)) {
             $query = $query . " LIMIT {$limit}";
         }
     }

     // Prepare the query with proper encoding for the request
     $query = str_replace('%23', '#', $query);
     $q = urlencode($query);
     $q = str_replace("+", "%20", $q);
     if ($keyword == 'select') {
         $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/sparql-results+json");
         $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);
         $result = json_decode($response, true);
         if (!$result) {
             \App::abort(500, 'The query has been executed, but the endpoint failed to return sparql results in JSON.');
         }
         $is_semantic = false;
     } else {
         $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/rdf+xml");
         $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);
         // Parse the triple response and retrieve the triples from them
         $result = new Graph();
         $parser = new RdfXml();
         $parser->parse($result, $response, 'rdfxml', null);
         $is_semantic = true;
     }

     // Create the data object to return
     $data = new Data();
     if (!$limitInQuery) {
         $data->paging = $paging;
     }
     $data->data = $result;
     $data->is_semantic = $is_semantic;
     $data->preferred_formats = $this->getPreferredFormats();
     if ($is_semantic) {
         // Fetch the available namespaces and pass
         // them as a configuration of the semantic data result
         $ontologies = $this->ontologies->getAll();
         $prefixes = array();
         foreach ($ontologies as $ontology) {
             $prefixes[$ontology["prefix"]] = $ontology["uri"];
         }
         $data->semantic = new \stdClass();
         $data->semantic->conf = array('ns' => $prefixes);
         $data->preferred_formats = array('ttl', 'jsonld', 'rdf');
     }

     // Determine which parameters were given in the query
     $parameter_matches = array();
     $query_parameters = array();
     preg_match_all("/\\\$\\{(.+?)\\}/", $source_definition['query'], $parameter_matches);
     if (!empty($parameter_matches[1])) {
         foreach ($parameter_matches[1] as $entry) {
             $query_parameters[] = $entry;
         }
     }
     $data->optional_parameters = $query_parameters;

     return $data;
 }