/**
 * Fetch an RDF document from the configured URI and parse it into a graph.
 *
 * Reads the 'uri' and 'format' keys of the source definition. Only the
 * formats "turtle" and "xml" (case-insensitive) are supported; any other
 * value aborts the request with a 500.
 *
 * @param array $source_definition Must contain the keys 'uri' and 'format'.
 * @param array $rest_parameters   Not used by this reader.
 *
 * @return Data Semantic data object whose ->data property is the parsed Graph.
 */
public function readData($source_definition, $rest_parameters = array())
{
    $uri = $source_definition['uri'];
    $format = strtolower($source_definition['format']);

    // Pick the parser first, so an unsupported format is rejected
    // before any time is spent fetching the document.
    if ($format === 'turtle') {
        $parser = new Turtle();
    } elseif ($format === 'xml') {
        // The parser is told 'rdfxml' rather than 'xml' as the format name
        $format = 'rdfxml';
        $parser = new RdfXml();
    } else {
        \App::abort(500, "The format you added, {$format}, is not supported. The supported formats are turtle and xml.");
    }

    // Fetch the contents of the rdf file
    $content = file_get_contents($uri);

    // file_get_contents() returns false on failure; do not hand that to the parser
    if ($content === false) {
        \App::abort(500, "The RDF document could not be retrieved from the given URI: {$uri}");
    }

    // Parse the contents into a fresh graph; an empty string is passed as base URI
    $graph = new Graph();
    $parser->parse($graph, $content, $format, '');

    $data = new Data();
    $data->data = $graph;
    $data->is_semantic = true;
    $data->preferred_formats = $this->getPreferredFormats();

    return $data;
}
/**
 * Load the extractor's XML document, run it through the
 * iso-19139-to-dcat-ap XSLT stylesheet and collect the resulting
 * dcat:Dataset resources in $this->datasets.
 *
 * If loading either document or importing the stylesheet raises an
 * \ErrorException, the message is logged and the script terminates.
 */
protected function open()
{
    // Stylesheet that is fetched and applied to the source document
    $stylesheet_url = "https://webgate.ec.europa.eu/CITnet/stash/projects/ODCKAN/repos/iso-19139-to-dcat-ap/browse/iso-19139-to-dcat-ap.xsl?raw";

    $processor = null;

    try {
        // The document to transform comes from the extractor's URI
        $source_document = new DOMDocument();
        $source_document->load($this->extractor->uri);

        $stylesheet = new DOMDocument();
        $stylesheet->load($stylesheet_url);

        $processor = new XSLTProcessor();
        $processor->importStyleSheet($stylesheet);
    } catch (\ErrorException $exception) {
        $this->log('Something went wrong: ' . $exception->getMessage());
        die;
    }

    // Perform the XSLT transformation
    $transformed_xml = $processor->transformToXML($source_document);

    // Register the 'locn' prefix with EasyRdf
    \EasyRdf\RdfNamespace::set('locn', 'http://www.w3.org/ns/locn#');

    // Parse the transformed document as RDF/XML
    // NOTE(review): 'http://foo' looks like a placeholder base URI - confirm
    $dcat_graph = new Graph();
    $rdfxml_parser = new RdfXml();
    $rdfxml_parser->parse($dcat_graph, $transformed_xml, 'rdfxml', 'http://foo');

    $this->datasets = $dcat_graph->allOfType('dcat:Dataset');
}
/**
 * Execute the configured SPARQL query against the endpoint and wrap the
 * result in a Data object.
 *
 * - SELECT queries are requested as SPARQL JSON results and decoded into an array.
 * - CONSTRUCT queries are requested as RDF/XML and parsed into a Graph.
 * - Unless the raw query text already contains "limit", a count query is
 *   issued first to compute paging information, and OFFSET/LIMIT clauses
 *   are appended to the query.
 *
 * Every failure path aborts the request with a 500 through \App::abort().
 *
 * @param array $source_definition Reads the keys 'endpoint', 'endpoint_user',
 *                                 'endpoint_password' and 'query'.
 * @param array $rest_parameters   Not used by this reader.
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = array())
{
    $endpoint = $source_definition['endpoint'];
    $endpoint_user = $source_definition['endpoint_user'];
    $endpoint_password = $source_definition['endpoint_password'];
    $query = $source_definition['query'];

    // Check if the query is already paged. Compare against false explicitly:
    // stripos() returns 0 (falsy) for a match at the very start of the string.
    // NOTE(review): this is a plain substring test, so any occurrence of
    // "limit" in the query text (e.g. inside a variable name) counts as paged.
    $limitInQuery = stripos($source_definition['query'], 'limit') !== false;

    if (!$limitInQuery) {
        list($limit, $offset) = Pager::calculateLimitAndOffset();

        // Sparql endpoints often have a built in limit on the amount of rows that they return
        // Avoid problems by capping the given limit by the Pager class
        if ($limit > self::$MAX_LIMIT) {
            $limit = self::$MAX_LIMIT;
        }
    }

    // Process the if and ifnot-statements in the query
    $query = $this->processLogicalStatements($query);

    // Process the parameters in the uri (to catch hashtag values for example)
    $query = $this->processParameters($query);

    // Determine the query form: SELECT results are requested as JSON,
    // CONSTRUCT results as RDF/XML. SELECT is tested first.
    // This piece of code can be removed in later versions as the query_type
    // will be determined upon saving/editing a query
    if (stripos($query, "select") !== false) {
        $keyword = "select";
    } elseif (stripos($query, "construct") !== false) {
        $keyword = "construct";
    } else {
        // No valid SPARQL keyword has been found, is checked during validation
        \App::abort(500, "No CONSTRUCT or SELECT statement has been found in the given query: {$query}");
    }

    // Prepare the count query for paging purposes. This implies the removal
    // of the select or construct statement and only using the where statement.
    // Note that the where "clause" is obligatory, but it does not have to be
    // preceded by a WHERE keyword.
    // Covers FROM <...> FROM <...> WHERE{ } , FROM <...> FROM <...> { }, WHERE { }, { }
    $where_clause = '(.+((FROM.*<.+>)+.*{.+})|((GRAPH.*<.+>)+.*{.+})|.*?(WHERE.*{.+})|.*?({.+}))[a-zA-Z0-9]*?';

    $matches = array();

    // Make a distinction between select and construct since construct will be
    // followed by a {} sequence, whereas a select statement will not
    if ($keyword === 'select') {
        preg_match_all("/(.*?){$keyword}{$where_clause}/msi", $query, $matches);
    } else {
        preg_match_all("/(.*?){$keyword}(\\s*\\{[^{]+\\}){$where_clause}/mis", $query, $matches);
    }

    // Everything that precedes the select/construct keyword. When the pattern
    // did not match, the capture arrays are empty; fall back to an empty prefix
    // so that the explicit abort below is reached instead of an undefined offset.
    $prefix = isset($matches[1][0]) ? $matches[1][0] : '';
    $filter = "";

    // Several capture groups can hold the where clause; pick the first
    // non-empty hit among groups 3, 4 and 5, in that order
    foreach (array(3, 4, 5) as $group) {
        if (empty($filter) && !empty($matches[$group][0])) {
            $filter = $matches[$group][0];
        }
    }

    // As a last resort use the final capture group
    $last_element = end($matches);

    if (empty($filter) && !empty($last_element[0])) {
        $filter = $last_element[0];
    }

    if (empty($filter)) {
        \App::abort(500, "Failed to retrieve the where clause from the query: {$query}");
    }

    if (!$limitInQuery) {
        // Prepare the query to count results; spaces are sent as %20 instead of +
        $count_query = $prefix . ' SELECT (count(*) AS ?count) ' . $filter;
        $count_query = str_replace("+", "%20", urlencode($count_query));

        $count_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");

        $response = $this->executeUri($count_uri, $endpoint_user, $endpoint_password);
        $response = json_decode($response);

        // If something goes wrong, the response will either be null or false.
        // Also reject decoded responses that lack the expected count binding,
        // instead of failing on the property access below.
        if (!$response || !isset($response->results->bindings[0]->count->value)) {
            \App::abort(500, "Something went wrong while executing the count query. The assembled URI was: {$count_uri}");
        }

        $count = $response->results->bindings[0]->count->value;

        // Calculate page link headers, previous, next and last based on the count from the previous query
        $paging = Pager::calculatePagingHeaders($limit, $offset, $count);
    }

    // Rebuild the query to execute from the source definition
    $query = $source_definition['query'];
    $query = $this->processLogicalStatements($query);
    $query = $this->processParameters($query);

    if (!$limitInQuery) {
        if (!empty($offset)) {
            $query = $query . " OFFSET {$offset} ";
        }

        if (!empty($limit)) {
            $query = $query . " LIMIT {$limit}";
        }
    }

    // Prepare the query with proper encoding for the request: turn any literal
    // "%23" back into "#" before encoding, and send spaces as %20 instead of +
    $query = str_replace('%23', '#', $query);
    $q = str_replace("+", "%20", urlencode($query));

    if ($keyword === 'select') {
        $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/sparql-results+json");

        $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);
        $result = json_decode($response, true);

        if (!$result) {
            \App::abort(500, 'The query has been executed, but the endpoint failed to return sparql results in JSON.');
        }

        $is_semantic = false;
    } else {
        $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/rdf+xml");

        $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);

        // Parse the triple response and retrieve the triples from them
        $result = new Graph();
        $parser = new RdfXml();
        $parser->parse($result, $response, 'rdfxml', null);

        $is_semantic = true;
    }

    // Create the data object to return
    $data = new Data();

    // Paging information only exists when this method did the paging itself
    if (!$limitInQuery) {
        $data->paging = $paging;
    }

    $data->data = $result;
    $data->is_semantic = $is_semantic;
    $data->preferred_formats = $this->getPreferredFormats();

    if ($is_semantic) {
        // Fetch the available namespaces and pass
        // them as a configuration of the semantic data result
        $prefixes = array();

        foreach ($this->ontologies->getAll() as $ontology) {
            $prefixes[$ontology["prefix"]] = $ontology["uri"];
        }

        $data->semantic = new \stdClass();
        $data->semantic->conf = array('ns' => $prefixes);

        // Semantic results override the reader's default preferred formats
        $data->preferred_formats = array('ttl', 'jsonld', 'rdf');
    }

    // Determine which parameters were given in the query: every ${name}
    // placeholder in the raw query text is reported as an optional parameter
    $matches = array();
    preg_match_all("/\\\$\\{(.+?)\\}/", $source_definition['query'], $matches);

    $data->optional_parameters = empty($matches[1]) ? array() : $matches[1];

    return $data;
}