/**
 * Read an RDF document from a URI and wrap the parsed graph in a Data object.
 *
 * @param array $source_definition Must provide the keys 'uri' and 'format'; the
 *                                 format is compared case-insensitively and has
 *                                 to be either 'turtle' or 'xml'.
 * @param array $rest_parameters   Not used by this reader.
 *
 * @return Data Data object whose data property holds the parsed \EasyRdf_Graph
 *              and whose is_semantic flag is set to true.
 *
 * Aborts through \App::abort with status 500 when the format is not supported
 * or when the document cannot be retrieved.
 */
public function readData($source_definition, $rest_parameters = array())
{
    // Fetch the URI of the rdf file
    $uri = $source_definition['uri'];
    $format = strtolower($source_definition['format']);

    // Pick the parser before fetching, so an unsupported format fails
    // without retrieving the document first
    if ($format === 'turtle') {
        $parser = new \EasyRdf_Parser_Turtle();
    } elseif ($format === 'xml') {
        // EasyRdf identifies rdfxml with rdf, not with xml as a format
        $format = 'rdfxml';
        $parser = new \EasyRdf_Parser_RdfXml();
    } else {
        \App::abort(500, "The format you added, {$format}, is not supported. The supported formats are turtle and xml.");
    }

    // file_get_contents returns false on failure; do not hand that to the parser
    $content = file_get_contents($uri);

    if ($content === false) {
        \App::abort(500, "The RDF document could not be retrieved from the uri {$uri}.");
    }

    // Try parsing the contents of the rdf file
    $graph = new \EasyRdf_Graph();
    $parser->parse($graph, $content, $format, '');

    $data = new Data();
    $data->data = $graph;
    $data->is_semantic = true;
    $data->preferred_formats = $this->getPreferredFormats();

    return $data;
}
/**
 * Parses the gh157 RDF/XML fixture and checks that every IRI key in the
 * graph's toRdfPhp() output equals http://www.example.org/base#foo.
 *
 * @see https://github.com/njh/easyrdf/issues/157
 */
public function testIssue157()
{
    $graph = new EasyRdf_Graph();
    $fixture = readFixture('rdfxml/gh157-base.rdf');

    $this->parser->parse($graph, $fixture, 'rdfxml', null);

    $iris = array_keys($graph->toRdfPhp());

    foreach ($iris as $iri) {
        $this->assertEquals('http://www.example.org/base#foo', $iri);
    }
}
/**
 * Add triples to the graph and return it based on limit, offset and the SPARQL query
 *
 * For every SPARQL source returned by the repository a count query is executed
 * (its raw response is cached). When the count exceeds the current offset, a
 * fetch query is executed and the RDF/XML response is merged into the graph;
 * otherwise the offset is lowered by that count and the next source is tried.
 *
 * @param string        $base_uri
 * @param EasyRdf_Graph $graph
 * @param int           $limit
 * @param int           $offset
 * @param integer       $depth The depth the queries should have, handlers should not override this if given
 *
 * @return EasyRdf_Graph
 */
public function addTriples($base_uri, $graph, $limit, $offset, $depth = null)
{
    $total_triples = $graph->countTriples();

    // Iterate the sparql endpoints
    foreach ($this->sparql_repo->getAll() as $sparql_source) {
        $endpoint = rtrim($sparql_source['endpoint'], '/');
        $pw = $sparql_source['endpoint_password'];
        $user = $sparql_source['endpoint_user'];

        // Resolve the depth per source. Writing the first source's depth back
        // into $depth would make every following source use that same depth
        // instead of its own configured one.
        $source_depth = is_null($depth) ? $sparql_source['depth'] : $depth;

        $count_query = $this->query_builder->createCountQuery(
            $base_uri,
            \Request::root(),
            $sparql_source['named_graph'],
            $source_depth
        );

        \Log::info("sparql : " . $count_query . " base uri : " . $base_uri);

        // Check if the response has been cached yet
        $cache_string = $this->buildCacheString($sparql_source['id'], $count_query);

        if (Cache::has($cache_string)) {
            $result = Cache::get($cache_string);
        } else {
            $count_query = urlencode($count_query);
            $count_query = str_replace("+", "%20", $count_query);

            $query_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");

            // Make a request with the count query to the SPARQL endpoint
            $result = $this->executeUri($query_uri, array(), $user, $pw);

            Cache::put($cache_string, $result, 5);
        }

        $response = json_decode($result);

        if (empty($response)) {
            continue;
        }

        // A decodable response without a count binding cannot be paged over;
        // skip the source instead of dereferencing missing properties
        if (!isset($response->results->bindings[0]->count->value)) {
            $sparql_id = $sparql_source['id'];

            \Log::error("The count query response of the sparql source with id {$sparql_id} did not contain a count binding.");

            continue;
        }

        $count = (int) $response->results->bindings[0]->count->value;

        // If the amount of matching triples is higher than the offset
        // add them and update the offset, if not higher, then only update the offset
        if ($count > $offset) {
            // Read the triples from the sparql endpoint
            $query_limit = $limit - $total_triples;

            $query = $this->query_builder->createFetchQuery(
                $base_uri,
                \Request::root(),
                $sparql_source['named_graph'],
                $query_limit,
                $offset,
                $source_depth
            );

            $query = urlencode($query);
            $query = str_replace("+", "%20", $query);

            $query_uri = $endpoint . '?query=' . $query . '&format=' . urlencode("application/rdf+xml");

            // Check for caching
            $cache_string = $this->buildCacheString($sparql_source['id'], $query_uri);
            $from_cache = Cache::has($cache_string);

            if ($from_cache) {
                $result = Cache::get($cache_string);
            } else {
                $result = $this->executeUri($query_uri, array(), $user, $pw);
            }

            // Only a response that starts with '<' is treated as RDF/XML
            if (is_string($result) && $result !== '' && $result[0] === '<') {
                // Store fresh, valid responses so the cache lookup above can
                // actually hit; same lifetime value as the count query
                if (!$from_cache) {
                    Cache::put($cache_string, $result, 5);
                }

                // Parse the triple response and retrieve the triples from them
                $result_graph = new \EasyRdf_Graph();
                $parser = new \EasyRdf_Parser_RdfXml();

                $parser->parse($result_graph, $result, 'rdfxml', null);

                $graph = $this->mergeGraph($graph, $result_graph);

                // NOTE(review): this adds the number of matching triples past the
                // offset, not the number actually returned under $query_limit, and
                // $offset is not reset for the next source - confirm this is intended.
                $total_triples += $count - $offset;
            } else {
                $sparql_id = $sparql_source['id'];

                \Log::error("Something went wrong while fetching the triples from the sparql source with id {$sparql_id}. The error was " . $result . ". The query was : " . $query_uri);
            }
        } else {
            // Update the offset
            $offset -= $count;
        }

        if ($offset < 0) {
            $offset = 0;
        }
    }

    return $graph;
}
/**
 * Execute the configured SPARQL query against its endpoint and wrap the result.
 *
 * SELECT queries are requested as SPARQL JSON results and returned as decoded
 * JSON; CONSTRUCT queries are requested as RDF/XML and returned as an
 * \EasyRdf_Graph. Unless the query text already contains "limit", a count
 * query is derived from the where clause, paging headers are calculated and
 * OFFSET/LIMIT clauses are appended to the query.
 *
 * @param array $source_definition Must provide the keys 'endpoint', 'endpoint_user',
 *                                 'endpoint_password' and 'query'.
 * @param array $rest_parameters   Not used by this reader.
 *
 * @return Data Data object carrying the result, the paging information (only
 *              when the query was paged here), the semantic configuration for
 *              graph results and the names of the ${...} query parameters.
 *
 * Aborts through \App::abort with status 500 when no SELECT/CONSTRUCT keyword or
 * where clause can be found, or when the endpoint response cannot be used.
 */
public function readData($source_definition, $rest_parameters = array())
{
    $endpoint = $source_definition['endpoint'];
    $endpoint_user = $source_definition['endpoint_user'];
    $endpoint_password = $source_definition['endpoint_password'];

    // Check if the query is already paged. Compare against false explicitly:
    // stripos returns the integer position of the match, which may be 0.
    $limitInQuery = stripos($source_definition['query'], 'limit') !== false;

    if (!$limitInQuery) {
        list($limit, $offset) = Pager::calculateLimitAndOffset();

        // Sparql endpoints often have a built in limit on the amount of rows that they return
        // Avoid problems by capping the given limit by the Pager class
        if ($limit > self::$MAX_LIMIT) {
            $limit = self::$MAX_LIMIT;
        }
    }

    // Process the if and ifnot-statements in the query
    $query = $this->processLogicalStatements($source_definition['query']);

    // Process the parameters in the uri (to catch hashtag values for example)
    $query = $this->processParameters($query);

    // If a select statement has been passed, we ask for JSON results
    // If a construct statement has been passed, we ask for RDF/XML
    $keyword = "";

    if (stripos($query, "select") !== false) {
        $keyword = "select";
    } elseif (stripos($query, "construct") !== false) {
        $keyword = "construct";
    } else {
        // No valid SPARQL keyword has been found, is checked during validation
        \App::abort(500, "No CONSTRUCT or SELECT statement has been found in the given query: {$query}");
    }

    // Create a count query for paging purposes, this assumes that a where clause is included in the query
    // Note that the where "clause" is obligatory but it's not mandatory it is preceded by a WHERE keyword
    // This implies the removal of the select or construct statement and only using the where statement
    // Make a distinction between select and construct since construct will be
    // followed by a {} sequence, whereas a select statement will not

    // Covers FROM <...> FROM <...> WHERE{ } , FROM <...> FROM <...> { }, WHERE { }, { }
    $where_clause = '(.*?(FROM.+?{.+})|.*?(WHERE.*{.+})|.*?({.+}))[a-zA-Z0-9]*?';

    $matches = array();

    if ($keyword == 'select') {
        $regex = $keyword . $where_clause;

        preg_match_all("/(.*?){$regex}/msi", $query, $matches);
    } else {
        preg_match_all("/(.*?){$keyword}(\\s*\\{[^{]+\\}){$where_clause}/mis", $query, $matches);
    }

    // Everything in front of the keyword (e.g. PREFIX declarations). When the
    // pattern did not match at all, $matches[1] is an empty array, so guard the
    // access and let the empty-filter check below report the failure.
    $prefix = isset($matches[1][0]) ? $matches[1][0] : '';

    // Groups 3, 4 and 5 are inspected in order and a later non-empty group
    // overrides an earlier one.
    // NOTE(review): the construct pattern has one extra capture group (the
    // template), so its where-clause groups are 3-6 while only 3-5 are read here.
    $filter = "";

    foreach (array(3, 4, 5) as $group) {
        if (!empty($matches[$group][0])) {
            $filter = $matches[$group][0];
        }
    }

    if (empty($filter)) {
        \App::abort(500, "Failed to retrieve the where clause from the query: {$query}");
    }

    if (!$limitInQuery) {
        // Prepare the query to count results
        $count_query = $prefix . ' SELECT (count(*) AS ?count) ' . $filter;

        $count_query = urlencode($count_query);
        $count_query = str_replace("+", "%20", $count_query);

        $count_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");

        $response = $this->executeUri($count_uri, $endpoint_user, $endpoint_password);
        $response = json_decode($response);

        // If something goes wrong, the response will either be null or false
        if (!$response) {
            \App::abort(500, "Something went wrong while executing the count query. The assembled URI was: {$count_uri}");
        }

        // Valid JSON without the expected binding would otherwise fail on the dereference below
        if (!isset($response->results->bindings[0]->count->value)) {
            \App::abort(500, "The count query did not return a count binding. The assembled URI was: {$count_uri}");
        }

        $count = $response->results->bindings[0]->count->value;

        // Calculate page link headers, previous, next and last based on the count from the previous query
        $paging = Pager::calculatePagingHeaders($limit, $offset, $count);
    }

    // Build the query that is actually sent, starting again from the stored definition
    $query = $source_definition['query'];
    $query = $this->processLogicalStatements($query);
    $query = $this->processParameters($query);

    if (!$limitInQuery) {
        if (!empty($offset)) {
            $query = $query . " OFFSET {$offset} ";
        }

        if (!empty($limit)) {
            $query = $query . " LIMIT {$limit}";
        }
    }

    // Prepare the query with proper encoding for the request
    $query = str_replace('%23', '#', $query);

    $q = urlencode($query);
    $q = str_replace("+", "%20", $q);

    if ($keyword == 'select') {
        $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/sparql-results+json");

        $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);

        $result = json_decode($response);

        if (!$result) {
            \App::abort(500, 'The query has been executed, but the endpoint failed to return sparql results in JSON.');
        }

        $is_semantic = false;
    } else {
        $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/rdf+xml");

        $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);

        // Parse the triple response and retrieve the triples from them
        $result = new \EasyRdf_Graph();
        $parser = new \EasyRdf_Parser_RdfXml();

        $parser->parse($result, $response, 'rdfxml', null);

        $is_semantic = true;
    }

    // Create the data object to return
    $data = new Data();
    $data->data = $result;

    if (!$limitInQuery) {
        $data->paging = $paging;
    }

    $data->is_semantic = $is_semantic;
    $data->preferred_formats = $this->getPreferredFormats();

    \Log::info("SPARQL query: {$query}");

    if ($is_semantic) {
        // Fetch the available namespaces and pass
        // them as a configuration of the semantic data result
        $prefixes = array();

        foreach ($this->ontologies->getAll() as $ontology) {
            $prefixes[$ontology["prefix"]] = $ontology["uri"];
        }

        $data->semantic = new \stdClass();
        $data->semantic->conf = array('ns' => $prefixes);
        $data->preferred_formats = array('ttl', 'jsonld', 'rdf');
    }

    // Determine which parameters were given in the query (the names inside ${...})
    $matches = array();

    preg_match_all("/\\\$\\{(.+?)\\}/", $source_definition['query'], $matches);

    $data->optional_parameters = empty($matches[1]) ? array() : array_values($matches[1]);

    return $data;
}