Example #1
0
 /**
  * Read an RDF file and return its triples wrapped in a Data object.
  *
  * @param array $source_definition Must contain 'uri' (location of the RDF file) and
  *                                 'format' ('turtle' or 'xml', compared case-insensitively)
  * @param array $rest_parameters   Not used by this reader
  *
  * @return Data Data object holding the parsed EasyRdf graph, flagged as semantic
  */
 public function readData($source_definition, $rest_parameters = array())
 {
     // Fetch the URI of the rdf file
     $uri = $source_definition['uri'];
     $format = strtolower($source_definition['format']);

     $content = file_get_contents($uri);

     // file_get_contents() returns false on failure; stop here instead of
     // handing a non-string to the parser
     if ($content === false) {
         \App::abort(500, "The RDF file located at {$uri} could not be read.");
     }

     // Pick the parser that matches the configured format
     if ($format == 'turtle') {
         $parser = new \EasyRdf_Parser_Turtle();
     } elseif ($format == 'xml') {
         // EasyRdf identifies rdfxml with rdf, not with xml as a format
         $format = 'rdfxml';
         $parser = new \EasyRdf_Parser_RdfXml();
     } else {
         \App::abort(500, "The format you added, {$format}, is not supported. The supported formats are turtle and xml.");
     }

     // Parse the contents of the rdf file into a fresh graph
     $graph = new \EasyRdf_Graph();
     $parser->parse($graph, $content, $format, '');

     $data = new Data();
     $data->data = $graph;
     $data->is_semantic = true;
     $data->preferred_formats = $this->getPreferredFormats();

     return $data;
 }
 /**
  * @see https://github.com/njh/easyrdf/issues/157
  */
 public function testIssue157()
 {
     $filename = 'rdfxml/gh157-base.rdf';
     $graph = new EasyRdf_Graph();
     $this->parser->parse($graph, readFixture($filename), 'rdfxml', null);

     $resources = $graph->toRdfPhp();

     // Without this check the test would pass vacuously when nothing was parsed,
     // because the loop below would never execute an assertion.
     // NOTE(review): assumes the fixture contains at least one subject - confirm.
     $this->assertNotEmpty($resources);

     // Every subject in the parsed graph must have been resolved to this IRI
     foreach (array_keys($resources) as $iri) {
         $this->assertEquals('http://www.example.org/base#foo', $iri);
     }
 }
Example #3
0
 /**
  * Add triples to the graph and return it based on limit, offset and the SPARQL query
  *
  * @param string        $base_uri
  * @param EasyRdf_Graph $graph
  * @param int           $limit
  * @param int           $offset
  * @param integer       $depth     The depth the queries should have, handlers should not override this if given
  *
  * @return EasyRdf_Graph
  */
 public function addTriples($base_uri, $graph, $limit, $offset, $depth = null)
 {
     $total_triples = $graph->countTriples();

     // Iterate the sparql endpoints
     foreach ($this->sparql_repo->getAll() as $sparql_source) {
         // The graph already holds as many triples as the limit allows,
         // so querying further endpoints cannot add anything
         if ($limit - $total_triples <= 0) {
             break;
         }

         $endpoint = rtrim($sparql_source['endpoint'], '/');
         $pw = $sparql_source['endpoint_password'];
         $user = $sparql_source['endpoint_user'];
         $sparql_id = $sparql_source['id'];

         // Fall back to the depth configured on this source only when the caller gave none.
         // A separate variable keeps the depth of one source from leaking into the next one.
         $source_depth = is_null($depth) ? $sparql_source['depth'] : $depth;

         $count_query = $this->query_builder->createCountQuery($base_uri, \Request::root(), $sparql_source['named_graph'], $source_depth);
         \Log::info("sparql : " . $count_query . " base uri : " . $base_uri);

         // Check if the response has been cached yet
         $cache_string = $this->buildCacheString($sparql_id, $count_query);

         if (Cache::has($cache_string)) {
             $result = Cache::get($cache_string);
         } else {
             // urlencode() turns spaces into "+", the endpoint is given "%20" instead
             $encoded_count_query = str_replace("+", "%20", urlencode($count_query));
             $count_uri = $endpoint . '?query=' . $encoded_count_query . '&format=' . urlencode("application/sparql-results+json");

             // Make a request with the count query to the SPARQL endpoint
             $result = $this->executeUri($count_uri, array(), $user, $pw);
             Cache::put($cache_string, $result, 5);
         }

         $response = json_decode($result);

         if (empty($response)) {
             continue;
         }

         // The decoded response must carry a count binding before it can be used
         if (!isset($response->results->bindings[0]->count->value)) {
             \Log::error("The count query for the sparql source with id {$sparql_id} did not return a count binding.");
             continue;
         }

         $count = (int) $response->results->bindings[0]->count->value;

         // All matching triples of this source fall before the requested offset:
         // skip the source and lower the offset by the amount of skipped triples
         if ($count <= $offset) {
             $offset -= $count;
             continue;
         }

         // Read the triples from the sparql endpoint
         $query_limit = $limit - $total_triples;
         $query = $this->query_builder->createFetchQuery($base_uri, \Request::root(), $sparql_source['named_graph'], $query_limit, $offset, $source_depth);
         $query = str_replace("+", "%20", urlencode($query));
         $query_uri = $endpoint . '?query=' . $query . '&format=' . urlencode("application/rdf+xml");

         // Check for caching
         $cache_string = $this->buildCacheString($sparql_id, $query_uri);
         $from_cache = Cache::has($cache_string);

         if ($from_cache) {
             $result = Cache::get($cache_string);
         } else {
             $result = $this->executeUri($query_uri, array(), $user, $pw);
         }

         if (!empty($result) && $result[0] == '<') {
             // Store usable responses so the cache lookup above can actually hit
             if (!$from_cache) {
                 Cache::put($cache_string, $result, 5);
             }

             // Parse the triple response and retrieve the triples from them
             $result_graph = new \EasyRdf_Graph();
             $parser = new \EasyRdf_Parser_RdfXml();
             $parser->parse($result_graph, $result, 'rdfxml', null);

             $graph = $this->mergeGraph($graph, $result_graph);

             // The endpoint returns at most $query_limit triples, never count more than that
             $total_triples += min($count - $offset, $query_limit);
         } else {
             \Log::error("Something went wrong while fetching the triples from the sparql source with id {$sparql_id}. The error was " . $result . ". The query was : " . $query_uri);
         }

         // The offset has been consumed by this source, following sources start at their first triple
         $offset = 0;
     }

     return $graph;
 }
Example #4
0
 /**
  * Execute the configured SPARQL query against the endpoint and wrap the result in a Data object.
  *
  * SELECT queries are requested as SPARQL JSON results, CONSTRUCT queries as RDF/XML which is
  * parsed into an EasyRdf graph. When the configured query contains no "limit", a count query is
  * executed first and the query is paged with the limit and offset calculated by the Pager.
  *
  * @param array $source_definition Must contain 'endpoint', 'endpoint_user', 'endpoint_password' and 'query'
  * @param array $rest_parameters   Not used by this reader
  *
  * @return Data
  */
 public function readData($source_definition, $rest_parameters = array())
 {
     $endpoint = $source_definition['endpoint'];
     $endpoint_user = $source_definition['endpoint_user'];
     $endpoint_password = $source_definition['endpoint_password'];

     // Check if the query is already paged. stripos() returns the integer 0 for a hit at the
     // very start of the string, so the result has to be compared strictly against false.
     $limitInQuery = stripos($source_definition['query'], 'limit') !== false;

     if (!$limitInQuery) {
         list($limit, $offset) = Pager::calculateLimitAndOffset();

         // Sparql endpoints often have a built in limit on the amount of rows that they return
         // Avoid problems by capping the given limit by the Pager class
         if ($limit > self::$MAX_LIMIT) {
             $limit = self::$MAX_LIMIT;
         }
     }

     // Process the if and ifnot-statements in the query
     $query = $this->processLogicalStatements($source_definition['query']);

     // Process the parameters in the uri (to catch hashtag values for example)
     $query = $this->processParameters($query);

     // If a select statement has been passed, we ask for JSON results
     // If a construct statement has been passed, we ask for RDF/XML
     $keyword = "";

     if (stripos($query, "select") !== false) {
         // SELECT query
         $keyword = "select";
     } elseif (stripos($query, "construct") !== false) {
         // CONSTRUCT query
         $keyword = "construct";
     } else {
         // No valid SPARQL keyword has been found, is checked during validation
         \App::abort(500, "No CONSTRUCT or SELECT statement has been found in the given query: {$query}");
     }

     // Split the query for the count query that is used for paging purposes:
     // everything in front of the keyword on one side, the where clause on the other side.
     // Note that the where "clause" is obligatory but it's not mandatory it is preceded by a WHERE keyword.
     // Make a distinction between select and construct since
     // construct will be followed by a {} sequence, whereas a select statement will not

     // Covers FROM <...> FROM <...> WHERE{ } , FROM <...> FROM <...> { }, WHERE { }, { }
     $where_clause = '(.*?(FROM.+?{.+})|.*?(WHERE.*{.+})|.*?({.+}))[a-zA-Z0-9]*?';

     $matches = array();

     if ($keyword == 'select') {
         $regex = $keyword . $where_clause;
         preg_match_all("/(.*?){$regex}/msi", $query, $matches);
     } else {
         preg_match_all("/(.*?){$keyword}(\\s*\\{[^{]+\\}){$where_clause}/mis", $query, $matches);
     }

     // Group 1 holds whatever precedes the keyword. When the pattern did not match there is
     // no first hit to read, in which case the empty filter below triggers the abort.
     $prefix = isset($matches[1][0]) ? $matches[1][0] : '';

     // The where clause can end up in capture group 3, 4 or 5,
     // the last non-empty group of those three is used
     $filter = "";

     foreach (array(3, 4, 5) as $group) {
         if (!empty($matches[$group][0])) {
             $filter = $matches[$group][0];
         }
     }

     if (empty($filter)) {
         \App::abort(500, "Failed to retrieve the where clause from the query: {$query}");
     }

     if (!$limitInQuery) {
         // Prepare the query to count results
         $count_query = $prefix . ' SELECT (count(*) AS ?count) ' . $filter;
         $count_query = str_replace("+", "%20", urlencode($count_query));
         $count_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");

         $response = json_decode($this->executeUri($count_uri, $endpoint_user, $endpoint_password));

         // If something goes wrong, the response will either be null or false,
         // a decoded response without a count binding cannot be used either
         if (!$response || !isset($response->results->bindings[0]->count->value)) {
             \App::abort(500, "Something went wrong while executing the count query. The assembled URI was: {$count_uri}");
         }

         $count = $response->results->bindings[0]->count->value;

         // Calculate page link headers, previous, next and last based on the count from the previous query
         $paging = Pager::calculatePagingHeaders($limit, $offset, $count);

         // Page the query itself, $query still holds the processed query at this point
         if (!empty($offset)) {
             $query = $query . " OFFSET {$offset} ";
         }

         if (!empty($limit)) {
             $query = $query . " LIMIT {$limit}";
         }
     }

     // Prepare the query with proper encoding for the request
     $query = str_replace('%23', '#', $query);
     $q = str_replace("+", "%20", urlencode($query));

     if ($keyword == 'select') {
         $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/sparql-results+json");

         $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);
         $result = json_decode($response);

         if (!$result) {
             \App::abort(500, 'The query has been executed, but the endpoint failed to return sparql results in JSON.');
         }

         $is_semantic = false;
     } else {
         $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/rdf+xml");

         $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);

         // Parse the triple response and retrieve the triples from them
         $result = new \EasyRdf_Graph();
         $parser = new \EasyRdf_Parser_RdfXml();
         $parser->parse($result, $response, 'rdfxml', null);

         $is_semantic = true;
     }

     // Create the data object to return
     $data = new Data();
     $data->data = $result;

     if (!$limitInQuery) {
         $data->paging = $paging;
     }

     $data->is_semantic = $is_semantic;
     $data->preferred_formats = $this->getPreferredFormats();

     \Log::info("SPARQL query: {$query}");

     if ($is_semantic) {
         // Fetch the available namespaces and pass
         // them as a configuration of the semantic data result
         $prefixes = array();

         foreach ($this->ontologies->getAll() as $ontology) {
             $prefixes[$ontology["prefix"]] = $ontology["uri"];
         }

         $data->semantic = new \stdClass();
         $data->semantic->conf = array('ns' => $prefixes);
         $data->preferred_formats = array('ttl', 'jsonld', 'rdf');
     }

     // Determine which parameters were given in the query, these are written as ${name}
     $parameter_matches = array();
     preg_match_all("/\\\$\\{(.+?)\\}/", $source_definition['query'], $parameter_matches);

     $data->optional_parameters = !empty($parameter_matches[1]) ? $parameter_matches[1] : array();

     return $data;
 }