Esempio n. 1
0
 /**
  * Read the raw XML document referenced by the source definition.
  *
  * The document is served from the cache when an entry exists for the URI, otherwise it is
  * fetched and stored in the cache with the 'cache' value of the source definition.
  * The XML is returned as-is, it is not parsed here.
  *
  * @param array $source_definition Source configuration; 'uri' and 'cache' are read, 'geo_formatted' is optional
  * @param array $rest_parameters   Not used by this reader
  *
  * @return Data
  */
 public function readData($source_definition, $rest_parameters = array())
 {
     $location = $source_definition['uri'];
     // Reset the prefix URI's that are tracked for this read
     $this->prefixes = array();
     if (!Cache::has($location)) {
         // Nothing cached yet, fetch the document (warnings are suppressed, failure is handled below)
         $contents = @file_get_contents($location);
         if (empty($contents)) {
             \App::abort(500, "Cannot retrieve data from the XML file located on {$location}.");
         } else {
             Cache::put($location, $contents, $source_definition['cache']);
         }
     } else {
         $contents = Cache::get($location);
     }
     $result = new Data();
     $result->data = $contents;
     $result->semantic = $this->prefixes;
     $result->preferred_formats = $this->getPreferredFormats();
     // Geo formatted sources override the list of preferred formats
     if (!empty($source_definition['geo_formatted'])) {
         $result->geo_formatted = true;
         $result->preferred_formats = array('geojson', 'map', 'php');
     }
     return $result;
 }
Esempio n. 2
0
 /**
  * Read a JSON document and return it, decoded, wrapped in a Data object.
  *
  * The raw document is served from the cache when an entry exists for the URI. Otherwise it is
  * fetched with cURL when the URI validates as a URL, or with file_get_contents() when it doesn't.
  * The raw document is only cached after it has been decoded successfully, so that a broken
  * payload is not kept around for the entire cache lifetime.
  *
  * Aborts with a 500 when the document cannot be retrieved or does not contain valid JSON.
  *
  * @param array $source_definition Source configuration; 'uri' and 'cache' are read
  * @param array $rest_parameters   Not used by this reader
  *
  * @return Data
  */
 public function readData($source_definition, $rest_parameters = array())
 {
     $uri = $source_definition['uri'];
     // Check for caching
     $from_cache = Cache::has($uri);
     if ($from_cache) {
         $data = Cache::get($uri);
     } else {
         // Fetch the data
         if (filter_var($uri, FILTER_VALIDATE_URL) !== false) {
             $ch = curl_init();
             curl_setopt($ch, CURLOPT_URL, $uri);
             curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
             // curl_exec returns false on failure, which is handled below
             $data = curl_exec($ch);
             curl_close($ch);
         } else {
             $data = @file_get_contents($uri);
         }
         if (!$data) {
             \App::abort(500, "Cannot retrieve data from the JSON file located on {$uri}.");
         }
     }
     $php_object = json_decode($data);
     // json_decode returns null for malformed input as well as for a literal "null",
     // only json_last_error can tell the two apart
     if (json_last_error() !== JSON_ERROR_NONE) {
         \App::abort(500, "The JSON file located on {$uri} does not contain valid JSON.");
     }
     if (!$from_cache) {
         Cache::put($uri, $data, $source_definition['cache']);
     }
     $data_result = new Data();
     $data_result->data = $php_object;
     $data_result->preferred_formats = $this->getPreferredFormats();
     return $data_result;
 }
Esempio n. 3
0
 /**
  * Retrieve the raw (undecoded) JSON document located on $uri.
  *
  * A cached copy is returned when available. Otherwise the document is fetched through
  * getRemoteData() when $uri validates as a URL with a scheme other than "file", and
  * through file_get_contents() in every other case. A retrieved document is cached
  * using $this->cache; an empty result aborts the request with a 500.
  *
  * @param string $uri The location of the JSON document
  *
  * @return mixed The cached entry or the retrieved document
  */
 private function getPlainJson($uri)
 {
     if (Cache::has($uri)) {
         return Cache::get($uri);
     }
     // Only valid URLs that don't point to the file system are fetched remotely
     $is_remote = false;
     if (filter_var($uri, FILTER_VALIDATE_URL)) {
         $url_parts = parse_url($uri);
         $is_remote = $url_parts['scheme'] != 'file';
     }
     $payload = $is_remote ? $this->getRemoteData($uri) : @file_get_contents($uri);
     if (!$payload) {
         \App::abort(500, "Cannot retrieve data from the JSON file located on {$uri}.");
     } else {
         Cache::put($uri, $payload, $this->cache);
     }
     return $payload;
 }
Esempio n. 4
0
 /**
  * Add triples to the graph and return it based on limit, offset and the SPARQL query
  *
  * Every configured SPARQL endpoint is first asked for the amount of matching triples (the
  * count response is cached for 5 minutes). When that count is higher than the offset, the
  * triples are fetched as RDF/XML and merged into the graph, otherwise the count is
  * subtracted from the offset before moving on to the next endpoint.
  *
  * @param string        $base_uri
  * @param EasyRdf_Graph $graph
  * @param int           $limit
  * @param int           $offset
  * @param integer       $depth     The depth the queries should have, handlers should not override this if given
  *
  * @return EasyRdf_Graph
  */
 public function addTriples($base_uri, $graph, $limit, $offset, $depth = null)
 {
     $total_triples = $graph->countTriples();
     // Iterate the sparql endpoints
     foreach ($this->sparql_repo->getAll() as $sparql_source) {
         $endpoint = rtrim($sparql_source['endpoint'], '/');
         $pw = $sparql_source['endpoint_password'];
         $user = $sparql_source['endpoint_user'];
         $sparql_id = $sparql_source['id'];
         // Resolve the depth per source. Writing the fallback back into $depth would make
         // the depth of the first source apply to every source that follows.
         $source_depth = is_null($depth) ? $sparql_source['depth'] : $depth;
         $count_query = $this->query_builder->createCountQuery($base_uri, \Request::root(), $sparql_source['named_graph'], $source_depth);
         \Log::info("sparql : " . $count_query . " base uri : " . $base_uri);
         // Check if the response has been cached yet
         $cache_string = $this->buildCacheString($sparql_id, $count_query);
         if (Cache::has($cache_string)) {
             $result = Cache::get($cache_string);
         } else {
             $count_query = urlencode($count_query);
             $count_query = str_replace("+", "%20", $count_query);
             $query_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");
             // Make a request with the count query to the SPARQL endpoint
             $result = $this->executeUri($query_uri, array(), $user, $pw);
             Cache::put($cache_string, $result, 5);
         }
         $response = json_decode($result);
         if (empty($response)) {
             // No usable count response, skip this source
             continue;
         }
         // An error document or an empty result set has no count binding to read
         if (!isset($response->results->bindings[0]->count->value)) {
             \Log::warning("The count response of the sparql source with id {$sparql_id} did not contain a count binding.");
             continue;
         }
         $count = $response->results->bindings[0]->count->value;
         // If the amount of matching triples is higher than the offset
         // add them and update the offset, if not higher, then only update the offset
         if ($count > $offset) {
             // Read the triples from the sparql endpoint
             // NOTE(review): $total_triples is raised by $count - $offset below, which can exceed
             // $limit and make $query_limit zero or negative for the next source - confirm intent
             $query_limit = $limit - $total_triples;
             $query = $this->query_builder->createFetchQuery($base_uri, \Request::root(), $sparql_source['named_graph'], $query_limit, $offset, $source_depth);
             $query = urlencode($query);
             $query = str_replace("+", "%20", $query);
             $query_uri = $endpoint . '?query=' . $query . '&format=' . urlencode("application/rdf+xml");
             // Check for caching
             // NOTE(review): nothing in this method stores an entry under this key - confirm it is filled elsewhere
             $cache_string = $this->buildCacheString($sparql_id, $query_uri);
             if (Cache::has($cache_string)) {
                 $result = Cache::get($cache_string);
             } else {
                 $result = $this->executeUri($query_uri, array(), $user, $pw);
             }
             // A response that doesn't start with a tag is treated as an error message
             if (!empty($result) && $result[0] == '<') {
                 // Parse the triple response and retrieve the triples from them
                 $result_graph = new \EasyRdf_Graph();
                 $parser = new \EasyRdf_Parser_RdfXml();
                 $parser->parse($result_graph, $result, 'rdfxml', null);
                 $graph = $this->mergeGraph($graph, $result_graph);
                 $total_triples += $count - $offset;
             } else {
                 \Log::error("Something went wrong while fetching the triples from the sparql source with id {$sparql_id}. The error was " . $result . ". The query was : " . $query_uri);
             }
         } else {
             // Update the offset
             $offset -= $count;
         }
         if ($offset < 0) {
             $offset = 0;
         }
     }
     return $graph;
 }
Esempio n. 5
0
 /**
  * Retrieve a Data object identified by $uri
  *
  * The URI is resolved to a dataset definition first; when no definition matches, it is
  * resolved as a collection, in which case the datasets and subcollections are listed.
  * Dataset responses are cached on URI, paging and the remaining query string parameters.
  *
  * @param string $uri The identifier that identifies a resource
  *
  * @return \Response
  */
 public function get($uri)
 {
     // Check permissions
     Auth::requirePermissions('dataset.view');
     // Split for an (optional) extension
     list($uri, $extension) = $this->processURI($uri);
     // The cache key is based on: URI / Rest parameters / Query parameters / Paging headers
     list($limit, $offset) = Pager::calculateLimitAndOffset();
     $query_part = http_build_query(\Input::except('limit', 'offset', 'page', 'page_size'));
     $cache_key = sha1("{$uri}/limit={$limit}offset={$offset}{$query_part}");
     if (Cache::has($cache_key)) {
         return ContentNegotiator::getResponse(Cache::get($cache_key), $extension);
     }
     $definition = $this->definition->getByIdentifier($uri);
     if ($definition) {
         $source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);
         if (!$source_definition) {
             \App::abort(404, "Source for the definition could not be found.");
             return null;
         }
         $source_type = $source_definition['type'];
         // Create the data controller that matches the source type
         $data_controller = \App::make('Tdt\\Core\\DataControllers\\' . $source_type . 'Controller');
         // Whatever is left of the URI after the dataset identifier are the REST parameters
         $identifier = $definition['collection_uri'] . '/' . $definition['resource_name'];
         $rest_parameters = explode('/', ltrim(str_replace($identifier, '', $uri), '/'));
         // explode() always returns at least one element, an empty first element means there are no parameters
         if ($rest_parameters[0] === '') {
             $rest_parameters = array();
         }
         // Retrieve the data object from the data controller
         $data = $data_controller->readData($source_definition, $rest_parameters);
         $data->rest_parameters = $rest_parameters;
         // REST filtering
         if ($source_type != 'INSTALLED' && count($data->rest_parameters) > 0) {
             $data->data = self::applyRestFilter($data->data, $data->rest_parameters);
         }
         // Add the definition and the source definition to the object
         $data->definition = $definition;
         $data->source_definition = $source_definition;
         // Add the available, supported formats to the object
         $format_helper = new FormatHelper();
         $data->formats = $format_helper->getAvailableFormats($data);
         // Store in cache
         Cache::put($cache_key, $data, $source_definition['cache']);
         // Return the formatted response with content negotiation
         return ContentNegotiator::getResponse($data, $extension);
     }
     // Couldn't find a definition, but it might be a collection
     $resources = $this->definition->getByCollection($uri);
     if (count($resources) <= 0) {
         \App::abort(404, "The dataset or collection you were looking for could not be found (URI: {$uri}).");
         return null;
     }
     $data = new Data();
     $data->data = new \stdClass();
     $data->data->datasets = array();
     $data->data->collections = array();
     foreach ($resources as $res) {
         $collection_uri = rtrim($res['collection_uri'], '/');
         if ($collection_uri == $uri) {
             // A dataset that sits directly in the requested collection
             $data->data->datasets[] = \URL::to($collection_uri . '/' . $res['resource_name']);
             continue;
         }
         // A subcollection, only list it once
         $collection_url = \URL::to($collection_uri);
         if (!in_array($collection_url, $data->data->collections)) {
             $data->data->collections[] = $collection_url;
         }
     }
     // Fake a definition: the last URI segment is the resource name, the rest is the collection
     $uri_segments = explode('/', $uri);
     $resource_name = array_pop($uri_segments);
     $data->definition = new \Definition();
     $data->definition->collection_uri = implode('/', $uri_segments);
     $data->definition->resource_name = $resource_name;
     // Return the formatted response with content negotiation
     return ContentNegotiator::getResponse($data, $extension);
 }
Esempio n. 6
0
 /**
  * Retrieve a Data object identified by $uri
  *
  * The URI is resolved to a dataset definition first; when no definition matches, it is
  * resolved as a collection, in which case the datasets and subcollections are listed.
  * Dataset responses are cached on URI, paging and the remaining query string parameters.
  * XML sources that are requested as XML are returned untransformed and are not cached here.
  *
  * @param string $uri The identifier that identifies a resource
  *
  * @return \Response
  */
 public function get($uri)
 {
     // Check permissions
     Auth::requirePermissions('dataset.view');
     // Split for an (optional) extension
     list($uri, $extension) = $this->processURI($uri);
     // Check for caching
     // Based on: URI / Rest parameters / Query parameters / Paging headers
     $cache_string = $uri;
     list($limit, $offset) = Pager::calculateLimitAndOffset();
     $cache_string .= '/limit=' . $limit . 'offset=' . $offset;
     // Leave the paging parameters out of the key. Keys are compared as strings, a loose
     // in_array() would treat a numeric key such as 0 as equal to 'limit' on older PHP versions.
     $omit = array('limit', 'offset', 'page', 'page_size');
     $query_string_params = array_diff_key(\Input::get(), array_flip($omit));
     $cache_string .= http_build_query($query_string_params);
     $cache_string = sha1($cache_string);
     if (Cache::has($cache_string)) {
         return ContentNegotiator::getResponse(Cache::get($cache_string), $extension);
     }
     // Get definition
     $definition = $this->definition->getByIdentifier($uri);
     if ($definition) {
         // Get source definition
         $source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);
         if ($source_definition) {
             $source_type = $source_definition['type'];
             // Create the right datacontroller
             $controller_class = 'Tdt\\Core\\DataControllers\\' . $source_type . 'Controller';
             $data_controller = \App::make($controller_class);
             // Get REST parameters: everything that follows the dataset identifier in the URI.
             // Comparing single URI segments doesn't work for collection URIs that consist of
             // several segments, and would drop a parameter that equals the resource name.
             $identifier = trim($definition['collection_uri'] . '/' . $definition['resource_name'], '/');
             $id_length = strlen($identifier);
             $is_prefix = strncmp($uri, $identifier, $id_length) === 0
                 && (strlen($uri) === $id_length || $uri[$id_length] === '/');
             if ($is_prefix) {
                 $remainder = ltrim((string) substr($uri, $id_length), '/');
                 $rest_parameters = ($remainder === '') ? array() : explode('/', $remainder);
             } else {
                 // The URI doesn't start with the identifier, fall back on the segment comparison
                 $uri_segments = explode('/', $uri);
                 $rest_parameters = array_diff($uri_segments, array($definition['collection_uri'], $definition['resource_name']));
                 $rest_parameters = array_values($rest_parameters);
             }
             // A non-empty throttle response is returned instead of the data
             $throttle_response = $this->applyThrottle($definition);
             if (!empty($throttle_response)) {
                 return $throttle_response;
             }
             // Retrieve dataobject from datacontroller
             $data = $data_controller->readData($source_definition, $rest_parameters);
             // If the source type is XML, just return the XML contents, don't transform
             if (strtolower($source_type) == 'xml' && $extension == 'xml') {
                 return $this->createXMLResponse($data->data);
             }
             $data->rest_parameters = $rest_parameters;
             // REST filtering
             if ($source_type != 'INSTALLED' && count($data->rest_parameters) > 0) {
                 $data->data = self::applyRestFilter($data->data, $data->rest_parameters);
             }
             // Semantic paging with the hydra voc
             if ($data->is_semantic && !empty($data->paging)) {
                 \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');
                 $graph = $data->data;
                 $url = \URL::to($definition['collection_uri'] . '/' . $definition['resource_name']);
                 $request_url = \Request::url();
                 $graph->addResource($request_url, 'void:subset', $url);
                 // Paging keys that have a matching hydra property, other keys are ignored
                 $hydra_properties = array(
                     'next' => 'hydra:nextPage',
                     'previous' => 'hydra:previousPage',
                     'last' => 'hydra:lastPage',
                     'first' => 'hydra:firstPage',
                 );
                 foreach ($data->paging as $key => $val) {
                     // $val holds the offset and the limit of the page, in that order
                     $paged_url = $request_url . '?offset=' . $val[0] . '&limit=' . $val[1] . Pager::buildQuerystring();
                     if (is_string($key) && isset($hydra_properties[$key])) {
                         $graph->addResource($request_url, $hydra_properties[$key], $paged_url);
                     }
                 }
                 $graph->addResource($url, 'a', 'dcat:Dataset');
                 // Fall back on the dataset identifier when no title has been configured
                 if (!empty($definition['title'])) {
                     $title = $definition['title'];
                 } else {
                     $title = $definition['collection_uri'] . '/' . $definition['resource_name'];
                 }
                 $graph->addLiteral($url, 'dc:title', $title);
                 $graph->addLiteral($url, 'dc:description', $source_definition['description']);
                 $graph->addResource($url, 'dcat:distribution', $url . '.json');
                 $data->data = $graph;
             }
             // Add definition to the object
             $data->definition = $definition;
             // Add source definition to the object
             $data->source_definition = $source_definition;
             // Add the available, supported formats to the object
             $format_helper = new FormatHelper();
             $data->formats = $format_helper->getAvailableFormats($data);
             // Store in cache
             Cache::put($cache_string, $data, $source_definition['cache']);
             // Return the formatted response with content negotiation
             return ContentNegotiator::getResponse($data, $extension);
         } else {
             \App::abort(404, "Source for the definition could not be found.");
         }
     } else {
         // Couldn't find a definition, but it might be a collection
         $resources = $this->definition->getByCollection($uri);
         if (count($resources) > 0) {
             $data = new Data();
             $data->data = new \stdClass();
             $data->data->datasets = array();
             $data->data->collections = array();
             foreach ($resources as $res) {
                 // Check if it's a subcollection or a dataset
                 $collection_uri = rtrim($res['collection_uri'], '/');
                 if ($collection_uri == $uri) {
                     array_push($data->data->datasets, \URL::to($collection_uri . '/' . $res['resource_name']));
                 } else {
                     // Push the subcollection if it's not already in the array
                     if (!in_array(\URL::to($collection_uri), $data->data->collections)) {
                         array_push($data->data->collections, \URL::to($collection_uri));
                     }
                 }
             }
             // Fake a definition: the last URI segment is the resource name, the rest is the collection
             $data->definition = new \Definition();
             $uri_array = explode('/', $uri);
             $last_chunk = array_pop($uri_array);
             $data->definition->collection_uri = join('/', $uri_array);
             $data->definition->resource_name = $last_chunk;
             // Return the formatted response with content negotiation
             return ContentNegotiator::getResponse($data, $extension);
         } else {
             \App::abort(404, "The dataset or collection you were looking for could not be found (URI: {$uri}).");
         }
     }
 }
Esempio n. 7
0
 /**
  * Resolve a graph pattern query (/all route)
  *
  * The matching triples are wrapped in a Data object that is cached for 5 minutes, keyed on
  * the raw request URI. The response carries a Vary: Accept header and allows CORS.
  *
  * @param string $format The format of the request
  *
  * @return \Response
  */
 public function solveQuery($format = null)
 {
     if (!empty($format)) {
         $format = ltrim($format, '.');
     }
     // Ignore the rest of the uri after /all and work with the request parameters as they were given
     $cache_key = sha1($this->getRawRequestURI(\Request::root()));
     if (!Cache::has($cache_key)) {
         // Get the graph pattern query string parameters and pass them to our sparql query builder
         list($s, $p, $o) = $this->getTemplateParameters();
         SparqlQueryBuilder::setParameters(array($s, $p, $o));
         // If no parameter has been filled in, the URI we have to match triples with is the root of our application
         $nothing_bound = $s == '?s' && $p == '?p' && $o == '?o';
         $base_uri = $nothing_bound ? \Request::root() : null;
         // Fetch matching triples
         $triples = $this->triples->getTriples($base_uri, \Request::get('limit', 100), \Request::get('offset', 0));
         // A graph without triples means the graph pattern couldn't resolve to anything
         if ($triples->countTriples() == 0) {
             \App::abort(404, "The resource couldn't be found, nor dereferenced.");
         }
         $data = new Data();
         $data->definition = array('resource_name' => "all", 'collection_uri' => "");
         $data->source_definition = array('description' => 'Semantic data collected out the configured semantic data sources.', 'type' => 'Semantic');
         $data->data = $triples;
         $data->is_semantic = true;
         // Add the available, supported formats to the object
         $format_helper = new FormatHelper();
         $data->formats = $format_helper->getAvailableFormats($data);
         // Store in cache for a default of 5 minutes
         Cache::put($cache_key, $data, 5);
     } else {
         // The response to the query has been cached already
         $data = Cache::get($cache_key);
     }
     // Add the hydra namespace, it's not present in the easy rdf namespaces by default
     \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');
     // Build the formatted response with content negotiation
     $response = ContentNegotiator::getResponse($data, $format);
     // The Vary header tells browsers to take the accept header into account for client side caching
     $response->header('Vary', 'Accept');
     // Allow CORS
     $response->header('Access-Control-Allow-Origin', '*');
     return $response;
 }
Esempio n. 8
0
 /**
  * Add triples to the graph and return it based on limit, offset and the SPARQL query
  *
  * Every configured LDF endpoint is first asked for its start fragment, from which the total
  * amount of matching triples and the page size are read. When that total is higher than the
  * offset, the necessary pages are fetched and merged into the graph, otherwise the total is
  * subtracted from the offset before moving on to the next endpoint. Responses are cached
  * for 5 minutes.
  *
  * @param string        $base_uri Not used by this handler
  * @param EasyRdf_Graph $graph
  * @param int           $limit
  * @param int           $offset
  *
  * @return EasyRdf_Graph
  */
 public function addTriples($base_uri, $graph, $limit, $offset)
 {
     $total_triples = $graph->countTriples();
     // Build the query string (raw) once, it is the same for every endpoint
     // Don't let paging parameters in the re-constructed query string
     $invalid_q_string = array('page', 'page_size', 'limit', 'offset');
     $query_string = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
     $kept_parts = array();
     foreach (explode('&', $query_string) as $part) {
         if ($part === '') {
             continue;
         }
         // Split on the first '=' only: values that contain a '=' stay intact and
         // parameters without a value don't cause an undefined offset
         $couple = explode('=', $part, 2);
         if (!in_array($couple[0], $invalid_q_string, true)) {
             $kept_parts[] = $couple[0] . '=' . (isset($couple[1]) ? $couple[1] : '');
         }
     }
     $q_string_raw = implode('&', $kept_parts);
     // Iterate the LDF end points, note that ldf servers don't necessarily have page size's as a parameter
     // But rather have a fixed page size
     foreach ($this->ldf_repo->getAll() as $ldf_conf) {
         $start_fragment = $ldf_conf['startfragment'];
         $entire_fragment = rtrim($start_fragment . '?' . $q_string_raw, '?');
         // Make the LDF query (basic GET to the endpoint, should provide us with a hydra:totalItems or void:triples entry)
         $accept = array("Accept: text/turtle,*/*;q=0.0");
         if (Cache::has($entire_fragment)) {
             $response = Cache::get($entire_fragment);
         } else {
             $response = $this->executeUri($entire_fragment, $accept);
         }
         if (!$response) {
             \Log::warning("Couldn't fetch a proper response for the fragment: {$entire_fragment}.");
             continue;
         }
         // Try decoding it into turtle, if not something is wrong with the response body
         try {
             $tmp_graph = new \EasyRdf_Graph();
             $parser = new \EasyRdf_Parser_Turtle();
             \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');
             $parser->parse($tmp_graph, $response, 'turtle', null);
             // Fetch the count (hydra:totalItems or void:triples)
             $count = $tmp_graph->getLiteral($entire_fragment, 'hydra:totalItems');
             $page_size = $tmp_graph->getLiteral($entire_fragment, 'hydra:itemsPerPage');
             if (is_null($count)) {
                 $count = $tmp_graph->getLiteral($entire_fragment, 'void:triples');
             }
             if (is_null($count) || is_null($page_size)) {
                 // Skip the endpoint without touching the offset, a placeholder count
                 // of -1 would be subtracted from the offset and thus increase it
                 \Log::warning("An LDF endpoint's count could not be retrieved from the uri: {$entire_fragment}");
                 continue;
             }
             $count = $count->getValue();
             $page_size = $page_size->getValue();
             Cache::put($entire_fragment, $response, 5);
             if ($page_size <= 0) {
                 // The page size is used as a divisor below
                 \Log::warning("An LDF endpoint reported an invalid page size for the uri: {$entire_fragment}");
                 continue;
             }
             // If the amount of matching triples is higher than the offset
             // add them and update the offset, if not higher, then only update the offset
             if ($count > $offset) {
                 // Read the triples from the LDF
                 $query_limit = $limit - $total_triples;
                 // There's no way of giving along the page size (not that we can presume)
                 // So we have to make a number of requests
                 $amount_of_requests = ceil($query_limit / $page_size);
                 // Calculate the page offset from the offset parameter
                 $page_offset = ceil($offset / $page_size);
                 for ($i = $page_offset; $i < $amount_of_requests + $page_offset; $i++) {
                     $paged_fragment = $entire_fragment;
                     if (!empty($q_string_raw)) {
                         $paged_fragment .= '&page=' . $i;
                     } else {
                         $paged_fragment .= '?page=' . $i;
                     }
                     // Ask for turtle
                     $accept = array('Accept: text/turtle');
                     $response = null;
                     if (Cache::has($paged_fragment)) {
                         $response = Cache::get($paged_fragment);
                     }
                     // Only raw turtle can be parsed again, ignore any other kind of cache entry
                     if (!is_string($response)) {
                         $response = $this->executeUri($paged_fragment, $accept);
                     }
                     if ($response) {
                         // Try decoding it into turtle, if not something is wrong with the response body
                         try {
                             $tmp_graph = new \EasyRdf_Graph();
                             $parser = new \EasyRdf_Parser_Turtle();
                             $parser->parse($tmp_graph, $response, 'turtle', $start_fragment);
                             // Fetch the count (hydra:totalItems or void:triples)
                             $total_items = $tmp_graph->getLiteral($paged_fragment, 'hydra:totalItems');
                             if (is_null($total_items)) {
                                 $total_items = $tmp_graph->getLiteral($paged_fragment, 'void:triples');
                             }
                             if (!is_null($total_items)) {
                                 // Cache the raw response, a cache hit is parsed as turtle again above
                                 Cache::put($paged_fragment, $response, 5);
                                 // This needs to be a function of a different helper class for LDF endpoints
                                 $tmp_graph = $this->rebaseGraph($start_fragment, $tmp_graph);
                                 $graph = $this->mergeGraph($graph, $tmp_graph);
                                 $total_triples += $page_size;
                             }
                         } catch (\EasyRdf_Parser_Exception $ex) {
                             \Log::error("Failed to parse turtle content from the LDF endpoint: {$start_fragment}");
                         }
                     } else {
                         \Log::error("Something went wrong while fetching the triples from a LDF source. The error was " . $response . ". The query was : " . $paged_fragment);
                     }
                 }
             } else {
                 // Update the offset
                 $offset -= $count;
             }
             if ($offset < 0) {
                 $offset = 0;
             }
         } catch (\EasyRdf_Parser_Exception $ex) {
             \Log::error("Failed to parse turtle content from the LDF endpoint: {$start_fragment}");
         }
     }
     return $graph;
 }