/**
 * Read the data from an XML source, serving it from the cache when possible.
 *
 * @param array $source_definition The source definition, must contain 'uri' and 'cache'
 *                                 (cache lifetime); may contain 'geo_formatted'
 * @param array $rest_parameters   REST parameters (unused by this reader)
 *
 * @return Data The raw XML string wrapped in a Data object
 */
public function readData($source_definition, $rest_parameters = array())
{
    $uri = $source_definition['uri'];

    // Keep track of the prefix URI's
    $this->prefixes = array();

    // Check for caching
    if (Cache::has($uri)) {
        $data = Cache::get($uri);
    } else {
        // Fetch the data; the warning is suppressed because failure is handled below
        $data = @file_get_contents($uri);

        // file_get_contents() returns false on failure. Testing with !== false
        // (instead of the previous !empty()) avoids rejecting a file whose
        // entire content is the string "0", which empty() treats as empty.
        if ($data !== false && $data !== '') {
            Cache::put($uri, $data, $source_definition['cache']);
        } else {
            \App::abort(500, "Cannot retrieve data from the XML file located on {$uri}.");
        }
    }

    $data_result = new Data();
    $data_result->data = $data;
    $data_result->semantic = $this->prefixes;
    $data_result->preferred_formats = $this->getPreferredFormats();

    // Geographical sources get a dedicated set of preferred output formats
    if (!empty($source_definition['geo_formatted']) && $source_definition['geo_formatted']) {
        $data_result->geo_formatted = true;
        $data_result->preferred_formats = array('geojson', 'map', 'php');
    }

    return $data_result;
}
/**
 * Read the data from a JSON source, serving it from the cache when possible.
 *
 * Remote URL's are fetched with cURL; anything else (e.g. a local path) falls
 * back to file_get_contents().
 *
 * @param array $source_definition The source definition, must contain 'uri' and 'cache'
 * @param array $rest_parameters   REST parameters (unused by this reader)
 *
 * @return Data The decoded JSON (stdClass/array) wrapped in a Data object
 */
public function readData($source_definition, $rest_parameters = array())
{
    $uri = $source_definition['uri'];

    // Check for caching
    if (Cache::has($uri)) {
        $data = Cache::get($uri);
    } else {
        // Fetch the data. The original condition was written as
        // "!filter_var(...) === false", which only worked through operator
        // precedence; "!== false" states the intent directly.
        if (filter_var($uri, FILTER_VALIDATE_URL) !== false) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $uri);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            $data = curl_exec($ch);
            curl_close($ch);
        } else {
            $data = @file_get_contents($uri);
        }

        // Both curl_exec() and file_get_contents() return false on failure
        if ($data) {
            Cache::put($uri, $data, $source_definition['cache']);
        } else {
            \App::abort(500, "Cannot retrieve data from the JSON file located on {$uri}.");
        }
    }

    $php_object = json_decode($data);

    $data_result = new Data();
    $data_result->data = $php_object;
    $data_result->preferred_formats = $this->getPreferredFormats();

    return $data_result;
}
/**
 * Fetch the raw JSON content behind $uri, serving it from the cache when possible.
 *
 * Valid URL's with a scheme other than "file" are fetched remotely; file://
 * URI's and plain paths go through file_get_contents().
 *
 * @param string $uri The location of the JSON document
 *
 * @return string The raw JSON content (aborts with a 500 on failure)
 */
private function getPlainJson($uri)
{
    if (Cache::has($uri)) {
        return Cache::get($uri);
    }

    // The original condition was "!filter_var(...) === false", which only
    // worked through operator precedence; "!== false" states the intent directly.
    if (filter_var($uri, FILTER_VALIDATE_URL) !== false) {
        $parts = parse_url($uri);

        if ($parts['scheme'] != 'file') {
            $data = $this->getRemoteData($uri);
        } else {
            $data = @file_get_contents($uri);
        }
    } else {
        $data = @file_get_contents($uri);
    }

    // getRemoteData()/file_get_contents() return false (or empty) on failure
    if ($data) {
        Cache::put($uri, $data, $this->cache);
    } else {
        \App::abort(500, "Cannot retrieve data from the JSON file located on {$uri}.");
    }

    return $data;
}
/**
 * Add triples to the graph and return it based on limit, offset and the SPARQL query
 *
 * Iterates every configured SPARQL endpoint: first a COUNT query decides how many
 * triples the endpoint can contribute, then (if the count exceeds the running
 * offset) a fetch query retrieves the triples as RDF/XML and merges them into
 * the graph. The offset is consumed endpoint by endpoint.
 *
 * @param string $base_uri
 * @param EasyRdf_Graph $graph
 * @param int $limit
 * @param int $offset
 * @param integer $depth The depth the queries should have, handlers should not override this if given
 *
 * @return EasyRdf_Graph
 */
public function addTriples($base_uri, $graph, $limit, $offset, $depth = null)
{
    $total_triples = $graph->countTriples();

    // Iterate the sparql endpoints
    foreach ($this->sparql_repo->getAll() as $sparql_source) {
        $endpoint = $sparql_source['endpoint'];
        $pw = $sparql_source['endpoint_password'];
        $user = $sparql_source['endpoint_user'];
        $endpoint = rtrim($endpoint, '/');

        // A caller-given depth takes precedence over the source's configured depth
        if (is_null($depth)) {
            $depth = $sparql_source['depth'];
        }

        $count_query = $this->query_builder->createCountQuery($base_uri, \Request::root(), $sparql_source['named_graph'], $depth);

        \Log::info("sparql : " . $count_query . " base uri : " . $base_uri);

        // Check if the response has been cached yet
        $cache_string = $this->buildCacheString($sparql_source['id'], $count_query);

        if (Cache::has($cache_string)) {
            $result = Cache::get($cache_string);
        } else {
            // urlencode() produces "+" for spaces; SPARQL endpoints expect "%20"
            $count_query = urlencode($count_query);
            $count_query = str_replace("+", "%20", $count_query);

            $query_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");

            // Make a request with the count query to the SPARQL endpoint
            $result = $this->executeUri($query_uri, array(), $user, $pw);
            Cache::put($cache_string, $result, 5);
        }

        $response = json_decode($result);

        if (!empty($response)) {
            $count = $response->results->bindings[0]->count->value;

            // If the amount of matching triples is higher than the offset
            // add them and update the offset, if not higher, then only update the offset
            if ($count > $offset) {
                // Read the triples from the sparql endpoint
                $query_limit = $limit - $total_triples;

                $query = $this->query_builder->createFetchQuery($base_uri, \Request::root(), $sparql_source['named_graph'], $query_limit, $offset, $depth);

                $query = urlencode($query);
                $query = str_replace("+", "%20", $query);

                $query_uri = $endpoint . '?query=' . $query . '&format=' . urlencode("application/rdf+xml");

                // Check for caching
                // NOTE(review): only the count-query result is ever Cache::put();
                // this fetch-query lookup can only hit if something else stored it — verify intent
                $cache_string = $this->buildCacheString($sparql_source['id'], $query_uri);

                if (Cache::has($cache_string)) {
                    $result = Cache::get($cache_string);
                } else {
                    $result = $this->executeUri($query_uri, array(), $user, $pw);
                }

                // A leading '<' is used as a cheap sanity check for an RDF/XML body
                if (!empty($result) && $result[0] == '<') {
                    // Parse the triple response and retrieve the triples from them
                    $result_graph = new \EasyRdf_Graph();
                    $parser = new \EasyRdf_Parser_RdfXml();
                    $parser->parse($result_graph, $result, 'rdfxml', null);

                    $graph = $this->mergeGraph($graph, $result_graph);

                    $total_triples += $count - $offset;
                } else {
                    $sparql_id = $sparql_source['id'];
                    \Log::error("Something went wrong while fetching the triples from the sparql source with id {$sparql_id}. The error was " . $result . ". The query was : " . $query_uri);
                }
            } else {
                // Update the offset: this endpoint is skipped entirely,
                // so its count is consumed from the remaining offset
                $offset -= $count;
            }

            if ($offset < 0) {
                $offset = 0;
            }
        }
    }

    return $graph;
}
/**
 * Retrieve a Data object identified by $uri
 *
 * Resolves the URI to a dataset definition and delegates reading to the
 * matching data controller; if no definition matches, the URI is treated as a
 * (sub)collection and a listing of datasets/collections is returned instead.
 * Responses are cached under a hash of URI + paging + query-string parameters.
 *
 * @param string $uri The identifier that identifies a resource
 *
 * @return \Response
 */
public function get($uri)
{
    // Check permissions
    Auth::requirePermissions('dataset.view');

    // Split for an (optional) extension
    list($uri, $extension) = $this->processURI($uri);

    // Check for caching
    // Based on: URI / Rest parameters / Query parameters / Paging headers
    $cache_string = $uri;

    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $cache_string .= '/limit=' . $limit . 'offset=' . $offset;

    // Paging parameters are excluded; they are already encoded via limit/offset above
    $cache_string .= http_build_query(\Input::except('limit', 'offset', 'page', 'page_size'));

    $cache_string = sha1($cache_string);

    if (Cache::has($cache_string)) {
        return ContentNegotiator::getResponse(Cache::get($cache_string), $extension);
    } else {
        // Get definition
        $definition = $this->definition->getByIdentifier($uri);

        if ($definition) {
            // Get source definition
            $source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);

            if ($source_definition) {
                $source_type = $source_definition['type'];

                // Create the right datacontroller
                $controller_class = 'Tdt\\Core\\DataControllers\\' . $source_type . 'Controller';
                $data_controller = \App::make($controller_class);

                // Get REST parameters: whatever trails the definition's own
                // "collection_uri/resource_name" prefix in the requested URI
                $rest_parameters = str_replace($definition['collection_uri'] . '/' . $definition['resource_name'], '', $uri);
                $rest_parameters = ltrim($rest_parameters, '/');
                $rest_parameters = explode('/', $rest_parameters);

                // is_numeric() keeps a literal "0" segment, which empty() would drop
                if (empty($rest_parameters[0]) && !is_numeric($rest_parameters[0])) {
                    $rest_parameters = array();
                }

                // Retrieve dataobject from datacontroller
                $data = $data_controller->readData($source_definition, $rest_parameters);
                $data->rest_parameters = $rest_parameters;

                // REST filtering
                if ($source_type != 'INSTALLED' && count($data->rest_parameters) > 0) {
                    $data->data = self::applyRestFilter($data->data, $data->rest_parameters);
                }

                // Add definition to the object
                $data->definition = $definition;

                // Add source definition to the object
                $data->source_definition = $source_definition;

                // Add the available, supported formats to the object
                $format_helper = new FormatHelper();
                $data->formats = $format_helper->getAvailableFormats($data);

                // Store in cache
                Cache::put($cache_string, $data, $source_definition['cache']);

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "Source for the definition could not be found.");
            }
        } else {
            // Couldn't find a definition, but it might be a collection
            $resources = $this->definition->getByCollection($uri);

            if (count($resources) > 0) {
                $data = new Data();
                $data->data = new \stdClass();
                $data->data->datasets = array();
                $data->data->collections = array();

                foreach ($resources as $res) {
                    // Check if it's a subcollection or a dataset
                    $collection_uri = rtrim($res['collection_uri'], '/');

                    if ($collection_uri == $uri) {
                        array_push($data->data->datasets, \URL::to($collection_uri . '/' . $res['resource_name']));
                    } else {
                        // Push the subcollection if it's not already in the array
                        if (!in_array(\URL::to($collection_uri), $data->data->collections)) {
                            array_push($data->data->collections, \URL::to($collection_uri));
                        }
                    }
                }

                // Fake a definition so the response formatter has one to work with
                $data->definition = new \Definition();
                $uri_array = explode('/', $uri);
                $last_chunk = array_pop($uri_array);
                $data->definition->collection_uri = join('/', $uri_array);
                $data->definition->resource_name = $last_chunk;

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "The dataset or collection you were looking for could not be found (URI: {$uri}).");
            }
        }
    }
}
/**
 * Retrieve a Data object identified by $uri
 *
 * Resolves the URI to a dataset definition, applies throttling, delegates
 * reading to the matching data controller, and (for semantic sources with
 * paging) decorates the RDF graph with hydra paging links and dcat/dc
 * metadata. If no definition matches, the URI is treated as a (sub)collection
 * and a listing of datasets/collections is returned instead. Responses are
 * cached under a hash of URI + paging + query-string parameters.
 *
 * @param string $uri The identifier that identifies a resource
 *
 * @return \Response
 */
public function get($uri)
{
    // Check permissions
    Auth::requirePermissions('dataset.view');

    // Split for an (optional) extension
    list($uri, $extension) = $this->processURI($uri);

    // Check for caching
    // Based on: URI / Rest parameters / Query parameters / Paging headers
    $cache_string = $uri;

    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $cache_string .= '/limit=' . $limit . 'offset=' . $offset;

    // Strip paging parameters; they are already encoded via limit/offset above
    $omit = ['limit', 'offset', 'page', 'page_size'];

    $query_string_params = \Input::get();

    foreach ($query_string_params as $key => $val) {
        if (in_array($key, $omit)) {
            unset($query_string_params[$key]);
        }
    }

    $cache_string .= http_build_query($query_string_params);
    $cache_string = sha1($cache_string);

    if (Cache::has($cache_string)) {
        return ContentNegotiator::getResponse(Cache::get($cache_string), $extension);
    } else {
        // Get definition
        $definition = $this->definition->getByIdentifier($uri);

        if ($definition) {
            // Get source definition
            $source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);

            if ($source_definition) {
                $source_type = $source_definition['type'];

                // Create the right datacontroller
                $controller_class = 'Tdt\\Core\\DataControllers\\' . $source_type . 'Controller';
                $data_controller = \App::make($controller_class);

                // Get REST parameters: every URI segment that is not part of
                // the definition's collection_uri/resource_name identifier
                $uri_segments = explode('/', $uri);
                $rest_parameters = array_diff($uri_segments, array($definition['collection_uri'], $definition['resource_name']));
                $rest_parameters = array_values($rest_parameters);

                $throttle_response = $this->applyThrottle($definition);

                if (!empty($throttle_response)) {
                    return $throttle_response;
                }

                // Retrieve dataobject from datacontroller
                $data = $data_controller->readData($source_definition, $rest_parameters);

                // If the source type is XML, just return the XML contents, don't transform
                if (strtolower($source_type) == 'xml' && $extension == 'xml') {
                    return $this->createXMLResponse($data->data);
                }

                $data->rest_parameters = $rest_parameters;

                // REST filtering
                if ($source_type != 'INSTALLED' && count($data->rest_parameters) > 0) {
                    $data->data = self::applyRestFilter($data->data, $data->rest_parameters);
                }

                // Semantic paging with the hydra voc
                if ($data->is_semantic && !empty($data->paging)) {
                    \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');
                    $graph = $data->data;
                    $url = \URL::to($definition['collection_uri'] . '/' . $definition['resource_name']);

                    $request_url = \Request::url();
                    $graph->addResource($request_url, 'void:subset', $url);

                    // $data->paging maps 'next'/'previous'/'last'/'first' to [offset, limit] pairs
                    foreach ($data->paging as $key => $val) {
                        $paged_url = $request_url . '?offset=' . $val[0] . '&limit=' . $val[1] . Pager::buildQuerystring();

                        switch ($key) {
                            case 'next':
                                $graph->addResource($request_url, 'hydra:nextPage', $paged_url);
                                break;
                            case 'previous':
                                $graph->addResource($request_url, 'hydra:previousPage', $paged_url);
                                break;
                            case 'last':
                                $graph->addResource($request_url, 'hydra:lastPage', $paged_url);
                                break;
                            case 'first':
                                $graph->addResource($request_url, 'hydra:firstPage', $paged_url);
                                break;
                        }
                    }

                    $graph->addResource($url, 'a', 'dcat:Dataset');

                    // Fall back to the identifier when no explicit title is configured
                    $title = null;
                    if (!empty($definition['title'])) {
                        $title = $definition['title'];
                    } else {
                        $title = $definition['collection_uri'] . '/' . $definition['resource_name'];
                    }

                    $graph->addLiteral($url, 'dc:title', $title);
                    $graph->addLiteral($url, 'dc:description', $source_definition['description']);
                    $graph->addResource($url, 'dcat:distribution', $url . '.json');

                    $data->data = $graph;
                }

                // Add definition to the object
                $data->definition = $definition;

                // Add source definition to the object
                $data->source_definition = $source_definition;

                // Add the available, supported formats to the object
                $format_helper = new FormatHelper();
                $data->formats = $format_helper->getAvailableFormats($data);

                // Store in cache
                Cache::put($cache_string, $data, $source_definition['cache']);

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "Source for the definition could not be found.");
            }
        } else {
            // Couldn't find a definition, but it might be a collection
            $resources = $this->definition->getByCollection($uri);

            if (count($resources) > 0) {
                $data = new Data();
                $data->data = new \stdClass();
                $data->data->datasets = array();
                $data->data->collections = array();

                foreach ($resources as $res) {
                    // Check if it's a subcollection or a dataset
                    $collection_uri = rtrim($res['collection_uri'], '/');

                    if ($collection_uri == $uri) {
                        array_push($data->data->datasets, \URL::to($collection_uri . '/' . $res['resource_name']));
                    } else {
                        // Push the subcollection if it's not already in the array
                        if (!in_array(\URL::to($collection_uri), $data->data->collections)) {
                            array_push($data->data->collections, \URL::to($collection_uri));
                        }
                    }
                }

                // Fake a definition so the response formatter has one to work with
                $data->definition = new \Definition();
                $uri_array = explode('/', $uri);
                $last_chunk = array_pop($uri_array);
                $data->definition->collection_uri = join('/', $uri_array);
                $data->definition->resource_name = $last_chunk;

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "The dataset or collection you were looking for could not be found (URI: {$uri}).");
            }
        }
    }
}
/**
 * Resolve a graph pattern query (/all route)
 *
 * Builds a triple-pattern query from the ?s/?p/?o request parameters, fetches
 * the matching triples from the configured semantic sources, wraps them in a
 * Data object (cached for 5 minutes) and returns a content-negotiated response.
 *
 * @param string $format The format of the request
 *
 * @return \Response
 */
public function solveQuery($format = null)
{
    // Initialize explicitly: the original bare "$data;" statement read an
    // undefined variable (E_NOTICE) and had no effect.
    $data = null;

    if (!empty($format)) {
        $format = ltrim($format, '.');
    }

    // Ignore the rest of the uri after /all and work with the request parameters as they were given
    $cache_string = sha1($this->getRawRequestURI(\Request::root()));

    // Check if the response to the query has been cached already
    if (Cache::has($cache_string)) {
        $data = Cache::get($cache_string);
    } else {
        // Get the graph pattern query string parameters from the request
        list($s, $p, $o) = $this->getTemplateParameters();

        // Pass them to our sparql query builder
        SparqlQueryBuilder::setParameters(array($s, $p, $o));

        $base_uri = null;

        // If no parameter has been filled in, the URI we have to match triples with is the root of our application
        if ($s == '?s' && $p == '?p' && $o == '?o') {
            $base_uri = \Request::root();
        }

        // Fetch matching triples
        $result = $this->triples->getTriples($base_uri, \Request::get('limit', 100), \Request::get('offset', 0));

        // If the graph contains no triples, then the graph pattern couldn't resolve to anything, 404 it is
        if ($result->countTriples() == 0) {
            \App::abort(404, "The resource couldn't be found, nor dereferenced.");
        }

        $definition = array('resource_name' => "all", 'collection_uri' => "");

        $source_definition = array('description' => 'Semantic data collected out the configured semantic data sources.', 'type' => 'Semantic');

        $data = new Data();
        $data->definition = $definition;
        $data->source_definition = $source_definition;
        $data->data = $result;
        $data->is_semantic = true;

        // Add the available, supported formats to the object
        $format_helper = new FormatHelper();
        $data->formats = $format_helper->getAvailableFormats($data);

        // Store in cache for a default of 5 minutes
        Cache::put($cache_string, $data, 5);
    }

    // Add the hydra namespace, it's not present in the easy rdf namespaces by default
    \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');

    // Return the formatted response with content negotiation
    $response = ContentNegotiator::getResponse($data, $format);

    // Pass a Vary header so that browsers know they have to take the accept header
    // into consideration when they apply caching client side
    $response->header('Vary', 'Accept');

    // Allow CORS
    $response->header('Access-Control-Allow-Origin', '*');

    return $response;
}
/**
 * Add triples to the graph and return it based on limit, offset and the SPARQL query
 *
 * Iterates the configured Linked Data Fragments endpoints. For each endpoint
 * the start fragment is fetched to read the total item count and page size
 * (hydra:totalItems / void:triples and hydra:itemsPerPage); when the count
 * exceeds the running offset, the required pages are fetched, rebased and
 * merged into the graph. The offset is consumed endpoint by endpoint.
 *
 * @param string $base_uri
 * @param EasyRdf_Graph $graph
 * @param int $limit
 * @param int $offset
 *
 * @return EasyRdf_Graph
 */
public function addTriples($base_uri, $graph, $limit, $offset)
{
    $total_triples = $graph->countTriples();

    // Iterate the LDF end points, note that ldf servers don't necessarily have page size's as a parameter
    // But rather have a fixed page size
    foreach ($this->ldf_repo->getAll() as $ldf_conf) {
        // Build the query string (raw); guard against the key being absent (e.g. CLI)
        $query_string = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';

        $q_string_raw = '';

        $query_parts = explode('&', $query_string);

        // Don't let paging parameters in the re-constructed query string
        $invalid_q_string = array('page', 'page_size', 'limit', 'offset');

        foreach ($query_parts as $part) {
            if (!empty($part)) {
                $couple = explode('=', $part);

                if (!in_array($couple[0], $invalid_q_string)) {
                    $q_string_raw .= $couple[0] . '=' . $couple[1] . '&';
                }
            }
        }

        $q_string_raw = rtrim($q_string_raw, '&');

        $start_fragment = $ldf_conf['startfragment'];

        $entire_fragment = $start_fragment . '?' . $q_string_raw;
        $entire_fragment = rtrim($entire_fragment, '?');

        // Make the LDF query (basic GET to the endpoint, should provide us with a hydra:totalItems or void:triples entry)
        $accept = array("Accept: text/turtle,*/*;q=0.0");

        $response = '';

        if (Cache::has($entire_fragment)) {
            $response = Cache::get($entire_fragment);
        } else {
            $response = $this->executeUri($entire_fragment, $accept);
        }

        if ($response) {
            // Try decoding it into turtle, if not something is wrong with the response body
            try {
                $tmp_graph = new \EasyRdf_Graph();
                $parser = new \EasyRdf_Parser_Turtle();
                \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');

                $parser->parse($tmp_graph, $response, 'turtle', null);

                // Fetch the count (hydra:totalItems or void:triples)
                $count = $tmp_graph->getLiteral($entire_fragment, 'hydra:totalItems');
                $page_size = $tmp_graph->getLiteral($entire_fragment, 'hydra:itemsPerPage');

                if (is_null($count)) {
                    $count = $tmp_graph->getLiteral($entire_fragment, 'void:triples');
                }

                if (is_null($count) || is_null($page_size)) {
                    // Skip, the count has not been found on this endpoint
                    $count = -1;

                    \Log::warning("An LDF endpoint's count could not be retrieved from the uri: {$entire_fragment}");
                } else {
                    $count = $count->getValue();
                    $page_size = $page_size->getValue();

                    Cache::put($entire_fragment, $response, 5);
                }

                // If the amount of matching triples is higher than the offset
                // add them and update the offset, if not higher, then only update the offset
                if ($count > $offset) {
                    // Read the triples from the LDF
                    $query_limit = $limit - $total_triples;

                    // There's no way of giving along the page size (not that we can presume)
                    // So we have to make a number of requests
                    $amount_of_requests = ceil($query_limit / $page_size);

                    // Calculate the page offset from the offset parameter
                    $page_offset = ceil($offset / $page_size);

                    for ($i = $page_offset; $i < $amount_of_requests + $page_offset; $i++) {
                        $paged_fragment = $entire_fragment;

                        if (!empty($q_string_raw)) {
                            $paged_fragment .= '&page=' . $i;
                        } else {
                            $paged_fragment .= '?page=' . $i;
                        }

                        // Ask for turtle
                        $accept = array('Accept: text/turtle');

                        $response = '';

                        if (Cache::has($paged_fragment)) {
                            $response = Cache::get($paged_fragment);
                        } else {
                            $response = $this->executeUri($paged_fragment, $accept);
                        }

                        if ($response) {
                            // Try decoding it into turtle, if not something is wrong with the response body
                            try {
                                $tmp_graph = new \EasyRdf_Graph();
                                $parser = new \EasyRdf_Parser_Turtle();

                                $parser->parse($tmp_graph, $response, 'turtle', $start_fragment);

                                // Fetch the count (hydra:totalItems or void:triples)
                                $total_items = $tmp_graph->getLiteral($paged_fragment, 'hydra:totalItems');

                                if (is_null($total_items)) {
                                    $total_items = $tmp_graph->getLiteral($paged_fragment, 'void:triples');
                                }

                                if (!is_null($total_items)) {
                                    Cache::put($paged_fragment, $tmp_graph, 5);

                                    // This needs to be a function of a different helper class for LDF endpoints
                                    $tmp_graph = $this->rebaseGraph($start_fragment, $tmp_graph);

                                    $graph = $this->mergeGraph($graph, $tmp_graph);

                                    $total_triples += $page_size;
                                }
                            } catch (\EasyRdf_Parser_Exception $ex) {
                                \Log::error("Failed to parse turtle content from the LDF endpoint: {$start_fragment}");
                            }
                        } else {
                            \Log::error("Something went wrong while fetching the triples from a LDF source. The error was " . $response . ". The query was : " . $paged_fragment);
                        }
                    }
                } else {
                    // Update the offset
                    $offset -= $count;
                }

                if ($offset < 0) {
                    $offset = 0;
                }
            } catch (\EasyRdf_Parser_Exception $ex) {
                // Fixed: the original logged {$endpoint}, a variable that is
                // never defined in this method; $start_fragment is the endpoint URI.
                \Log::error("Failed to parse turtle content from the LDF endpoint: {$start_fragment}");
            }
        } else {
            \Log::warning("Couldn't fetch a proper response for the fragment: {$entire_fragment}.");
        }
    }

    return $graph;
}