/**
 * Create the DCAT document of the published (non-draft) resources
 *
 * @return mixed \Data object with a graph of DCAT information
 */
private function createDcat()
{
    // Register every DCAT namespace with EasyRdf so the prefixes resolve
    // when the graph is serialized later on
    $ns = $this->dcat->getNamespaces();

    foreach ($ns as $prefix => $uri) {
        \EasyRdf_Namespace::set($prefix, $uri);
    }

    // Apply paging when fetching the definitions
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    // Total count drives the paging headers; the page itself is fetched below
    $definition_count = $this->definitions->countPublished();

    $definitions = $this->definitions->getAllPublished($limit, $offset);

    // Oldest definition is handed to the DCAT builder — presumably used as the
    // catalog's issued/modified date, TODO confirm against the dcat repository
    $oldest = $this->definitions->getOldest();

    $describedDefinitions = array();

    // Add the source type description to the definition
    foreach ($definitions as $definition) {
        $definition = array_merge(
            $definition,
            $this->definitions->getFullDescription($definition['collection_uri'] . '/' . $definition['resource_name'])
        );

        array_push($describedDefinitions, $definition);
    }

    $graph = $this->dcat->getDcatDocument($describedDefinitions, $oldest);

    // Return the dcat feed in our internal data object
    $data_result = new Data();
    $data_result->data = $graph;
    $data_result->is_semantic = true;
    $data_result->paging = Pager::calculatePagingHeaders($limit, $offset, $definition_count);

    // Add the semantic configuration for the ARC graph
    $data_result->semantic = new \stdClass();
    $data_result->semantic->conf = array('ns' => $ns);

    // The feed itself is exposed under the fixed identifier "info/dcat"
    $data_result->definition = new \stdClass();
    $data_result->definition->resource_name = 'dcat';
    $data_result->definition->collection_uri = 'info';

    return $data_result;
}
/**
 * Read a paged set of documents from the MongoDB collection behind this definition.
 *
 * Request parameters carrying the "q_" prefix are translated into Mongo query
 * criteria (the prefix is stripped from the key).
 *
 * @param array $source_definition The definition describing the Mongo source
 * @param array $rest_parameters   Remaining REST parameters (not used here)
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = [])
{
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $collection = $this->getCollection($source_definition);

    // Build the Mongo criteria from every request parameter with the q_ prefix
    $query = [];

    foreach (\Input::get() as $name => $value) {
        if (substr($name, 0, 2) == 'q_') {
            $query[str_replace('q_', '', $name)] = $value;
        }
    }

    $total_objects = $collection->count($query);

    // Only fetch the requested page of the matching documents
    $cursor = $collection->find($query)->skip($offset)->limit($limit);

    $documents = [];

    foreach ($cursor as $document) {
        // The internal Mongo identifier is of no use to the consumer
        unset($document['_id']);

        $documents[] = $document;
    }

    $data_result = new Data();
    $data_result->data = $documents;
    $data_result->paging = Pager::calculatePagingHeaders($limit, $offset, $total_objects);
    $data_result->preferred_formats = $this->getPreferredFormats();

    return $data_result;
}
/**
 * Return meta-information about definitions as a JSON response.
 *
 * When a $uri is given, the description of that single resource is returned
 * (404 when it does not exist). Without a $uri, a paged listing of the info
 * of all definitions is served.
 *
 * @param string|null $uri Identifier of a specific resource, or null for the listing
 *
 * @return \Response
 */
private function getInfo($uri = null)
{
    if (!empty($uri)) {
        // A single resource was requested, so it has to exist
        if (!$this->definition->exists($uri)) {
            \App::abort(404, "No resource was found identified with " . $uri);
        }

        $single_result = new Data();
        $single_result->data = $this->definition->getDescriptionInfo($uri);

        return ContentNegotiator::getResponse($single_result, 'json');
    }

    // No identifier given: serve a paged overview of every definition
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $listing = new Data();
    $listing->data = $this->definition->getAllDefinitionInfo($limit, $offset);
    $listing->paging = Pager::calculatePagingHeaders($limit, $offset, $this->definition->countPublished());

    return ContentNegotiator::getResponse($listing, 'json');
}
/**
 * Read a page of documents from an Elasticsearch index (official elasticsearch-php client).
 *
 * The optional "query" request parameter is run as a query_string query and
 * defaults to "*" (match everything). Basic-auth credentials, when configured,
 * are embedded in the host URL.
 *
 * @param array $source_definition The definition describing the Elasticsearch source
 * @param array $rest_parameters   Remaining REST parameters (not used here)
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = [])
{
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $query_param = \Input::get('query', '*');

    // Check for authentication: embed user:password@ into the host URL
    if (!empty($source_definition['username']) && !empty($source_definition['password'])) {
        $auth = $source_definition['username'] . ':' . $source_definition['password'] . '@';

        $parts = parse_url($source_definition['host']);

        if (!empty($parts['scheme']) && $parts['scheme'] == 'https') {
            $schemeless_url = str_replace('https://', '', $source_definition['host']);

            $source_definition['host'] = 'https://' . $auth . $schemeless_url;
        } else {
            $schemeless_url = str_replace('http://', '', $source_definition['host']);

            $source_definition['host'] = 'http://' . $auth . $schemeless_url;
        }
    }

    $hosts = ['hosts' => [$source_definition['host'] . ':' . $source_definition['port']]];

    $client = new Client($hosts);

    // Build a paged query_string search on the configured index/type
    $search_params = [];
    $search_params['index'] = $source_definition['es_index'];
    $search_params['type'] = $source_definition['es_type'];
    $search_params['body']['query']['query_string']['query'] = $query_param;
    $search_params['from'] = $offset;
    $search_params['size'] = $limit;

    $results = $client->search($search_params);

    $data_result = new Data();

    if (!empty($results['hits']['total'])) {
        // Strip the ES envelope and keep only the documents themselves
        $filtered_hits = [];

        foreach ($results['hits']['hits'] as $hit) {
            $filtered_hits[] = $hit['_source'];
        }

        $data_result->data = $filtered_hits;

        // Fix: the paging headers were calculated but never attached to the
        // result object, so paged responses carried no paging information
        $data_result->paging = Pager::calculatePagingHeaders($limit, $offset, $results['hits']['total']);
    } else {
        $data_result->data = [];
        $data_result->paging = [];
    }

    $data_result->preferred_formats = $this->getPreferredFormats();

    return $data_result;
}
/**
 * Query an Elasticsearch index through the Elastica client and return one page of hits.
 *
 * Without a "query" request parameter every document matches; otherwise the
 * parameter is run as a simple query string. The internal ETL timestamp field
 * is stripped from each document before it is returned.
 *
 * @param array $source_definition The definition describing the Elasticsearch source
 * @param array $rest_parameters   Remaining REST parameters (not used here)
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = [])
{
    // This source type pages per 500 documents by default
    Pager::setDefaultLimit(500);

    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $client = new Client([
        'host' => $source_definition['host'],
        'port' => $source_definition['port'],
        'username' => $source_definition['username'],
        'password' => $source_definition['password'],
    ]);

    $index = $client->getIndex($source_definition['es_index']);
    $type = $index->getType($source_definition['es_type']);

    $search = new Search($client);
    $search->addIndex($index);
    $search->addType($type);

    $query_param = \Input::get('query');

    // No query parameter means: match every document
    $search->setQuery(empty($query_param) ? new MatchAll() : new SimpleQueryString($query_param));

    $search->getQuery()->setFrom($offset);
    $search->getQuery()->setSize($limit);

    $resultSet = $search->search();

    $documents = [];

    foreach ($resultSet->getResults() as $hit) {
        $document = $hit->getData();

        // Drop the internal ETL bookkeeping field before returning the document
        unset($document['__tdt_etl_timestamp__']);

        $documents[] = $document;
    }

    $data = new Data();
    $data->data = $documents;

    if ($resultSet->getTotalHits() > 0) {
        $data->paging = Pager::calculatePagingHeaders($limit, $offset, $resultSet->getTotalHits());
    }

    $data->preferred_formats = $this->getPreferredFormats();

    return $data;
}
/**
 * Read a paged set of rows from a CSV datasource.
 *
 * Verifies the stored column configuration against the freshly parsed one and
 * re-stores it when they differ, resolves geo properties and the primary-key
 * column, then streams the file with fgetcsv and materializes only the rows
 * that fall inside the requested page.
 *
 * @param array $source_definition The definition describing the CSV source
 * @param array $rest_parameters   REST parameters; when present, paging is disabled
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = array())
{
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    // Get the format, if it's CSV we allow a full read of the datasource without paging limitation
    list($uri, $extension) = $this->processURI(\Request::path());

    // Note: this flag only lifts the 10k row scanning cap further down,
    // the limit/offset window still applies
    $ignore_paging = (strtolower($extension) == 'csv');

    // Disregard the paging when rest parameters are given
    if (!empty($rest_parameters)) {
        $limit = PHP_INT_MAX;
        $offset = 0;
    }

    // Get the current column configuration from the CSV file
    $parsed_columns = self::parseColumns($source_definition);

    // Check the given URI
    if (!empty($source_definition['uri'])) {
        $uri = $source_definition['uri'];
    } else {
        \App::abort(500, "No location of the CSV file has been passed, this is most likely due to a corrupt CSV definition.");
    }

    // Get data from definition
    $has_header_row = $source_definition['has_header_row'];
    $start_row = $source_definition['start_row'];
    $delimiter = $source_definition['delimiter'];

    // Get the stored CSV columns
    $columns = $this->tabular_columns->getColumns($source_definition['id'], 'CsvDefinition');

    // Check if they match with the freshly parsed columns
    if (count($parsed_columns) != count($columns)) {
        // Fix: persist the freshly parsed configuration — the old code deleted
        // the stored columns and then re-stored those same stale columns
        $this->tabular_columns->deleteBulk($source_definition['id'], 'CsvDefinition');
        $this->tabular_columns->storeBulk($source_definition['id'], 'CsvDefinition', $parsed_columns);
    } else {
        // Same count: compare column by column and re-store on the first difference
        $remaining_columns = $columns;

        foreach ($parsed_columns as $parsed_column) {
            $column = array_shift($remaining_columns);

            foreach ($parsed_column as $key => $val) {
                if ($val != $column[$key]) {
                    // Fix: store the parsed columns (not a partially consumed copy)
                    // and stop comparing entirely instead of only breaking the inner loop
                    $this->tabular_columns->deleteBulk($source_definition['id'], 'CsvDefinition');
                    $this->tabular_columns->storeBulk($source_definition['id'], 'CsvDefinition', $parsed_columns);

                    break 2;
                }
            }
        }
    }

    // In any case (changed column configuration or not) we can set the columns to the parsed ones
    $columns = $parsed_columns;

    // Get the geo properties
    $geo_properties = $this->geo_properties->getGeoProperties($source_definition['id'], 'CsvDefinition');

    $geo = array();

    foreach ($geo_properties as $geo_prop) {
        $geo[$geo_prop['property']] = $geo_prop['path'];
    }

    if (!$columns) {
        // 500 error because this shouldn't happen in normal conditions
        // Columns are parsed upon adding a CSV resource and are always present
        \App::abort(500, "Cannot find the columns of the CSV file, this might be due to a corrupted database or because columns weren't added upon creating the CSV definition.");
    }

    // Resolve the (aliased) primary key column, if one was configured
    $pk = null;

    foreach ($columns as $column) {
        if (!empty($column['is_pk'])) {
            $pk = $column['column_name_alias'];
        }
    }

    // Read the CSV file
    $row_objects = array();
    $total_rows = 0;

    // The header row itself is never part of the data
    if ($has_header_row == 1) {
        $start_row++;
    }

    // Contains the amount of rows that we added to the resulting object
    $hits = 0;

    // NOTE(review): peer verification is deliberately disabled for self-signed
    // certificates — confirm this is intended before tightening
    $ssl_options = array("ssl" => array("verify_peer" => false, "verify_peer_name" => false));

    if (($handle = fopen($uri, "r", false, stream_context_create($ssl_options))) !== false) {
        while (($data = fgetcsv($handle, 0, $delimiter)) !== false) {
            if ($total_rows >= $start_row) {
                // Create the values array, containing the (aliased) name of the column
                // to the value of a the row which $data represents
                $values = $this->createValues($columns, $data);

                if ($offset <= $hits && $offset + $limit > $hits) {
                    $obj = new \stdClass();

                    foreach ($values as $key => $value) {
                        $obj->{$key} = $value;
                    }

                    if (empty($pk)) {
                        array_push($row_objects, $obj);
                    } else {
                        // Keyed by primary key; duplicates are logged and skipped
                        if (!empty($row_objects[$obj->{$pk}])) {
                            \Log::info("The primary key {$pk} has been used already for another record!");
                        } else {
                            $row_objects[$obj->{$pk}] = $obj;
                        }
                    }
                }

                $hits++;
            }

            $total_rows++;

            // Safety cap: never scan more than 10k rows unless a full CSV dump was requested
            if ($total_rows >= 10000 && !$ignore_paging) {
                break;
            }
        }

        fclose($handle);
    } else {
        \App::abort(500, "Cannot retrieve any data from the CSV file on location 
{$uri}.");
    }

    $paging = Pager::calculatePagingHeaders($limit, $offset, $total_rows);

    $data_result = new Data();
    $data_result->data = $row_objects;
    $data_result->paging = $paging;
    $data_result->geo = $geo;
    $data_result->preferred_formats = $this->getPreferredFormats();

    return $data_result;
}
/**
 * Retrieve a Data object identified by $uri
 *
 * The response is cached per URI + paging + query string; on a cache miss the
 * matching definition's datacontroller is resolved, the data is read, REST
 * filtered and stored in cache. When no definition matches, the URI is treated
 * as a (sub)collection listing, and a 404 is returned when that fails too.
 *
 * @param string $uri The identifier that identifies a resource
 *
 * @return \Response
 */
public function get($uri)
{
    // Check permissions
    Auth::requirePermissions('dataset.view');

    // Split for an (optional) extension
    list($uri, $extension) = $this->processURI($uri);

    // Check for caching
    // Based on: URI / Rest parameters / Query parameters / Paging headers
    $cache_string = $uri;

    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $cache_string .= '/limit=' . $limit . 'offset=' . $offset;
    $cache_string .= http_build_query(\Input::except('limit', 'offset', 'page', 'page_size'));

    $cache_string = sha1($cache_string);

    if (Cache::has($cache_string)) {
        return ContentNegotiator::getResponse(Cache::get($cache_string), $extension);
    } else {
        // Get definition
        $definition = $this->definition->getByIdentifier($uri);

        if ($definition) {
            // Get source definition
            $source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);

            if ($source_definition) {
                $source_type = $source_definition['type'];

                // Create the right datacontroller
                $controller_class = 'Tdt\\Core\\DataControllers\\' . $source_type . 'Controller';
                $data_controller = \App::make($controller_class);

                // Get REST parameters: strip the definition identifier from the URI,
                // the remaining path segments act as filters on the data
                $rest_parameters = str_replace($definition['collection_uri'] . '/' . $definition['resource_name'], '', $uri);
                $rest_parameters = ltrim($rest_parameters, '/');
                $rest_parameters = explode('/', $rest_parameters);

                // An empty first segment means there are no REST parameters at all;
                // the is_numeric check keeps a literal "0" segment alive (empty('0') is true)
                if (empty($rest_parameters[0]) && !is_numeric($rest_parameters[0])) {
                    $rest_parameters = array();
                }

                // Retrieve dataobject from datacontroller
                $data = $data_controller->readData($source_definition, $rest_parameters);
                $data->rest_parameters = $rest_parameters;

                // REST filtering
                if ($source_type != 'INSTALLED' && count($data->rest_parameters) > 0) {
                    $data->data = self::applyRestFilter($data->data, $data->rest_parameters);
                }

                // Add definition to the object
                $data->definition = $definition;

                // Add source definition to the object
                $data->source_definition = $source_definition;

                // Add the available, supported formats to the object
                $format_helper = new FormatHelper();
                $data->formats = $format_helper->getAvailableFormats($data);

                // Store in cache, lifetime comes from the source definition
                Cache::put($cache_string, $data, $source_definition['cache']);

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "Source for the definition could not be found.");
            }
        } else {
            // Couldn't find a definition, but it might be a collection
            $resources = $this->definition->getByCollection($uri);

            if (count($resources) > 0) {
                $data = new Data();
                $data->data = new \stdClass();
                $data->data->datasets = array();
                $data->data->collections = array();

                foreach ($resources as $res) {
                    // Check if it's a subcollection or a dataset
                    $collection_uri = rtrim($res['collection_uri'], '/');

                    if ($collection_uri == $uri) {
                        array_push($data->data->datasets, \URL::to($collection_uri . '/' . $res['resource_name']));
                    } else {
                        // Push the subcollection if it's not already in the array
                        if (!in_array(\URL::to($collection_uri), $data->data->collections)) {
                            array_push($data->data->collections, \URL::to($collection_uri));
                        }
                    }
                }

                // Fake a definition so the formatters have something to work with
                $data->definition = new \Definition();
                $uri_array = explode('/', $uri);
                $last_chunk = array_pop($uri_array);

                $data->definition->collection_uri = join('/', $uri_array);
                $data->definition->resource_name = $last_chunk;

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "The dataset or collection you were looking for could not be found (URI: {$uri}).");
            }
        }
    }
}
/**
 * Read a paged set of records from an ESRI shapefile (SHP) datasource.
 *
 * Remote shapefiles (.shp/.dbf/.shx) are first downloaded to the system temp
 * directory. Each record becomes an anonymous object with its (aliased) dBASE
 * attributes and a geometry parsed according to the record's shape type;
 * coordinates are reprojected from the definition's EPSG code to EPSG:4326.
 *
 * @param array $source_definition The definition describing the SHP source
 * @param array $rest_parameters   REST parameters; when present, paging is disabled
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = array())
{
    // It may take a while for the SHP to be read
    set_time_limit(0);

    // Get the limit and offset
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    // Disregard the paging when rest parameters are given
    if (!empty($rest_parameters)) {
        $limit = PHP_INT_MAX;
        $offset = 0;
    }

    $uri = $source_definition['uri'];

    $columns = array();

    $this->epsg = $source_definition['epsg'];

    // The tmp folder of the system, if none is given abort the process
    $tmp_path = sys_get_temp_dir();

    if (empty($tmp_path)) {
        // If this occurs then the server is not configured correctly, thus a 500 error is thrown
        \App::abort(500, "The temp directory, retrieved by the operating system, could not be retrieved.");
    }

    // Get the columns
    $columns = $this->tabular_columns->getColumnAliases($source_definition['id'], 'ShpDefinition');

    // Get the geo properties
    $geo_properties = $this->geo_property->getGeoProperties($source_definition['id'], 'ShpDefinition');

    $geo = array();

    foreach ($geo_properties as $geo_prop) {
        $geo[$geo_prop['property']] = $geo_prop['path'];
    }

    if (!$columns) {
        \App::abort(500, "Cannot find the columns of the SHP definition.");
    }

    try {
        // Create the array in which all the resulting objects will be placed
        $arrayOfRowObjects = array();

        // Prepare the options to read the SHP file
        $options = array('noparts' => false);

        $is_url = substr($uri, 0, 4) == "http";

        // If the shape files are located on an HTTP address, fetch them and store them locally
        if ($is_url) {
            $tmp_file_name = uniqid();
            $tmp_file = $tmp_path . "/" . $tmp_file_name;

            file_put_contents($tmp_file . ".shp", file_get_contents(substr($uri, 0, strlen($uri) - 4) . ".shp"));
            file_put_contents($tmp_file . ".dbf", file_get_contents(substr($uri, 0, strlen($uri) - 4) . ".dbf"));
            file_put_contents($tmp_file . ".shx", file_get_contents(substr($uri, 0, strlen($uri) - 4) . ".shx"));

            // Along this file the class will use file.shx and file.dbf
            $shp = new ShapeReader($tmp_file . ".shp", $options);
        } else {
            // Along this file the class will use file.shx and file.dbf
            $shp = new ShapeReader($uri, $options);
        }

        // The projection belongs to the definition, not to an individual record:
        // resolve it once instead of on every loop iteration (hoisted)
        $projection = $this->projections->getByCode($this->epsg);

        $projCode = $projection['projection'];

        if (empty($projCode)) {
            \App::abort(400, "Could not find a supported EPSG code.");
        }

        $this->proj4 = new Proj4php();
        $this->projSrc = new Proj('EPSG:' . $this->epsg, $this->proj4);
        $this->projDest = new Proj('EPSG:4326', $this->proj4);

        // Keep track of the total amount of rows
        $total_rows = 0;

        // Get the shape records in the binary file
        while ($record = $shp->getNext()) {
            // Only materialize the records that fall inside the requested page
            if ($offset <= $total_rows && $offset + $limit > $total_rows) {
                // Every shape record is parsed as an anonymous object with the properties attached to it
                $rowobject = new \stdClass();

                // Get the dBASE data
                $dbf_data = $record->getDbfData();

                foreach ($dbf_data as $property => $value) {
                    // Fix: the alias used to be looked up with the untrimmed property
                    // name first (a possibly undefined index); trim before resolving
                    $property = trim($property);
                    $property_alias = $columns[$property];

                    $rowobject->{$property_alias} = trim($value);
                }

                // Read the shape data
                $shp_data = $record->getShpData();

                $shape_type = self::$RECORD_TYPES[$record->getTypeCode()];

                // Attach the parsed geometry according to the record's shape type
                switch (strtolower($shape_type)) {
                    case 'point':
                        $point = $this->parsePoint($shp_data);
                        $rowobject->x = $point['x'];
                        $rowobject->y = $point['y'];
                        break;
                    case 'polyline':
                        $rowobject->parts = $this->parsePolyline($shp_data);
                        break;
                    case 'polygon':
                        $rowobject->parts = $this->parsePolygon($shp_data);
                        break;
                    case 'multipoint':
                        $rowobject->points = $this->parseMultipoint($shp_data);
                        break;
                    case 'pointz':
                        $point = $this->parsePointZ($shp_data);
                        $rowobject->x = $point['x'];
                        $rowobject->y = $point['y'];
                        $rowobject->z = $point['z'];
                        break;
                    case 'polylinez':
                        $rowobject->parts = $this->parsePolylineZ($shp_data);
                        break;
                    case 'polygonz':
                        $rowobject->parts = $this->parsePolygonZ($shp_data);
                        break;
                    case 'multipointz':
                        $rowobject->points = $this->parseMultiPointZ($shp_data);
                        break;
                }

                array_push($arrayOfRowObjects, $rowobject);
            }

            $total_rows++;

            // Safety cap on the amount of records that are scanned
            if ($total_rows >= 10000) {
                break;
            }
        }

        // Calculate the paging headers properties
        $paging = Pager::calculatePagingHeaders($limit, $offset, $total_rows);

        $data_result = new Data();
        $data_result->data = $arrayOfRowObjects;
        $data_result->geo = $geo;
        $data_result->paging = $paging;
        $data_result->preferred_formats = array('map', 'geojson');

        return $data_result;
    } catch (Exception $ex) {
        // Fix: "{$ex->getMessage}()" interpolated a non-existent *property* followed
        // by a literal "()"; the method call must be inside the braces
        \App::abort(500, "Something went wrong while putting the SHP files in a temporary directory or during the extraction of the SHP data. The error message is: {$ex->getMessage()}.");
    }
}
/**
 * Execute a (possibly paged) SPARQL query against the configured endpoint.
 *
 * SELECT queries are requested as sparql-results+json, CONSTRUCT queries as
 * RDF/XML (parsed into an EasyRdf graph, marked semantic). When the query is
 * not already paged, a derived count query determines the paging headers and
 * OFFSET/LIMIT clauses are appended.
 *
 * @param array $source_definition The definition describing the SPARQL source
 * @param array $rest_parameters   Remaining REST parameters (not used here)
 *
 * @return Data
 */
public function readData($source_definition, $rest_parameters = array())
{
    $endpoint = $source_definition['endpoint'];
    $endpoint_user = $source_definition['endpoint_user'];
    $endpoint_password = $source_definition['endpoint_password'];
    $query = $source_definition['query'];

    $limitInQuery = false;

    // Check if the query is already paged.
    // NOTE(review): stripos() matches "limit" anywhere (even inside a word or IRI),
    // and a match at position 0 is falsy — this check looks fragile, confirm intent
    if (stripos($source_definition['query'], 'limit')) {
        $limitInQuery = true;
    } else {
        list($limit, $offset) = Pager::calculateLimitAndOffset();

        // Sparql endpoints often have a built in limit on the amount of rows that they return
        // Avoid problems by capping the given limit by the Pager class
        if ($limit > self::$MAX_LIMIT) {
            $limit = self::$MAX_LIMIT;
        }
    }

    // Retrieve the necessary variables to read from a SPARQL endpoint
    // NOTE(review): $uri is never used below
    $uri = \Request::url();

    // Process the if and ifnot-statements in the query
    $query = $this->processLogicalStatements($query);

    // Process the parameters in the uri (to catch hashtag values for example)
    $query = $this->processParameters($query);

    // Create a count query for paging purposes, this assumes that a where clause is included in the query
    // Note that the where "clause" is obligatory but it's not mandatory it is preceded by a WHERE keyword
    $matches = array();
    $keyword = "";

    // If a select statement has been passed, we ask for JSON results
    // If a construct statement has been passed, we ask for RDF/XML
    // This piece of code can be removed in later versions as the query_type will be determined
    // upon saving/editing a query
    if (stripos($query, "select") !== false) {
        // SELECT query
        $keyword = "select";
    } elseif (stripos($query, "construct") !== false) {
        // CONSTRUCT query
        $keyword = "construct";
    } else {
        // No valid SPARQL keyword has been found, is checked during validation
        \App::abort(500, "No CONSTRUCT or SELECT statement has been found in the given query: {$query}");
    }

    // Prepare the count query for paging purposes
    // This implies the removal of the select or construct statement
    // and only using the where statement
    // Make a distinction between select and construct since
    // construct will be followed by a {} sequence, whereas a select statement will not
    $prefix = '';
    $filter = '';

    // Covers FROM <...> FROM <...> WHERE{ } , FROM <...> FROM <...> { }, WHERE { }, { }
    $where_clause = '(.+((FROM.*<.+>)+.*{.+})|((GRAPH.*<.+>)+.*{.+})|.*?(WHERE.*{.+})|.*?({.+}))[a-zA-Z0-9]*?';

    $matches = array();

    if ($keyword == 'select') {
        $regex = $keyword . $where_clause;

        preg_match_all("/(.*?){$regex}/msi", $query, $matches);
    } else {
        // For CONSTRUCT the template {...} block is skipped before matching the where clause
        preg_match_all("/(.*?){$keyword}(\\s*\\{[^{]+\\}){$where_clause}/mis", $query, $matches);
    }

    // Everything before the keyword (typically the PREFIX declarations)
    $prefix = $matches[1][0];

    $filter = "";

    // Preg match all has 3 entries for the where clause, pick the first hit
    if (!empty($matches[3][0])) {
        $filter = $matches[3][0];
    }

    if (!empty($matches[4][0]) && empty($filter)) {
        $filter = $matches[4][0];
    }

    if (!empty($matches[5][0]) && empty($filter)) {
        $filter = $matches[5][0];
    }

    // Last resort: the final capture group of the regex
    $last_element = end($matches);

    if (!empty($last_element[0]) && empty($filter)) {
        $filter = $last_element[0];
    }

    if (empty($filter)) {
        \App::abort(500, "Failed to retrieve the where clause from the query: {$query}");
    }

    if (!$limitInQuery) {
        // Prepare the query to count results (prefixes + COUNT over the extracted where clause)
        $count_query = $matches[1][0] . ' SELECT (count(*) AS ?count) ' . $filter;

        // urlencode() produces +, the endpoint expects %20 for spaces
        $count_query = urlencode($count_query);
        $count_query = str_replace("+", "%20", $count_query);

        $count_uri = $endpoint . '?query=' . $count_query . '&format=' . urlencode("application/sparql-results+json");

        $response = $this->executeUri($count_uri, $endpoint_user, $endpoint_password);

        $response = json_decode($response);

        // If something goes wrong, the resonse will either be null or false
        if (!$response) {
            \App::abort(500, "Something went wrong while executing the count query. 
The assembled URI was: {$count_uri}");
        }

        $count = $response->results->bindings[0]->count->value;

        // Calculate page link headers, previous, next and last based on the count from the previous query
        $paging = Pager::calculatePagingHeaders($limit, $offset, $count);
    }

    // Re-process the original query for the actual data request
    $query = $source_definition['query'];

    $query = $this->processLogicalStatements($query);

    $query = $this->processParameters($query);

    if (!$limitInQuery) {
        // NOTE(review): empty() skips offset 0 (correct) but would also skip a limit of 0
        if (!empty($offset)) {
            $query = $query . " OFFSET {$offset} ";
        }

        if (!empty($limit)) {
            $query = $query . " LIMIT {$limit}";
        }
    }

    // Prepare the query with proper encoding for the request
    $query = str_replace('%23', '#', $query);

    $q = urlencode($query);
    $q = str_replace("+", "%20", $q);

    if ($keyword == 'select') {
        // SELECT: fetch sparql-results+json and decode to an associative array
        $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/sparql-results+json");

        $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);

        $result = json_decode($response, true);

        if (!$result) {
            \App::abort(500, 'The query has been executed, but the endpoint failed to return sparql results in JSON.');
        }

        $is_semantic = false;
    } else {
        // CONSTRUCT: fetch RDF/XML and parse it into an EasyRdf graph
        $query_uri = $endpoint . '?query=' . $q . '&format=' . urlencode("application/rdf+xml");

        $response = $this->executeUri($query_uri, $endpoint_user, $endpoint_password);

        // Parse the triple response and retrieve the triples from them
        $result = new Graph();

        $parser = new RdfXml();

        $parser->parse($result, $response, 'rdfxml', null);

        $is_semantic = true;
    }

    // Create the data object to return
    $data = new Data();

    if (!$limitInQuery) {
        $data->paging = $paging;
    }

    $data->data = $result;
    $data->is_semantic = $is_semantic;
    $data->preferred_formats = $this->getPreferredFormats();

    if ($is_semantic) {
        // Fetch the available namespaces and pass
        // them as a configuration of the semantic data result
        $ontologies = $this->ontologies->getAll();

        $prefixes = array();

        foreach ($ontologies as $ontology) {
            $prefixes[$ontology["prefix"]] = $ontology["uri"];
        }

        $data->semantic = new \stdClass();
        $data->semantic->conf = array('ns' => $prefixes);

        // Semantic results prefer triple serializations over tabular formats
        $data->preferred_formats = array('ttl', 'jsonld', 'rdf');
    }

    // Determine which ${...} placeholder parameters were given in the query
    $matches = array();
    $query_parameters = array();

    preg_match_all("/\\\$\\{(.+?)\\}/", $source_definition['query'], $matches);

    if (!empty($matches[1])) {
        $matches = $matches[1];

        foreach ($matches as $entry) {
            array_push($query_parameters, $entry);
        }
    }

    $data->optional_parameters = $query_parameters;

    return $data;
}
/**
 * Retrieve a Data object identified by $uri
 *
 * Variant with request throttling, raw XML passthrough and semantic (hydra)
 * paging annotations. The response is cached per URI + paging + query string;
 * on a cache miss the definition's datacontroller reads the data, which is
 * REST filtered, optionally annotated with hydra paging, and cached. When no
 * definition matches, the URI is treated as a (sub)collection listing.
 *
 * @param string $uri The identifier that identifies a resource
 *
 * @return \Response
 */
public function get($uri)
{
    // Check permissions
    Auth::requirePermissions('dataset.view');

    // Split for an (optional) extension
    list($uri, $extension) = $this->processURI($uri);

    // Check for caching
    // Based on: URI / Rest parameters / Query parameters / Paging headers
    $cache_string = $uri;

    list($limit, $offset) = Pager::calculateLimitAndOffset();

    $cache_string .= '/limit=' . $limit . 'offset=' . $offset;

    // Paging related parameters don't belong in the cache key, they're covered above
    $omit = ['limit', 'offset', 'page', 'page_size'];

    $query_string_params = \Input::get();

    foreach ($query_string_params as $key => $val) {
        if (in_array($key, $omit)) {
            unset($query_string_params[$key]);
        }
    }

    $cache_string .= http_build_query($query_string_params);

    $cache_string = sha1($cache_string);

    if (Cache::has($cache_string)) {
        return ContentNegotiator::getResponse(Cache::get($cache_string), $extension);
    } else {
        // Get definition
        $definition = $this->definition->getByIdentifier($uri);

        if ($definition) {
            // Get source definition
            $source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);

            if ($source_definition) {
                $source_type = $source_definition['type'];

                // Create the right datacontroller
                $controller_class = 'Tdt\\Core\\DataControllers\\' . $source_type . 'Controller';
                $data_controller = \App::make($controller_class);

                // Get REST parameters: every URI segment that is not part of the
                // definition identifier acts as a filter on the data
                $uri_segments = explode('/', $uri);
                $rest_parameters = array_diff($uri_segments, array($definition['collection_uri'], $definition['resource_name']));
                $rest_parameters = array_values($rest_parameters);

                // Rate limiting: a non-empty throttle response short-circuits the request
                $throttle_response = $this->applyThrottle($definition);

                if (!empty($throttle_response)) {
                    return $throttle_response;
                }

                // Retrieve dataobject from datacontroller
                $data = $data_controller->readData($source_definition, $rest_parameters);

                // If the source type is XML, just return the XML contents, don't transform
                if (strtolower($source_type) == 'xml' && $extension == 'xml') {
                    return $this->createXMLResponse($data->data);
                }

                $data->rest_parameters = $rest_parameters;

                // REST filtering
                if ($source_type != 'INSTALLED' && count($data->rest_parameters) > 0) {
                    $data->data = self::applyRestFilter($data->data, $data->rest_parameters);
                }

                // Semantic paging with the hydra voc
                if ($data->is_semantic && !empty($data->paging)) {
                    \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');

                    $graph = $data->data;

                    $url = \URL::to($definition['collection_uri'] . '/' . $definition['resource_name']);

                    $request_url = \Request::url();

                    $graph->addResource($request_url, 'void:subset', $url);

                    // Each paging entry holds [offset, limit] — translated into hydra page links
                    foreach ($data->paging as $key => $val) {
                        $paged_url = $request_url . '?offset=' . $val[0] . '&limit=' . $val[1] . Pager::buildQuerystring();

                        switch ($key) {
                            case 'next':
                                $graph->addResource($request_url, 'hydra:nextPage', $paged_url);
                                break;
                            case 'previous':
                                $graph->addResource($request_url, 'hydra:previousPage', $paged_url);
                                break;
                            case 'last':
                                $graph->addResource($request_url, 'hydra:lastPage', $paged_url);
                                break;
                            case 'first':
                                $graph->addResource($request_url, 'hydra:firstPage', $paged_url);
                                break;
                        }
                    }

                    // Describe the dataset itself in the graph
                    $graph->addResource($url, 'a', 'dcat:Dataset');

                    $title = null;

                    if (!empty($definition['title'])) {
                        $title = $definition['title'];
                    } else {
                        $title = $definition['collection_uri'] . '/' . $definition['resource_name'];
                    }

                    $graph->addLiteral($url, 'dc:title', $title);
                    $graph->addLiteral($url, 'dc:description', $source_definition['description']);
                    $graph->addResource($url, 'dcat:distribution', $url . '.json');

                    $data->data = $graph;
                }

                // Add definition to the object
                $data->definition = $definition;

                // Add source definition to the object
                $data->source_definition = $source_definition;

                // Add the available, supported formats to the object
                $format_helper = new FormatHelper();
                $data->formats = $format_helper->getAvailableFormats($data);

                // Store in cache, lifetime comes from the source definition
                Cache::put($cache_string, $data, $source_definition['cache']);

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "Source for the definition could not be found.");
            }
        } else {
            // Couldn't find a definition, but it might be a collection
            $resources = $this->definition->getByCollection($uri);

            if (count($resources) > 0) {
                $data = new Data();
                $data->data = new \stdClass();
                $data->data->datasets = array();
                $data->data->collections = array();

                foreach ($resources as $res) {
                    // Check if it's a subcollection or a dataset
                    $collection_uri = rtrim($res['collection_uri'], '/');

                    if ($collection_uri == $uri) {
                        array_push($data->data->datasets, \URL::to($collection_uri . '/' . $res['resource_name']));
                    } else {
                        // Push the subcollection if it's not already in the array
                        if (!in_array(\URL::to($collection_uri), $data->data->collections)) {
                            array_push($data->data->collections, \URL::to($collection_uri));
                        }
                    }
                }

                // Fake a definition so the formatters have something to work with
                $data->definition = new \Definition();
                $uri_array = explode('/', $uri);
                $last_chunk = array_pop($uri_array);

                $data->definition->collection_uri = join('/', $uri_array);
                $data->definition->resource_name = $last_chunk;

                // Return the formatted response with content negotiation
                return ContentNegotiator::getResponse($data, $extension);
            } else {
                \App::abort(404, "The dataset or collection you were looking for could not be found (URI: {$uri}).");
            }
        }
    }
}
/**
 * Dereferences a URI only when the core application has no resource attributed to it
 *
 * Resolution order: core dataset → core resource (e.g. discovery) → core
 * collection listing → triple-store dereferencing (cached for 5 minutes).
 *
 * @param string $identifier The path of the URL
 *
 * @return \Response
 */
public function resolve($identifier)
{
    // Split the identifier from it's format (URI, json)
    // Caveat: there is a small chance that e.g. URI.json is the full URI that
    // needs to be dereferenced. (https://github.com/tdt/triples/issues/48)
    list($identifier, $extension) = $this->processURI($identifier);

    // If the identifier represents a dataset in core, ask core to deliver a response
    if ($this->isCoreDataset($identifier)) {
        $controller = \App::make('Tdt\\Core\\Datasets\\DatasetController');

        $data = $controller->fetchData($identifier);

        $definition = $this->definition->getByIdentifier($identifier);

        $data->definition = $definition;
        $data->source_definition = $this->definition->getDefinitionSource($definition['source_id'], $definition['source_type']);

        $format_helper = new FormatHelper();
        $data->formats = $format_helper->getAvailableFormats($data);

        // The identifier can be a core non-dataset resource (e.g. discovery)
    } else {
        if ($this->isCoreResource($identifier)) {
            $controller = \App::make('Tdt\\Core\\BaseController');

            return $controller->handleRequest($identifier);

            // Could be a collection
        } else {
            if ($this->isCoreCollection($identifier)) {
                // Couldn't find a definition, but it might be a collection
                $resources = $this->definition->getByCollection($identifier);

                $data = new Data();
                $data->data = new \stdClass();
                $data->data->datasets = array();
                $data->data->collections = array();

                if (count($resources) > 0) {
                    foreach ($resources as $res) {
                        // Check if it's a subcollection or a dataset
                        $collection_uri = rtrim($res['collection_uri'], '/');

                        if ($collection_uri == $identifier) {
                            array_push($data->data->datasets, \URL::to($collection_uri . '/' . $res['resource_name']));
                        } else {
                            // Push the subcollection if it's not already in the array
                            if (!in_array(\URL::to($collection_uri), $data->data->collections)) {
                                array_push($data->data->collections, \URL::to($collection_uri));
                            }
                        }
                    }
                }

                // Fake a definition so the formatters have something to work with
                $data->definition = new \Definition();
                $uri_array = explode('/', $identifier);
                $last_chunk = array_pop($uri_array);

                $data->definition->collection_uri = join('/', $uri_array);
                $data->definition->resource_name = $last_chunk;

                // Nothing works out, try to dereference the URI
            } else {
                // Rebuild the URI as is, the Symfony Request components url-decode everything
                // Dereferencing however needs to deal with the exact request URI's
                $cache_string = sha1($this->getRawRequestURI(\Request::url()));

                // Check if the cache already contains the dereferencing info
                if (Cache::has($cache_string)) {
                    $data = Cache::get($cache_string);
                } else {
                    $base_uri = \URL::to($identifier);

                    if (empty($base_uri)) {
                        $base_uri = \Request::root();
                    }

                    // Calculate the limit and offset parameters
                    list($limit, $offset) = Pager::calculateLimitAndOffset();

                    // Fetch the triples that can be used to dereference the URI
                    // Fix: the calculated $limit was previously ignored in favour
                    // of a hard-coded 100
                    $result = $this->triples->getTriples($base_uri, $limit, $offset, true);

                    // If the graph contains no triples, then the URI couldn't resolve to anything, 404 it is
                    if ($result->countTriples() == 0) {
                        \App::abort(404, "The resource could not be dereferenced. 
No matching triples were found.");
                    }

                    // Mock a tdt/core definition object that is used in the formatters
                    $identifier_pieces = explode('/', $identifier);

                    $resource_name = array_pop($identifier_pieces);
                    $collection_uri = implode('/', $identifier_pieces);

                    $definition = array(
                        'resource_name' => $resource_name,
                        'collection_uri' => $collection_uri,
                        'updated_at' => time(),
                        'created_at' => time(),
                        'source_type' => '__triplesCache',
                    );

                    $source_definition = array(
                        'description' => 'Semantic data collected retrieved from the configured semantic sources.',
                        'type' => 'Semantic',
                    );

                    $data = new Data();
                    $data->definition = $definition;
                    $data->source_definition = $source_definition;
                    $data->data = $result;
                    $data->is_semantic = true;

                    // Add the available, supported formats to the object
                    $format_helper = new FormatHelper();
                    $data->formats = $format_helper->getAvailableFormats($data);

                    // Store in cache for a default of 5 minutes
                    Cache::put($cache_string, $data, 5);
                }
            }
        }
    }

    // Add the hydra namespace, it's not present in the easy rdf namespaces by default
    \EasyRdf_Namespace::set('hydra', 'http://www.w3.org/ns/hydra/core#');

    // Return the formatted response with content negotiation
    $response = ContentNegotiator::getResponse($data, $extension);

    $response->header('Vary', 'Accept');

    // Allow CORS
    $response->headers->set('Access-Control-Allow-Origin', '*');

    return $response;
}
/**
 * Read the data from a SHP datasource.
 *
 * @param array $source_definition The SHP source definition (uri, epsg, id, ...)
 * @param array $rest_parameters   Optional rest parameters; when present, paging is disregarded
 *
 * @return Data object containing the parsed shape records
 */
public function readData($source_definition, $rest_parameters = array())
{
    // It may take a while for the SHP to be read
    set_time_limit(0);

    // Get the limit and offset
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    // Disregard the paging when rest parameters are given
    if (!empty($rest_parameters)) {
        $limit = PHP_INT_MAX;
        $offset = 0;
    }

    $uri = $source_definition['uri'];
    $columns = array();
    $epsg = $source_definition['epsg'];

    // The tmp folder of the system, if none is given abort the process
    $tmp_path = sys_get_temp_dir();

    if (empty($tmp_path)) {
        // If this occurs then the server is not configured correctly, thus a 500 error is thrown
        \App::abort(500, "The temp directory, retrieved by the operating system, could not be retrieved.");
    }

    // Get the columns
    $columns = $this->tabular_columns->getColumnAliases($source_definition['id'], 'ShpDefinition');

    // Get the geo properties (property => column alias that holds the value)
    $geo_properties = $this->geo_property->getGeoProperties($source_definition['id'], 'ShpDefinition');
    $geo = array();

    foreach ($geo_properties as $geo_prop) {
        $geo[$geo_prop['property']] = $geo_prop['path'];
    }

    if (!$columns) {
        \App::abort(500, "Cannot find the columns of the SHP definition.");
    }

    try {
        // Create the array in which all the resulting objects will be placed
        $arrayOfRowObjects = array();

        // Prepare the options to read the SHP file
        $options = array('noparts' => false);

        $is_url = substr($uri, 0, 4) == "http";

        // If the shape files are located on an HTTP address, fetch them and store them locally
        if ($is_url) {
            $tmp_file_name = uniqid();
            $tmp_file = $tmp_path . "/" . $tmp_file_name;

            // The .shp, .dbf and .shx files share the same base URI
            $base_shape_uri = substr($uri, 0, strlen($uri) - 4);

            file_put_contents($tmp_file . ".shp", file_get_contents($base_shape_uri . ".shp"));
            file_put_contents($tmp_file . ".dbf", file_get_contents($base_shape_uri . ".dbf"));
            file_put_contents($tmp_file . ".shx", file_get_contents($base_shape_uri . ".shx"));

            // Along this file the class will use file.shx and file.dbf
            $shp = new ShapeReader($tmp_file . ".shp", $options);
        } else {
            // Along this file the class will use file.shx and file.dbf
            $shp = new ShapeReader($uri, $options);
        }

        // Keep track of the total amount of rows
        $total_rows = 0;

        // Get the shape records in the binary file
        while ($record = $shp->getNext()) {
            // Only parse records that fall within the currently requested page
            if ($offset <= $total_rows && $offset + $limit > $total_rows) {
                // Every shape record is parsed as an anonymous object with the properties attached to it
                $rowobject = new \stdClass();

                // Get the dBASE data
                $dbf_data = $record->getDbfData();

                foreach ($dbf_data as $property => $value) {
                    // dBASE property names can be padded with whitespace: trim before the
                    // alias lookup (looking up the raw key first triggered undefined-index notices)
                    $property = trim($property);
                    $property_alias = $columns[$property];

                    $rowobject->{$property_alias} = trim($value);
                }

                // Read the shape data
                $shp_data = $record->getShpData();

                // Prepare the projections when the source uses another coordinate system than WGS84
                if (!empty($epsg)) {
                    $proj4 = new \Proj4php();
                    $projSrc = new \Proj4phpProj('EPSG:' . $epsg, $proj4);
                    $projDest = new \Proj4phpProj('EPSG:4326', $proj4);
                }

                // If it's not a point, it's a collection of coordinates describing a shape
                if (!empty($shp_data['parts'])) {
                    $parts = array();

                    foreach ($shp_data['parts'] as $part) {
                        $points = array();

                        foreach ($part['points'] as $point) {
                            $x = $point['x'];
                            $y = $point['y'];

                            // Translate the coordinates to WGS84 geo coordinates
                            if (!empty($epsg)) {
                                $pointSrc = new \proj4phpPoint($x, $y);
                                $pointDest = $proj4->transform($projSrc, $projDest, $pointSrc);
                                $x = $pointDest->x;
                                $y = $pointDest->y;
                            }

                            $points[] = $x . ',' . $y;
                        }

                        array_push($parts, implode(" ", $points));
                    }

                    // Parts only contains 1 shape, thus 1 geo entry
                    $alias = reset($geo);
                    $rowobject->{$alias} = implode(';', $parts);
                }

                // A point shape: a single x/y coordinate pair
                if (isset($shp_data['x'])) {
                    $x = $shp_data['x'];
                    $y = $shp_data['y'];

                    if (!empty($epsg)) {
                        $pointSrc = new \proj4phpPoint($x, $y);
                        $pointDest = $proj4->transform($projSrc, $projDest, $pointSrc);
                        $x = $pointDest->x;
                        $y = $pointDest->y;
                    }

                    // NOTE(review): the original wrote $rowobject->{$geo}[...], which stringifies
                    // the $geo array to the literal property name "Array". Mirroring the parts
                    // branch, the geo column alias is the first entry of $geo — confirm against
                    // the geo property configuration.
                    $alias = reset($geo);
                    $rowobject->{$alias}['longitude'] = $x;
                    $rowobject->{$alias}['latitude'] = $y;
                }

                array_push($arrayOfRowObjects, $rowobject);
            }

            $total_rows++;
        }

        // Calculate the paging headers properties
        $paging = Pager::calculatePagingHeaders($limit, $offset, $total_rows);

        $data_result = new Data();
        $data_result->data = $arrayOfRowObjects;
        $data_result->geo = $geo;
        $data_result->paging = $paging;
        $data_result->preferred_formats = array('map');

        return $data_result;
    } catch (\Exception $ex) {
        // Original interpolated {$ex->getMessage}() — a nonexistent property, not the method call
        \App::abort(500, "Something went wrong while putting the SHP files in a temporary directory or during the extraction of the SHP data. The error message is: {$ex->getMessage()}.");
    }
}
/**
 * Read the data from an XLS(X) datasource.
 *
 * @param array $source_definition The XLS source definition (uri, sheet, has_header_row, start_row, id, ...)
 * @param array $rest_parameters   Optional rest parameters; when present, paging is disregarded
 *
 * @return Data object with the parsed rows of the worksheet
 */
public function readData($source_definition, $rest_parameters = array())
{
    list($limit, $offset) = Pager::calculateLimitAndOffset();

    // Disregard the paging when rest parameters are given
    if (!empty($rest_parameters)) {
        $limit = PHP_INT_MAX;
        $offset = 0;
    }

    // Get the current column configuration for the XLS sheet data
    $parsed_columns = self::parseColumns($source_definition);

    $uri = $source_definition['uri'];
    $sheet = $source_definition['sheet'];
    $has_header_row = $source_definition['has_header_row'];

    // Rows start at 1 in XLS, we have however documented that they start at 0 to be
    // consistent with common sense and other tabular sources such as CSV.
    $start_row = $source_definition['start_row'] + 1;

    // Retrieve the stored columns for this XLS definition
    $columns = $this->tabular_columns->getColumns($source_definition['id'], 'XlsDefinition');

    // Check if they match with the freshly parsed columns
    if (count($parsed_columns) != count($columns)) {
        // Save the new config: store the freshly parsed columns
        // (the original stored the just-deleted stale ones, a data-losing no-op)
        $this->tabular_columns->deleteBulk($source_definition['id'], 'XlsDefinition');
        $this->tabular_columns->storeBulk($source_definition['id'], 'XlsDefinition', $parsed_columns);
    } else {
        foreach ($parsed_columns as $parsed_column) {
            $column = array_shift($columns);

            foreach ($parsed_column as $key => $val) {
                if ($val != $column[$key]) {
                    // A column property changed: replace the stored config with the parsed one
                    $this->tabular_columns->deleteBulk($source_definition['id'], 'XlsDefinition');
                    $this->tabular_columns->storeBulk($source_definition['id'], 'XlsDefinition', $parsed_columns);
                    break;
                }
            }
        }
    }

    // In any case (changed column configuration or not) we can set the columns to the parsed ones
    $columns = $parsed_columns;

    if (empty($columns)) {
        \App::abort(500, "Cannot find the columns from the XLS definition.");
    }

    // Create aliases for the columns
    $aliases = $this->tabular_columns->getColumnAliases($source_definition['id'], 'XlsDefinition');
    $pk = null;

    foreach ($columns as $column) {
        if (!empty($column['is_pk'])) {
            $pk = $column['column_name_alias'];
        }
    }

    // Create an array to store our objects in
    // (the original had a stray `return` here, aborting the method with an empty result)
    $row_objects = array();

    // Get the temporary directory to store our excel files in if necessary
    $tmp_path = sys_get_temp_dir();

    if (empty($tmp_path)) {
        \App::abort(500, "The temp directory, retrieved by the operating system, could not be retrieved.");
    }

    try {
        if (substr($uri, 0, 4) == "http") {
            // Remote file: download it into the tmp directory before loading
            $tmpFile = uniqid();
            file_put_contents($tmp_path . "/" . $tmpFile, file_get_contents($uri));
            $php_obj = self::loadExcel($tmp_path . "/" . $tmpFile, $this->getFileExtension($uri), $sheet);
        } else {
            $php_obj = self::loadExcel($uri, $this->getFileExtension($uri), $sheet);
        }

        if (empty($php_obj)) {
            \App::abort(500, "The Excel file could not be retrieved from the location {$uri}.");
        }

        $worksheet = $php_obj->getSheetByName($sheet);

        if (empty($worksheet)) {
            \App::abort(500, "The worksheet {$sheet} could not be found in the Excel file located on {$uri}.");
        }

        // The amount of rows added to the result
        $total_rows = 0;

        // Skip the header row when the definition declares one
        if ($has_header_row == 1) {
            $start_row++;
        }

        // Iterate all the rows of the Excel sheet
        foreach ($worksheet->getRowIterator() as $row) {
            $row_index = $row->getRowIndex();

            // If our offset is ok, start parsing the data from the excel sheet
            if ($row_index > $start_row) {
                $cell_iterator = $row->getCellIterator();
                $cell_iterator->setIterateOnlyExistingCells(false);

                // Only read rows that are allowed in the current requested page
                if ($offset <= $total_rows && $offset + $limit > $total_rows) {
                    $rowobject = new \stdClass();

                    // Iterate each cell in the row, create an array of the values with the
                    // name of the column. Indices start from 1 in the Excel API.
                    $data = array();

                    foreach ($cell_iterator as $cell) {
                        $data[$cell->columnIndexFromString($cell->getColumn()) - 1] = $cell->getCalculatedValue();
                    }

                    $values = $this->createValues($columns, $data);

                    foreach ($values as $key => $value) {
                        $rowobject->{$key} = $value;
                    }

                    // Index by primary key when one is configured; the original's
                    // if/elseif/else here had an unreachable "empty" branch
                    if (empty($pk)) {
                        array_push($row_objects, $rowobject);
                    } elseif (empty($rowobject->{$pk})) {
                        // An empty primary key value cannot be used as an index
                        \Log::info("The primary key {$pk} is empty for this record.");
                    } elseif (!empty($row_objects[$rowobject->{$pk}])) {
                        $double = $rowobject->{$pk};
                        \Log::info("The primary key {$double} has been used already for another record!");
                    } else {
                        $row_objects[$rowobject->{$pk}] = $rowobject;
                    }
                }

                $total_rows++;

                // Hard safety cap on the amount of processed rows
                if ($total_rows >= 10000) {
                    break;
                }
            }
        }

        // Release the worksheet memory
        $php_obj->disconnectWorksheets();

        $paging = Pager::calculatePagingHeaders($limit, $offset, $total_rows);

        $data_result = new Data();
        $data_result->data = $row_objects;
        $data_result->paging = $paging;
        $data_result->preferred_formats = $this->getPreferredFormats();

        return $data_result;
    } catch (\Exception $ex) {
        // Leading backslashes added for consistency with the rest of the file
        \App::abort(500, "Failed to retrieve data from the XLS file on location {$uri}.");
    }
}
public function get($uri) { // Set permission Auth::requirePermissions('definition.view'); if (!empty($uri)) { if (!$this->definition->exists($uri)) { \App::abort(404, "No resource was found identified with " . $uri); } $description = $this->definition->getFullDescription($uri); $result = new Data(); $result->data = $description; return ContentNegotiator::getResponse($result, 'json'); } list($limit, $offset) = Pager::calculateLimitAndOffset(); $definitions = $this->definition->getAllFullDescriptions($limit, $offset); $definition_count = $this->definition->count(); $result = new Data(); $result->paging = Pager::calculatePagingHeaders($limit, $offset, $definition_count); $result->data = $definitions; return ContentNegotiator::getResponse($result, 'json'); }
/**
 * Read data from a MySQL datasource by executing the configured query.
 *
 * @param array $source_definition The MySQL source definition (host, database, credentials, query, ...)
 * @param array $rest_parameters   Optional rest parameters; when present, a fixed page of 500 rows is used
 *
 * @return Data object with the query results
 */
public function readData($source_definition, $rest_parameters = array())
{
    // Default paging values; they stay null when the query pages itself
    $limit = null;
    $offset = null;

    // Check if the query is already paged.
    // stripos() returns 0 (falsy) when the needle sits at position 0, so the
    // result must be compared against false explicitly.
    $limitInQuery = stripos($source_definition['query'], 'limit') !== false;

    if (!$limitInQuery) {
        list($limit, $offset) = Pager::calculateLimitAndOffset();
    }

    // Disregard the paging when rest parameters are given
    if (!empty($rest_parameters)) {
        $limit = 500;
        $offset = 0;
    }

    // Get the columns from the repository
    $columns = $this->columnsRepo->getColumns($source_definition['id'], 'MysqlDefinition');

    // Get the geo properties (property => path)
    $geo_properties = $this->geoRepo->getGeoProperties($source_definition['id'], 'MysqlDefinition');
    $geo = array();

    foreach ($geo_properties as $geo_prop) {
        $geo[$geo_prop['property']] = $geo_prop['path'];
    }

    if (!$columns) {
        // 500 error because this shouldn't happen in normal conditions
        // Columns are parsed upon adding a CSV resource and are always present
        \App::abort(500, "Cannot find the columns of the MySQL table file, this might be due to a corrupted database or a broken configuration.");
    }

    // Create aliases for the columns
    $aliases = $this->columnsRepo->getColumnAliases($source_definition['id'], 'MysqlDefinition');
    $pk = null;

    foreach ($columns as $column) {
        if (!empty($column['is_pk'])) {
            $pk = $column['column_name_alias'];
        }
    }

    // Build the database connection configuration
    $db_config = array(
        'driver' => 'mysql',
        'host' => $source_definition['host'],
        'database' => $source_definition['database'],
        'username' => $source_definition['username'],
        'password' => $source_definition['password'],
        'charset' => 'utf8',
        'collation' => $source_definition['collation'],
    );

    // Configure a temporary connection
    \Config::set('database.connections.mysqltmp', $db_config);

    // Make a database connection
    $db = \DB::connection('mysqltmp');

    try {
        $query = $source_definition['query'];

        // Derive a count statement from the query for pagination
        preg_match("/select.*?(from.*)/msi", $query, $matches);

        if (empty($matches[1])) {
            \App::abort(400, 'Failed to make a count statement, make sure the SQL query is valid.');
        }

        $count_query = 'select count(*) as count ' . $matches[1];
        $count_result = $db->select($count_query);
        $total_rows = $count_result[0]->count;

        // Append paging clauses only when the query doesn't page itself
        if (!$limitInQuery) {
            if (!empty($limit)) {
                $query .= ' limit ' . $limit;
            }

            if (!empty($offset)) {
                $query .= ' offset ' . $offset;
            }
        }

        $result = $db->select($query);
    } catch (QueryException $ex) {
        \App::abort(400, "A bad query has been made, make sure all passed statements are SQL friendly. The error message was: " . $ex->getMessage());
    }

    // Get the paging headers (limit/offset remain null for self-paged queries)
    $paging = Pager::calculatePagingHeaders($limit, $offset, $total_rows);

    $data_result = new Data();
    $data_result->data = $result;
    $data_result->paging = $paging;
    $data_result->geo = $geo;
    $data_result->preferred_formats = $this->getPreferredFormats();

    return $data_result;
}
private function calculatePagingInfo($limit, $offset, $count) { $paging_info = Pager::calculatePagingHeaders($limit, $offset, $count); $paging = ['current' => ceil($offset / $limit) + 1, 'first' => 1, 'last' => ceil($count / $limit), 'limit' => $limit, 'offset' => $offset, 'total' => $count]; if (!empty($paging_info['next'])) { $paging['next'] = $paging['current'] + 1; } if (!empty($paging_info['previous'])) { $paging['previous'] = $paging['current'] - 1; } return $paging; }