/**
 * Loads the properties of every column of every table found in the data source.
 *
 * The list of tables comes from $this->loadTableNames(). The structure of each table
 * is then requested from the WebHCat DDL resource
 * (/templeton/v1/ddl/database/<database>/table/<table>) and flattened into one record
 * per column.
 *
 * @param SystemTableMetaModelLoaderCallContext $callcontext
 *   Not used by this implementation.
 * @param DataSourceMetaData $datasource
 *   Data source to inspect; its ->database property selects the database.
 *
 * @return array|null
 *   List of records keyed by self::CN_TABLE_NAME, self::CN_COLUMN_NAME,
 *   self::CN_COLUMN_INDEX and self::CN_COLUMN_TYPE, or NULL when no column was loaded.
 */
protected function loadColumnsProperties(SystemTableMetaModelLoaderCallContext $callcontext, DataSourceMetaData $datasource) {
    $tableColumnsProperties = NULL;

    $webhcatProxy = new WebHCat_CURLProxy($datasource);

    // loading list of available tables
    $tableNames = $this->loadTableNames($datasource);
    if (empty($tableNames)) {
        // no tables (NULL or an empty list): there is nothing to request,
        // and the executor must not be started without handlers
        return $tableColumnsProperties;
    }

    // preparing requests to load table structure
    $tableHandlers = array();
    foreach ($tableNames as $tableName) {
        $tableHandlers[] = $webhcatProxy->initializeHandler(
            'GET',
            "/templeton/v1/ddl/database/{$datasource->database}/table/$tableName");
    }

    // processing available tables in the order the executor reports them as completed
    $executor = new MultipleCURLHandlerExecutor($tableHandlers);
    $executor->start();
    while (($handler = $executor->findCompletedHandler()) !== FALSE) {
        $responseColumns = $executor->processResponse($handler);
        // a response without column definitions contributes nothing to the result
        if (!isset($responseColumns['columns'])) {
            continue;
        }

        foreach ($responseColumns['columns'] as $columnIndex => $responseColumn) {
            $tableColumnsProperties[] = array(
                self::CN_TABLE_NAME => $responseColumns['table'],
                self::CN_COLUMN_NAME => $responseColumn['name'],
                self::CN_COLUMN_INDEX => $columnIndex,
                self::CN_COLUMN_TYPE => $responseColumn['type']);
        }
    }

    return $tableColumnsProperties;
}
/**
 * Executes a statement through WebHCat and passes the parsed result to the callback.
 *
 * Steps performed:
 *   1. submits $sql to the WebHCat '/templeton/v1/hive' resource with a unique status directory;
 *   2. polls '/templeton/v1/queue/<jobId>' once per second until the job is reported as done;
 *   3. reads the 'stdout' file of the status directory through WebHDFS;
 *   4. deletes the status directory (also when the job failed);
 *   5. parses the tab-delimited output, whose first line holds the column names,
 *      and maps detected application types to 'string' / 'int' database types.
 *
 * @param DataSourceHandler $handler
 *   Not used by this implementation.
 * @param DataControllerCallContext $callcontext
 *   Passed through to the callback.
 * @param mixed $connection
 *   Object whose ->datasource property describes the target data source.
 * @param string $sql
 *   Statement to execute.
 * @param __SQLDataSourceHandler__AbstractQueryCallbackProxy $callbackInstance
 *   Receives the resulting HiveStatement.
 *
 * @return mixed
 *   Whatever $callbackInstance->callback() returns.
 *
 * @throws IllegalStateException
 *   When the job identifier is missing or the job finished with a non-zero exit value.
 * @throws UnsupportedOperationException
 *   When a detected column type is neither string nor integer.
 */
public function execute(
        DataSourceHandler $handler,
        DataControllerCallContext $callcontext,
        $connection, $sql,
        __SQLDataSourceHandler__AbstractQueryCallbackProxy $callbackInstance) {

    $datasource = $connection->datasource;

    // unique name of the folder where the job stores its output
    $outputFolder = uniqid('dp', TRUE);

    $webhcatProxy = new WebHCat_CURLProxy($datasource);

    // executing the sql statement
    $statementHandler = $webhcatProxy->initializeHandler(
        'POST',
        '/templeton/v1/hive',
        array('execute' => $sql, 'statusdir' => $outputFolder, 'define=hive.cli.print.header' => 'true'));
    $executor = new SingleCURLHandlerExecutor($statementHandler);
    $responseJob = $executor->execute();
    // ... and preparing job identifier
    if (!isset($responseJob['id'])) {
        LogHelper::log_debug($responseJob);
        throw new IllegalStateException(t(
            'Job ID is not available: %error',
            array('%error' => (isset($responseJob['info']['stderr']) ? $responseJob['info']['stderr']: 'error message is not provided'))));
    }
    $jobId = $responseJob['id'];

    // waiting for the execution job to complete
    // NOTE(review): the loop has no upper bound; it relies on the job eventually reporting 'done'
    $jobHandler = $webhcatProxy->initializeHandler('GET', "/templeton/v1/queue/$jobId");
    $executor = new SingleCURLHandlerExecutor($jobHandler);
    $responseJobStatus = NULL;
    while (TRUE) {
        $responseJobStatus = $executor->execute();
        if (isset($responseJobStatus['completed']) && ($responseJobStatus['completed'] == 'done')) {
            break;
        }
        usleep(1000000);
    }
    if ($responseJobStatus['exitValue'] != 0) {
        // the output folder is useless for a failed job. Removing it before reporting the failure.
        // A cleanup problem is only logged: it must not replace the job error
        try {
            $this->deleteOutputFolder(new WebHDFS_CURLProxy($datasource), $datasource, $outputFolder);
        }
        catch (Exception $e) {
            LogHelper::log_debug($e->getMessage());
        }

        throw new IllegalStateException(t(
            '%resourceId execution completed unsuccessfully: %errorCode',
            array('%resourceId' => $jobHandler->resourceId, '%errorCode' => $responseJobStatus['exitValue'])));
    }

    $webhdfsProxy = new WebHDFS_CURLProxy($datasource);

    // reading result of the execution
    $data = NULL;
    while (TRUE) {
        $resultHandler = $webhdfsProxy->initializeHandler(
            'GET',
            "/webhdfs/v1/user/{$datasource->username}/$outputFolder/stdout",
            array('op' => 'OPEN'));
        $executor = new SingleCURLHandlerExecutor($resultHandler);
        $data = $executor->execute();
        // the file should contain at least column names
        if (isset($data)) {
            break;
        }

        // it looks like the result is empty. That happens because the file is not flushed yet
        usleep(10000);
    }

    // deleting the output folder. We do not need it any more
    $this->deleteOutputFolder($webhdfsProxy, $datasource, $outputFolder);

    // parsing data
    $parsedDataProvider = new SampleDataPreparer(FALSE);

    $parser = new DelimiterDataParser("\t");
    $parser->isHeaderPresent = TRUE;
    $parser->parse(
        new StreamDataProvider($data),
        array(
            new ColumnNamePreparer(),
            new ColumnPublicNamePreparer(),
            new ColumnTypeAutoDetector(),
            $parsedDataProvider));

    // calculating column database type
    foreach ($parser->metadata->getColumns() as $column) {
        $databaseType = NULL;
        switch ($column->type->applicationType) {
            case StringDataTypeHandler::DATA_TYPE:
                $databaseType = 'string';
                break;
            case IntegerDataTypeHandler::DATA_TYPE:
                $databaseType = 'int';
                break;
            default:
                throw new UnsupportedOperationException(t(
                    'Cannot provide data type mapping for %columnName column: %datatype',
                    array('%columnName' => $column->name, '%datatype' => $column->type->applicationType)));
        }
        $column->type->databaseType = $databaseType;
    }

    $statement = new HiveStatement($parser->metadata, $parsedDataProvider->records);

    return $callbackInstance->callback($callcontext, $connection, $statement);
}

/**
 * Recursively deletes the job output folder from the home folder of the data source user.
 *
 * @param WebHDFS_CURLProxy $webhdfsProxy
 *   Proxy used to prepare the WebHDFS request.
 * @param mixed $datasource
 *   Data source whose ->username selects the home folder.
 * @param string $outputFolder
 *   Name of the folder to delete.
 */
private function deleteOutputFolder(WebHDFS_CURLProxy $webhdfsProxy, $datasource, $outputFolder) {
    $deleteHandler = $webhdfsProxy->initializeHandler(
        'DELETE',
        "/webhdfs/v1/user/{$datasource->username}/$outputFolder",
        array('op' => 'DELETE', 'recursive' => 'true'));
    $executor = new SingleCURLHandlerExecutor($deleteHandler);
    $executor->execute();
}