/**
 * Insert the data rows of a CSV file into an Oracle table, one INSERT per row.
 *
 * The column list of the INSERT is built from $table['items'], skipping every
 * item whose 'type' is 'IGNORE'. Cell values are sent as bind variables rather
 * than being concatenated into the SQL text, so quotes or other SQL
 * metacharacters inside a cell can neither break nor alter the statement.
 *
 * @param string $sourceFilename  Path of the CSV file to read.
 * @param string $outputTableName Target table name; interpolated into the SQL as-is.
 * @param array  $table           Table definition; $table['items'][]['type'] and
 *                                $table['items'][]['dbName'] are read.
 * @throws UserException When a statement cannot be parsed, bound or executed.
 */
public function write($sourceFilename, $outputTableName, $table)
{
    $csv = new CsvFile($sourceFilename);

    $header = [];
    foreach ($table['items'] as $item) {
        if ($item['type'] != 'IGNORE') {
            $header[] = $item['dbName'];
        }
    }

    // NOTE(review): presumably getHeader() leaves the iterator on the header row
    // and next() then moves to the first data row - confirm against CsvFile.
    $csv->getHeader();
    $csv->next();

    // The loop ends as soon as current() yields a value loosely equal to null.
    while ($csv->current() != null) {
        $values = array_values($csv->current());

        $placeholders = [];
        foreach ($values as $position => $unused) {
            $placeholders[] = ':v' . $position;
        }

        $sql = sprintf(
            "INSERT INTO {$outputTableName} (%s) VALUES (%s)",
            implode(',', $header),
            implode(',', $placeholders)
        );

        try {
            // The OCI functions report failure through a false return value,
            // so each result is checked explicitly and turned into an exception.
            $stmt = oci_parse($this->db, $sql);
            if ($stmt === false) {
                $error = oci_error($this->db);
                throw new \RuntimeException(is_array($error) ? $error['message'] : 'statement could not be parsed');
            }

            foreach ($placeholders as $position => $placeholder) {
                // Binding is by reference: $values must stay untouched until oci_execute().
                if (!oci_bind_by_name($stmt, $placeholder, $values[$position])) {
                    $error = oci_error($stmt);
                    throw new \RuntimeException(is_array($error) ? $error['message'] : 'value could not be bound');
                }
            }

            if (!oci_execute($stmt)) {
                $error = oci_error($stmt);
                throw new \RuntimeException(is_array($error) ? $error['message'] : 'statement could not be executed');
            }
        } catch (\Exception $e) {
            throw new UserException("Query failed: " . $e->getMessage(), $e, ['query' => $sql]);
        }

        $csv->next();
    }
}
/**
 * Walk through a CSV file and push every data row to the catalog repository.
 *
 * Rows equal to the stored CSV header and rows reported empty by rowIsEmpty()
 * are skipped. For every other row the product is created first when
 * productExists() reports it missing, and updateStock() is then called.
 * A closing message naming the file is echoed at the end.
 *
 * @param CsvFile $file      CSV file to iterate over.
 * @param string  $file_name Name printed in the closing message.
 */
public function process(CsvFile $file, $file_name)
{
    $this->setCsvHeader($file->getHeader());

    foreach ($file as $line) {
        if ($line == $this->csvHeader || $this->rowIsEmpty($line)) {
            continue;
        }

        $record = $this->buildAssoc($line);

        $alreadyKnown = $this->catalogRepository->productExists($record['sku']);
        if (!$alreadyKnown) {
            $this->catalogRepository->createSimpleProduct($record);
        }

        // Stock is updated for both existing and freshly created products.
        $this->catalogRepository->updateStock($record);
    }

    echo "End of file: " . $file_name;
}
/**
 * Run a query against the Oracle connection, dump the result set into a CSV
 * file and pass that file to writeTable().
 *
 * The CSV header is taken from the keys of the first fetched row. When
 * $this->dbConfig['null'] is set, every row goes through replaceNull(); in all
 * rows each occurrence of two double-quotes is replaced by a single one.
 * If the query returns no row, a warning is logged and nothing is written.
 *
 * @param string      $query       SQL query to execute.
 * @param string      $outputTable Output table identifier handed to writeTable().
 * @param bool        $incremental Handed to writeTable().
 * @param string|null $primaryKey  Handed to writeTable().
 * @throws DbException          When the query cannot be parsed, executed or fetched.
 * @throws ApplicationException When the produced CSV has no header.
 */
public function export($query, $outputTable, $incremental = false, $primaryKey = null)
{
    // NOTE(review): tempnam() creates a file of its own; appending ".csv" makes the
    // CSV a second file and leaves the first one behind - confirm whether a cleanup is wanted.
    $outFilename = tempnam("/tmp/ex-db", $outputTable) . ".csv";
    $csv = new CsvFile($outFilename);

    $this->logger->info("Exporting to " . $outputTable);

    try {
        // oci_parse()/oci_execute() signal failure with a false return value;
        // without these checks a failed query would be treated as an empty result.
        $stmt = oci_parse($this->conn, $query);
        if ($stmt === false) {
            $error = oci_error($this->conn);
            throw new \RuntimeException(is_array($error) ? $error['message'] : 'statement could not be parsed');
        }
        if (!oci_execute($stmt)) {
            $error = oci_error($stmt);
            throw new \RuntimeException(is_array($error) ? $error['message'] : 'statement could not be executed');
        }
    } catch (\Exception $e) {
        throw new DbException("DB query failed: " . $e->getMessage(), $e);
    }

    // The first row is fetched on its own because its keys become the CSV header.
    try {
        $resultRow = oci_fetch_assoc($stmt);
    } catch (\Exception $e) {
        throw new DbException("DB query fetch failed: " . $e->getMessage(), $e);
    }

    if (!is_array($resultRow) || empty($resultRow)) {
        $this->logger->warning("Query returned empty result. Nothing was imported.");
        return;
    }

    // Shared per-row normalisation: optional NULL replacement, then quote un-doubling.
    $normalize = function (array $row) {
        if (isset($this->dbConfig['null'])) {
            $row = $this->replaceNull($row, $this->dbConfig['null']);
        }
        return str_replace('""', '"', $row);
    };

    $csv->writeRow(array_keys($resultRow));
    $csv->writeRow($normalize($resultRow));

    // write the rest
    try {
        while ($resultRow = oci_fetch_assoc($stmt)) {
            $csv->writeRow($normalize($resultRow));
        }
    } catch (\Exception $e) {
        throw new DbException("DB query fetch failed: " . $e->getMessage(), $e);
    }

    if (!count($csv->getHeader())) {
        throw new ApplicationException("Trying to upload an empty file");
    }

    $this->writeTable($csv, $outputTable, $incremental, $primaryKey);
}
/**
 * Send the rows of a CSV file to the bulk endpoint of the search client.
 *
 * The first row of the file is used as the header and every following row is
 * turned into a document keyed by the header columns. Documents are sent in
 * batches: a batch is flushed whenever the row counter is a multiple of
 * $options->getBulkSize(), and whatever remains is flushed after the loop.
 * Processing stops at the first problem (malformed row, missing id column or
 * a bulk response flagged with errors).
 *
 * @param CsvFile     $file         CSV file to load.
 * @param LoadOptions $options      Supplies index name, type name and bulk size.
 * @param string|null $primaryIndex Name of the column whose value becomes the document `_id`;
 *                                  when falsy no `_id` is sent.
 * @return bool True when every batch was accepted, false otherwise.
 */
public function loadFile(CsvFile $file, LoadOptions $options, $primaryIndex = null)
{
    $csvHeader = $file->getHeader();
    $columnCount = count($csvHeader);

    $params = ['body' => []];
    $iBulk = 1;

    foreach ($file as $i => $line) {
        // skip header
        if (!$i) {
            continue;
        }

        // array_combine() cannot pair a header with a row of a different length,
        // so such a row is reported the same way as the other CSV errors.
        if (!is_array($line) || count($line) !== $columnCount) {
            $this->logger->error(sprintf("CSV error: Column count mismatch on line %s", $i + 1));
            return false;
        }

        $lineData = array_combine($csvHeader, $line);

        $action = ['_index' => $options->getIndex(), '_type' => $options->getType()];
        if ($primaryIndex) {
            if (!array_key_exists($primaryIndex, $lineData)) {
                $this->logger->error(
                    sprintf("CSV error: Missing id column %s on line %s", $primaryIndex, $i + 1)
                );
                return false;
            }
            $action['_id'] = $lineData[$primaryIndex];
        }

        // Each document is preceded by its action line in the bulk body.
        $params['body'][] = ['index' => $action];
        $params['body'][] = $lineData;

        if ($i % $options->getBulkSize() == 0) {
            if (!$this->sendBulkBatch($params, $options, $iBulk)) {
                return false;
            }
            $params = ['body' => []];
            $iBulk++;
        }
    }

    // Flush the rows that did not fill a whole batch.
    if (!empty($params['body']) && !$this->sendBulkBatch($params, $options, $iBulk)) {
        return false;
    }

    return true;
}

/**
 * Send one bulk request, log its timing and report whether it was accepted.
 *
 * @param array       $params      Bulk request parameters (the 'body' key holds the payload).
 * @param LoadOptions $options     Supplies the index and type names used in log messages.
 * @param int         $batchNumber Sequence number of the batch, used in log messages.
 * @return bool False when the response is flagged with errors, true otherwise.
 */
private function sendBulkBatch(array $params, LoadOptions $options, $batchNumber)
{
    $this->logger->info(
        sprintf("Write %s batch %d to %s start", $options->getType(), $batchNumber, $options->getIndex())
    );
    $responses = $this->client->bulk($params);
    $this->logger->info(
        sprintf(
            "Write %s batch %d to %s took %d ms",
            $options->getType(),
            $batchNumber,
            $options->getIndex(),
            $responses['took']
        )
    );

    if ($responses['errors'] === false) {
        return true;
    }

    // Only the first item carrying an error is logged; the batch fails either way.
    if (!empty($responses['items'])) {
        foreach ($responses['items'] as $itemResult) {
            if (empty($itemResult['index']['error'])) {
                continue;
            }

            $error = $itemResult['index']['error'];
            if (is_array($error)) {
                $error = $this->getErrorMessageFromErrorField($error);
            }
            $this->logger->error(sprintf("ES error: %s", $error));
            break;
        }
    }

    return false;
}
/**
 * Importing the very-long-row.csv fixture is expected to raise an import
 * exception carrying the ROW_SIZE_TOO_LARGE code.
 */
public function testRowTooLongShouldThrowException()
{
    $fixture = new CsvFile(__DIR__ . "/_data/csv-import/very-long-row.csv");

    $this->setExpectedException(
        "Keboola\\Db\\Import\\Exception",
        '',
        \Keboola\Db\Import\Exception::ROW_SIZE_TOO_LARGE
    );

    $sources = [$fixture];
    $this->import->import('very-long-row', $fixture->getHeader(), $sources);
}
/**
 * Read a CSV file, turn its data rows into associative arrays with normalised
 * keys and hand them to batch_save().
 *
 * only_admin_access() is called first. The file is parsed with the default
 * delimiter; when the header has fewer than three columns, or its first
 * column contains a semicolon, the file is re-read with ';' as the delimiter.
 * Header names become keys after lower-casing, replacing spaces with
 * underscores, replacing "__" with "_" once and trimming trailing underscores.
 * Cells without a matching header column are dropped.
 *
 * @param string $filename Path of the CSV file to import.
 * @return mixed An array with an 'error' key when the file is missing or has no
 *               header; otherwise whatever batch_save() returns.
 */
public function queue_import_csv($filename)
{
    only_admin_access();

    if (!is_file($filename)) {
        return array('error' => "You have not provided a existing backup to restore.");
    }

    $csv = new \Keboola\Csv\CsvFile($filename);
    $head = $csv->getHeader();

    // Fall back to the semicolon delimiter when the default parse looks wrong.
    $looksSemicolonSeparated = !isset($head[2])
        || (isset($head[0]) && stristr($head[0], ';'));
    if ($looksSemicolonSeparated) {
        $csv = new \Keboola\Csv\CsvFile($filename, ';');
        $head = $csv->getHeader();
    }

    if (empty($head)) {
        return array('error' => "CSV file cannot be parsed properly.");
    }

    $rows = array();
    $isHeaderRow = true;
    foreach ($csv as $row) {
        // The first row of the file is the header itself.
        if ($isHeaderRow) {
            $isHeaderRow = false;
            continue;
        }

        $record = array();
        if (is_array($row)) {
            foreach ($row as $k => $v) {
                if (!isset($head[$k])) {
                    continue;
                }

                $new_k = strtolower($head[$k]);
                $new_k = str_replace(' ', '_', $new_k);
                $new_k = str_replace('__', '_', $new_k);
                $new_k = rtrim($new_k, '_');
                $record[$new_k] = $v;
            }
        }

        // Rows that are not arrays still contribute an empty record.
        $rows[] = $record;
    }

    $content_items = $this->map_array($rows);

    return $this->batch_save($content_items);
}
/**
 * Importing the "02_tw_accounts.csv.invalid.manifest" manifest from the S3
 * bucket is expected to fail with the MANDATORY_FILE_NOT_FOUND code.
 */
public function testInvalidManifestImport()
{
    $bucket = getenv(self::AWS_S3_BUCKET_ENV);

    $localAccounts = new \Keboola\Csv\CsvFile(__DIR__ . "/_data/csv-import/tw_accounts.csv");
    $invalidManifest = new \Keboola\Csv\CsvFile("s3://{$bucket}/02_tw_accounts.csv.invalid.manifest");

    $import = $this->getImport('manifest');
    $import->setIgnoreLines(1);

    $caught = null;
    try {
        $import->import('accounts-3', $localAccounts->getHeader(), [$invalidManifest]);
    } catch (\Keboola\Db\Import\Exception $e) {
        $caught = $e;
    }

    // Reaching this point without an import exception means the manifest was accepted.
    if ($caught === null) {
        $this->fail('Manifest should not be uploaded');
    }

    $this->assertEquals(\Keboola\Db\Import\Exception::MANDATORY_FILE_NOT_FOUND, $caught->getCode());
}
/**
 * Build a list of import scenarios.
 *
 * Expected rows and headers are read from the local CSV fixtures; the sources
 * themselves are CSV/manifest files in the S3 bucket named by the
 * AWS_S3_BUCKET_ENV environment variable, or schema/table descriptors.
 *
 * NOTE(review): each entry looks like (sources, columns, expected rows,
 * table name[, import type[, options]]) - confirm against the consuming test.
 *
 * @return array[]
 */
public function tables()
{
    $fixtureDir = __DIR__ . '/_data/csv-import';

    // Rows of the "escaping" fixture; the first one is the header.
    $expectedEscaping = [];
    $escapingFile = new \Keboola\Csv\CsvFile($fixtureDir . '/escaping/standard-with-enclosures.csv');
    foreach ($escapingFile as $line) {
        $expectedEscaping[] = $line;
    }
    $escapingHeader = array_shift($expectedEscaping);
    $expectedEscaping = array_values($expectedEscaping);

    // Rows of the accounts fixture; the first one is the header.
    $expectedAccounts = [];
    $accountsFile = new \Keboola\Csv\CsvFile($fixtureDir . '/tw_accounts.csv');
    foreach ($accountsFile as $line) {
        $expectedAccounts[] = $line;
    }
    $accountsHeader = array_shift($expectedAccounts);
    $expectedAccounts = array_values($expectedAccounts);

    $reorderedFile = new \Keboola\Csv\CsvFile($fixtureDir . '/tw_accounts.changedColumnsOrder.csv');
    $accountChangedColumnsOrderHeader = $reorderedFile->getHeader();

    $s3 = 's3://' . getenv(self::AWS_S3_BUCKET_ENV);

    $cases = [];

    $cases[] = [
        [new CsvFile("{$s3}/empty.manifest")],
        $escapingHeader,
        [],
        'out.csv_2Cols',
        'manifest',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/standard-with-enclosures.csv")],
        $escapingHeader,
        $expectedEscaping,
        'out.csv_2Cols',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/gzipped-standard-with-enclosures.csv.gz")],
        $escapingHeader,
        $expectedEscaping,
        'out.csv_2Cols',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/standard-with-enclosures.tabs.csv", "\t")],
        $escapingHeader,
        $expectedEscaping,
        'out.csv_2Cols',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/raw.rs.csv", "\t", '', '\\')],
        $escapingHeader,
        $expectedEscaping,
        'out.csv_2Cols',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/tw_accounts.changedColumnsOrder.csv")],
        $accountChangedColumnsOrderHeader,
        $expectedAccounts,
        'accounts',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/tw_accounts.csv")],
        $accountsHeader,
        $expectedAccounts,
        'accounts',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/01_tw_accounts.csv.manifest")],
        $accountsHeader,
        $expectedAccounts,
        'accounts',
        'manifest',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/03_tw_accounts.csv.gzip.manifest")],
        $accountsHeader,
        $expectedAccounts,
        'accounts',
        'manifest',
    ];
    $cases[] = [
        ['schemaName' => $this->sourceSchemaName, 'tableName' => 'out.csv_2Cols'],
        $escapingHeader,
        [['a', 'b'], ['c', 'd']],
        'out.csv_2Cols',
        'copy',
    ];
    $cases[] = [
        ['schemaName' => $this->sourceSchemaName, 'tableName' => 'types'],
        $escapingHeader,
        [['c', '1'], ['d', '0']],
        'types',
        'copy',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/reserved-words.csv")],
        ['column', 'table'],
        [['table', 'column']],
        'table',
        'csv',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/with-ts.csv")],
        ['col1', 'col2', '_timestamp'],
        [['a', 'b', '2014-11-10 13:12:06'], ['c', 'd', '2014-11-10 14:12:06']],
        'out.csv_2Cols',
    ];
    $cases[] = [
        [new CsvFile("{$s3}/standard-with-enclosures.csv")],
        $escapingHeader,
        $expectedEscaping,
        'out.no_timestamp_table',
        'csv',
        ['useTimestamp' => false],
    ];

    return $cases;
}
/**
 * Load a parent/child CSV table into MySQL, compute root, depth and position
 * for every row, and export the result to "destination.csv" in $outDirectory.
 *
 * The work is done in the tables `source` and `tmp.Fill` and exposed through
 * the view `out.source`; all of them are dropped/recreated on every call.
 * The export is performed by the `mysql` command line client piped through `sed`.
 *
 * @param string $inputFile Input CSV file with table data.
 * @param bool $calculateDeltas Set to true to add timeDelta column
 * @param string $idColumn Name of column with primary key.
 * @param string $sortColumn Name of column by which data are sorted.
 * @param string $parentColumn Name of column with parent Id
 * @param string $outDirectory Directory in which the output file will be stored.
 * @throws DBALException
 */
public function process($inputFile, $calculateDeltas, $idColumn, $sortColumn, $parentColumn, $outDirectory)
{
    $dataTypes = [
        $idColumn => 'VARCHAR(255) NOT NULL DEFAULT \'\'',
        $parentColumn => 'VARCHAR(255)',
        $sortColumn => 'VARCHAR(255)',
    ];

    $csv = new CsvFile($inputFile);
    $header = $csv->getHeader();
    $tableDefinition = $this->getTableDefinition('source', $dataTypes, $idColumn, $header);

    $this->db->query('DROP TABLE IF EXISTS `source`');
    $this->db->query('DROP VIEW IF EXISTS `out.source`');
    $this->db->query('DROP TABLE IF EXISTS `out.source`');
    $this->db->query('DROP TABLE IF EXISTS `tmp.Fill`');
    $this->db->query($tableDefinition);

    // NOTE(review): the file name and the column names below are concatenated into
    // SQL unescaped - confirm they never come from untrusted input.
    $loadQuery = '
        LOAD DATA LOCAL INFILE \'' . str_replace('\\', '/', $inputFile) . '\'
        INTO TABLE `source`
        FIELDS TERMINATED BY \',\'
        OPTIONALLY ENCLOSED BY \'"\'
        ESCAPED BY \'\'
        IGNORE 1 LINES;';
    $this->db->query($loadQuery);

    // Alter columns
    $query = 'ALTER TABLE `source`
        CHANGE `' . $idColumn . '` `id` VARCHAR(255),
        CHANGE `' . $parentColumn . '` `parent` VARCHAR(255),
        CHANGE `' . $sortColumn . '` `sort` VARCHAR(255),
        ADD COLUMN `__root` VARCHAR(255),
        ADD COLUMN `__depth` INT(11) NOT NULL DEFAULT 0,
        ADD COLUMN `__tmpRoot` VARCHAR(255) NULL,
        ADD COLUMN `__position` VARCHAR(2000) NULL,
        ADD COLUMN `__position_relative` INT(11) NULL,
        ADD COLUMN `__position_depth` INT(11) NULL;';
    $this->db->query($query);

    if ($calculateDeltas) {
        $query = '
            ALTER TABLE `source`
            ADD COLUMN `__timestamp` INT(11) NOT NULL DEFAULT 0;';
        $this->db->query($query);

        $query = '
            UPDATE `source`
            SET `__timestamp` = UNIX_TIMESTAMP(`sort`);';
        $this->db->query($query);
    }

    // Create indexes
    $query = 'ALTER TABLE `source`
        ADD KEY(`id`),
        ADD KEY(`parent`),
        ADD KEY(`id`, `parent`),
        ADD KEY(`sort`),
        ADD KEY(`__depth`),
        ADD KEY(`__position_depth`),
        ADD KEY(`__tmpRoot`)';
    $this->db->query($query);

    // Detect Orphans (items with missing parents) and set them to null.
    $this->db->executeUpdate(
        'UPDATE `source` t1
        LEFT JOIN `source` t2 ON t1.`parent` = t2.`id`
        SET t1.`parent` = NULL
        WHERE t1.`parent` IS NOT NULL AND t2.`id` IS NULL;'
    );

    // Clean self referencing items
    $this->db->query('UPDATE `source` SET `parent` = NULL WHERE `parent` = `id`;');

    // Set roots items where no parent is available
    $this->db->executeUpdate('UPDATE `source` SET `__root` = `id` WHERE `parent` IS NULL;');

    // Set temporary root for all items (their direct parent) - tmpRoot will bubble up to real root.
    $this->db->query('UPDATE `source` SET `__tmpRoot` = `parent` WHERE `parent` IS NOT NULL;');

    // Recursion - while there are any __tmpRoot items increase depth and set tmpRoot a level up
    // NOTE(review): only direct self references are cleaned above; a longer parent
    // cycle (A -> B -> A) looks like it would keep this loop running - confirm.
    $depth = 0;
    while ($this->db->executeUpdate(
        'UPDATE `source` SET `__root` = `__tmpRoot` WHERE `__tmpRoot` IS NOT NULL;'
    ) > 0) {
        $depth++;
        $this->db->query(
            'UPDATE `source` t1
            JOIN `source` t2 ON t1.`__tmpRoot` = t2.`id`
            SET t1.`__tmpRoot` = t2.`parent`,
                t1.`__depth` = t1.`__depth` + 1 ;'
        );
    }

    // Table for creating position
    $this->db->query(
        'CREATE TABLE `tmp.Fill` (
            `id` VARCHAR(255),
            `__position` VARCHAR(2000),
            `__position_depth` INT(11),
            INDEX(`id`)
        );'
    );

    /*
     * Create positions:
     * For each level of depth there will be an increasing number like 00001 - easy to sort alphanumerically.
     * E.g. a record with depth level = 0 will get 01234; a child item is appended to its parent's
     * position number, e.g. 01234 + 00001 => 0123400001.
     * All lower depths are padded with zeros at the end, so the parent will look like 0123400000.
     * Then this is sorted and inserted in a new table and will get simple +1 increments.
     */
    for ($i = 0; $i <= $depth; $i++) {
        // How long is the number of items in source table, used for numeric padding
        $this->db->query('SELECT @base := LENGTH(COUNT(*)) FROM `source`;');
        $this->db->query('SELECT @increment := 0;');
        $this->db->query('SELECT @depth := ' . $i . ';');
        $this->db->query('TRUNCATE `tmp.Fill`;');
        $this->db->query(
            'INSERT INTO `tmp.Fill`
            SELECT
                t.`id`,
                CONCAT(
                    IFNULL(parent.`__position`, \'\'),
                    LPAD(@increment := @increment+1, @base, 0)
                ) AS `__position`,
                @increment AS `__position_depth`
            FROM `source` t
            LEFT JOIN `source` parent ON t.`parent` = parent.`id`
            WHERE t.`__depth` = @depth
            ORDER BY t.`sort` ASC;'
        );
        $this->db->query(
            'UPDATE `source` t
            JOIN `tmp.Fill` f USING(`id`)
            SET t.`__position` = f.`__position`,
                t.`__position_depth` = f.`__position_depth` ;'
        );
    }

    $this->db->query('SELECT @increment := 0;');
    $this->db->query('TRUNCATE `tmp.Fill`;');

    // Flatten the position numbers
    $this->db->query(
        'INSERT INTO `tmp.Fill`
        SELECT
            t.`id`,
            @increment := @increment+1 AS `__position`,
            0 AS `__position_depth`
        FROM `source` t
        ORDER BY t.`__position` ASC;'
    );

    // And then back to source table
    $this->db->executeUpdate(
        'UPDATE `source` t
        JOIN `tmp.Fill` f USING(`id`)
        SET t.`__position` = f.`__position` ;'
    );

    // Relative position
    $this->db->query('SELECT @increment := 0;');
    $this->db->query('TRUNCATE `tmp.Fill`;');
    $this->db->query(
        'INSERT INTO `tmp.Fill`
        SELECT
            s.`id`,
            CAST(s.__position - r.__position AS SIGNED) AS `__position`,
            0 AS `__position_depth`
        FROM `source` s
        LEFT JOIN `source` r ON s.`__root` = r.`id`;'
    );
    $this->db->query(
        'UPDATE `source` t
        JOIN `tmp.Fill` f USING(`id`)
        SET t.`__position_relative` = f.`__position`;'
    );

    if ($calculateDeltas) {
        $this->db->query(
            'ALTER TABLE source
            ADD INDEX `root_depth` (`__root`, `__depth`),
            ADD INDEX `root_relpos` (`__root`, `__position_relative`),
            ADD INDEX `root_depth_pos` (`__root`, `__depth`, `__position_depth`) ;'
        );

        // The "#" comments inside the SQL run to the end of their line, so the
        // line breaks in this statement are significant.
        $this->db->query(
            'CREATE VIEW `out.source` AS
            SELECT
                source.`id` AS `' . $idColumn . '`,
                source.`__root` AS `root`,
                source.`__position` AS `position`,
                source.`__position_relative` AS `position_relative`,
                source.`__depth` AS `depth`,
                IF(source.`__timestamp` - root.`__timestamp` < 0, 0, source.`__timestamp` - root.`__timestamp`) AS `time_delta_runsum`,
                COALESCE(
                    (source.`__timestamp` - previous.`__timestamp`),
                    (source.`__timestamp` - previous_2.`__timestamp`),
                    (source.`__timestamp` - previous_3.`__timestamp`),
                    0
                ) AS `time_delta`
            FROM `source` source
            LEFT JOIN `source` root USE INDEX(`root_depth`)
                ON source.`__root` = root.`__root` AND root.`__depth` = 0
            # Same level, direct previous
            LEFT JOIN `source` previous USE INDEX(`root_relpos`)
                ON source.`__root`= previous.`__root`
                AND previous.`__depth` = source.`__depth`
                AND previous.`__position_relative` = source.`__position_relative` - 1
            # One level up, direct previous
            LEFT JOIN `source` previous_2 USE INDEX(`root_relpos`)
                ON source.`__root`= previous_2.`__root`
                AND previous_2.`__depth` = source.`__depth` - 1
                AND previous_2.`__position_relative` = source.`__position_relative` - 1
            # No direct previous, finding closest previous on the same level
            LEFT JOIN `source` previous_3 USE INDEX(`root_depth_pos`)
                ON source.`__root`= previous_3.`__root`
                AND previous_3.`__depth` = source.`__depth`
                AND previous_3.`__position_depth` = source.`__position_depth` - 1
            ;'
        );
    } else {
        $this->db->query(
            'CREATE VIEW `out.source` AS
            SELECT
                `id` AS `' . $idColumn . '`,
                `__root` AS `root`,
                `__position` AS `position`,
                `__position_relative` AS `position_relative`,
                `__depth` AS `depth`
            FROM `source`;'
        );
    }

    // Export Data
    $outFile = $outDirectory . DIRECTORY_SEPARATOR . 'destination.csv';

    // Every value that is not a fixed literal is shell-escaped, so credentials or
    // paths containing spaces, quotes or shell metacharacters cannot break or
    // alter the command line.
    // NOTE(review): the exit code checked below is that of the last command in
    // the pipe (sed), so a failure of mysql itself may go unnoticed - confirm.
    $command = 'mysql -u ' . escapeshellarg((string) $this->db->getUsername())
        . ' -p' . escapeshellarg((string) $this->db->getPassword())
        . ' -h ' . escapeshellarg((string) $this->db->getHost())
        . ' ' . escapeshellarg((string) $this->db->getDatabase())
        . ' --default-character-set=UTF8 --batch --execute '
        . escapeshellarg('SELECT * FROM `out.source`;')
        . ' --quick | sed \'s/\\t/,/g\' > '
        . escapeshellarg($outFile);

    $process = new Process($command);
    $process->run();

    if ($process->getExitCode() != 0) {
        $error = $process->getErrorOutput();
        if (!$error) {
            $error = $process->getOutput();
        }
        throw new DBALException('MySQL export error: ' . $error);
    }
}
/**
 * Upload a CSV file to a Storage API table, creating the bucket and/or the
 * table when they do not exist yet.
 *
 * $outputTable is split on dots: the first two parts form the bucket id, the
 * third one is the table name. Writing into an existing table is attempted at
 * most twice, with a one second pause before the retry.
 *
 * @param CsvFile     $csv         File to upload; must have a non-empty header.
 * @param string      $outputTable Table identifier made of three dot-separated parts.
 * @param bool        $incremental Passed as the 'incremental' option when writing to an existing table.
 * @param string|null $primaryKey  Passed as the 'primaryKey' option when the table is created.
 * @throws UserException        On a malformed table name or a client error with code below 500.
 * @throws ApplicationException On an empty file or a client error with code 500 and above.
 */
protected function writeTable(CsvFile $csv, $outputTable, $incremental, $primaryKey)
{
    try {
        $tableNameArr = explode('.', $outputTable);
        $bucketId = $tableNameArr[0] . "." . $tableNameArr[1];
        $tableName = $tableNameArr[2];
    } catch (ContextErrorException $e) {
        throw new UserException("Wrong output table name.", $e);
    }

    if (!count($csv->getHeader())) {
        throw new ApplicationException("Trying to upload an empty file");
    }

    try {
        if (!$this->storageApi->bucketExists($bucketId)) {
            // Strip only the leading "c-": removing every occurrence would also
            // mangle names such as "c-abc-def" (which would become "abdef").
            $bucketName = preg_replace('/^c-/', '', $tableNameArr[1]);
            $this->storageApi->createBucket($bucketName, SapiClient::STAGE_IN, 'DB Extractor data bucket');
        }

        if (!$this->storageApi->tableExists($outputTable)) {
            $this->storageApi->createTableAsync($bucketId, $tableName, $csv, array('primaryKey' => $primaryKey));
        } else {
            // handle unexpected temporary errors like "unable to fork()"
            $maxAttempts = 2;
            $lastException = null;
            for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
                try {
                    $this->storageApi->writeTableAsync($outputTable, $csv, array('incremental' => $incremental));
                    $lastException = null;
                    break;
                } catch (\Exception $e) {
                    $lastException = $e;
                    $this->logger->warning("Error writing to SAPI", ['exception' => $e]);
                    // Pause only when another attempt follows; there is nothing to
                    // wait for after a success or after the last failure.
                    if ($attempt < $maxAttempts) {
                        sleep(1);
                    }
                }
            }

            if ($lastException !== null) {
                throw $lastException;
            }
        }
    } catch (ClientException $e) {
        if ($e->getCode() < 500) {
            throw new UserException($e->getMessage(), $e);
        } else {
            throw new ApplicationException($e->getMessage(), $e);
        }
    }

    $this->logger->info("Table " . $tableName . " imported to Storage API");
}
/**
 * The header of the test-input.empty.csv fixture is expected to be an empty array.
 */
public function testEmptyHeader()
{
    $path = __DIR__ . '/_data/test-input.empty.csv';
    $emptyFile = new CsvFile($path, ',', '"');

    $header = $emptyFile->getHeader();

    $this->assertEquals(array(), $header);
}
/**
 * Every file supplied by the provider is expected to expose the same
 * nine-column header when read with its delimiter and '"' as enclosure.
 *
 * @dataProvider validCsvFiles
 * @param string $fileName  File name relative to the _data directory.
 * @param string $delimiter Field delimiter used to parse the file.
 */
public function testRead($fileName, $delimiter)
{
    $path = __DIR__ . '/_data/' . $fileName;
    $csvFile = new \Keboola\Csv\CsvFile($path, $delimiter, '"');

    $expectedHeader = array(
        "id",
        "idAccount",
        "date",
        "totalFollowers",
        "followers",
        "totalStatuses",
        "statuses",
        "kloutScore",
        "timestamp",
    );

    $this->assertEquals($expectedHeader, $csvFile->getHeader());
}