/**
 * Writes every row produced by generateRawData() into a CSV file at $this->path.
 */
public function writerTest()
{
    $writer = new CsvFile($this->path);
    foreach ($this->generateRawData() as $rawRow) {
        $writer->writeRow($rawRow);
    }
}
/**
 * Imports every file referenced by a manifest into the staging table.
 *
 * The manifest CsvFile's own delimiter/enclosure/escape settings are reused
 * for each file it references.
 *
 * @param string  $stagingTableName
 * @param CsvFile $csvFile manifest file
 */
private function importFile($stagingTableName, CsvFile $csvFile)
{
    $paths = $this->getFilesToDownloadFromManifest($csvFile->getPathname());
    foreach ($paths as $filePath) {
        $manifestEntry = new CsvFile(
            $filePath,
            $csvFile->getDelimiter(),
            $csvFile->getEnclosure(),
            $csvFile->getEscapedBy()
        );
        $this->importTable($stagingTableName, $manifestEntry);
    }
}
/**
 * Exports all Entry records to a CSV file (header row first), then refreshes
 * the stored file size.
 *
 * @return bool always true
 */
public function run()
{
    $export = new CsvFile($this->fullPath());

    $records = Entry::all();

    // Header row is derived from the model's attribute names.
    $export->writeRow(array_keys((new Entry())->toArray()));

    foreach ($records as $record) {
        $export->writeRow(array_values($record->toArray()));
    }

    $this->updateFileSize();

    return true;
}
/**
 * Exports the result of $query into a temp CSV using a server-side cursor
 * (batched FETCHes inside a transaction), then imports the file via writeTable().
 *
 * Values matching the configured 'null' placeholder are replaced before writing.
 * When the query yields no rows, nothing is written or imported.
 *
 * @param string $query SQL query to export
 * @param string $outputTable destination table name (also temp-file prefix)
 * @param bool $incremental passed through to writeTable()
 * @param mixed|null $primaryKey passed through to writeTable()
 * @throws DbException when the query or any fetch fails (transaction is rolled back)
 */
public function export($query, $outputTable, $incremental = false, $primaryKey = null)
{
    $outFilename = tempnam("/tmp/ex-db", $outputTable) . ".csv";
    $csv = new CsvFile($outFilename);
    $this->logger->info("Exporting to " . $outputTable);
    // Unique-ish cursor name; collisions possible within the same second — presumably acceptable here.
    $cursorName = 'exdbcursor' . intval(microtime(true));
    $curSql = "DECLARE {$cursorName} CURSOR FOR {$query}";
    try {
        $this->conn->beginTransaction(); // cursors require a transaction.
        $stmt = $this->conn->prepare($curSql);
        $stmt->execute();
        // Fetch a single row first so the CSV header can be derived from its keys.
        $innerStatement = $this->conn->prepare("FETCH 1 FROM {$cursorName}");
        $innerStatement->execute();
        // write header and first line
        $resultRow = $innerStatement->fetch(\PDO::FETCH_ASSOC);
        if (is_array($resultRow) && !empty($resultRow)) {
            $csv->writeRow(array_keys($resultRow));
            if (isset($this->dbConfig['null'])) {
                $resultRow = $this->replaceNull($resultRow, $this->dbConfig['null']);
            }
            $csv->writeRow($resultRow);
            // write the rest
            $innerStatement = $this->conn->prepare("FETCH 5000 FROM {$cursorName}");
            while ($innerStatement->execute() && count($resultRows = $innerStatement->fetchAll(\PDO::FETCH_ASSOC)) > 0) {
                foreach ($resultRows as $resultRow) {
                    if (isset($this->dbConfig['null'])) {
                        $resultRow = $this->replaceNull($resultRow, $this->dbConfig['null']);
                    }
                    $csv->writeRow($resultRow);
                }
            }
            // close the cursor
            $this->conn->exec("CLOSE {$cursorName}");
            $this->conn->commit();
            $this->writeTable($csv, $outputTable, $incremental, $primaryKey);
        } else {
            $this->logger->warning("Query returned empty result. Nothing was imported.");
        }
    } catch (\PDOException $e) {
        // Best-effort rollback; a rollback failure must not mask the original error.
        try {
            $this->conn->rollBack();
        } catch (\Exception $e2) {
        }
        $innerStatement = null;
        $stmt = null;
        throw new DbException("DB query failed: " . $e->getMessage(), $e);
    }
}
/**
 * Loads all data rows of a CSV file into an Oracle table, one INSERT per row.
 *
 * Fixes over the previous version:
 *  - values are passed as bind variables instead of being concatenated into
 *    the SQL string wrapped in raw single quotes (the old code was open to
 *    SQL injection and broke on any value containing a quote);
 *  - oci_parse()/oci_execute() failures are detected by checking their
 *    return values — these functions return false on error rather than
 *    throwing, so the old try/catch could never fire.
 *
 * @param string $sourceFilename path to the source CSV (first row = header)
 * @param string $outputTableName target table name
 * @param array $table table definition; items typed 'IGNORE' are excluded
 * @throws UserException when the INSERT cannot be parsed or executed
 */
public function write($sourceFilename, $outputTableName, $table)
{
    $csv = new CsvFile($sourceFilename);

    $header = [];
    foreach ($table['items'] as $item) {
        if ($item['type'] != 'IGNORE') {
            $header[] = $item['dbName'];
        }
    }

    // Prepare one reusable statement with positional bind placeholders (:c0, :c1, ...).
    $placeholders = array_map(function ($i) {
        return ':c' . $i;
    }, array_keys($header));
    $sql = sprintf(
        "INSERT INTO {$outputTableName} (%s) VALUES (%s)",
        implode(',', $header),
        implode(',', $placeholders)
    );
    $stmt = oci_parse($this->db, $sql);
    if ($stmt === false) {
        $error = oci_error($this->db);
        throw new UserException("Query failed: " . $error['message'], null, ['query' => $sql]);
    }

    $csv->getHeader();
    $csv->next(); // skip the header row
    while ($csv->current() != null) {
        for ($i = 0; $i < 1000 && $csv->current() != null; $i++) {
            $row = array_values($csv->current());
            foreach ($row as $colIndex => $value) {
                // Bind the array element itself (by reference), so every
                // placeholder keeps its own value at execute time.
                oci_bind_by_name($stmt, ':c' . $colIndex, $row[$colIndex]);
            }
            if (oci_execute($stmt) === false) {
                $error = oci_error($stmt);
                throw new UserException("Query failed: " . $error['message'], null, ['query' => $sql]);
            }
            $csv->next();
        }
    }
}
/**
 * Builds a Redshift COPY command for loading a CSV from S3 into a temp table.
 *
 * Chooses between CSV mode (when an enclosure is set) and raw ESCAPE mode
 * (when an escape character is set — Redshift only supports backslash there).
 * Optional GZIP / MANIFEST flags and extra copy options are appended.
 *
 * Fix: corrected the misspelled error message ("backshlash" -> "backslash").
 *
 * @param string $tempTableName
 * @param array $columns column names to load
 * @param CsvFile $csvFile source file; its CSV settings drive the command
 * @param array $options isGzipped, isManifest, copyOptions
 * @return string the COPY command
 * @throws Exception when an escape character other than backslash is configured
 */
private function generateCopyCommand($tempTableName, $columns, CsvFile $csvFile, array $options)
{
    $tableNameEscaped = $this->tableNameEscaped($tempTableName);
    $columnsSql = implode(', ', array_map(function ($column) {
        return $this->quoteIdentifier($column);
    }, $columns));
    $command = "COPY {$tableNameEscaped} ({$columnsSql}) "
        . " FROM {$this->connection->quote($csvFile->getPathname())}"
        . " CREDENTIALS 'aws_access_key_id={$this->s3key};aws_secret_access_key={$this->s3secret}' "
        . " DELIMITER '{$csvFile->getDelimiter()}' ";
    if ($csvFile->getEnclosure()) {
        $command .= "QUOTE {$this->connection->quote($csvFile->getEnclosure())} ";
    }
    if ($csvFile->getEscapedBy()) {
        // raw format — Redshift's ESCAPE option only understands backslash
        if ($csvFile->getEscapedBy() != '\\') {
            throw new Exception('Only backslash can be used as escape character');
        }
        $command .= " ESCAPE ";
    } else {
        $command .= " CSV ";
    }
    if (!empty($options['isGzipped'])) {
        $command .= " GZIP ";
    }
    if (!empty($options['isManifest'])) {
        $command .= " MANIFEST ";
    }
    $command .= " IGNOREHEADER " . $this->getIgnoreLines();
    // custom options
    $command .= " " . implode(" ", $options['copyOptions']);
    return $command;
}
/**
 * Imports a product CSV: updates stock for SKUs that already exist, and
 * creates a simple product (then stock) for new SKUs. Repeated header rows
 * and empty rows are skipped.
 *
 * @param CsvFile $file
 * @param string $file_name reported when processing completes
 */
public function process(CsvFile $file, $file_name)
{
    $this->setCsvHeader($file->getHeader());

    foreach ($file as $csvRow) {
        $isHeaderRow = ($csvRow == $this->csvHeader);
        if ($isHeaderRow || $this->rowIsEmpty($csvRow)) {
            continue;
        }

        $record = $this->buildAssoc($csvRow);
        if (!$this->catalogRepository->productExists($record['sku'])) {
            $this->catalogRepository->createSimpleProduct($record);
        }
        // Stock is updated for both existing and newly created products.
        $this->catalogRepository->updateStock($record);
    }

    echo "End of file: " . $file_name;
}
/**
 * Execute the job.
 *
 * Queries each registered device's /DeviceInformationX endpoint, extracts the
 * firmware version from the returned XML, and writes one CSV row per device.
 *
 * Fix: firmware extraction now happens only when the XML was actually parsed.
 * Previously $deviceInfoX was dereferenced even when $reader->xml() failed,
 * which used an undefined (or stale, from a prior iteration) variable. The
 * redundant `$response = $reader->parse()` clobbering was also removed.
 *
 * @return void
 */
public function handle()
{
    // Avoid PHP timeouts when querying large clusters
    set_time_limit(0);
    //Create Sabre XML Reader Object
    $reader = new Reader();
    //Create Guzzle REST client Object
    $client = new Client(['headers' => ['Accept' => 'application/xml', 'Content-Type' => 'application/xml']]);
    //Pick up the csv report created in the calling controller
    $csvFile = new CsvFile($this->csvFileName);
    //Write the csv headers
    $csvFile->writeRow(explode(',', $this->csv_headers));
    //Loop the devices in $this->deviceList to get firmware info
    foreach ($this->deviceList as $device) {
        //Only get firmware if the device is registered
        if ($device['IsRegistered']) {
            \Log::debug('Running with', [$device]);
            try {
                //Query the phones web interface for the XML device info
                $response = $client->get('http://' . $device['IpAddress'] . '/DeviceInformationX', ['connect_timeout' => 2]);
            } catch (RequestException $e) {
                \Log::debug('Phone request exception', [$e]);
                continue;
            }
            $body = $response->getBody()->getContents();
            \Log::debug('XML:', [$body]);
            //Consume the XML with Sabre XML Reader
            if ($reader->xml($body)) {
                //Parse the XML
                $deviceInfoX = $reader->parse();
                //Find the index for XML key holding the Firmware information
                $index = searchMultiDimArray($deviceInfoX['value'], 'name', '{}versionID');
                //Place the firmware info into our $device array
                $device['Firmware'] = $deviceInfoX['value'][$index]['value'];
            } else {
                // No parseable XML — the row is still written, just without firmware.
                \Log::debug('Error, there was no XML', []);
            }
        }
        //Write the device info (with firmware when available) to csv
        $csvFile->writeRow($device);
    }
}
/**
 * Writes analytics results for one profile into a CSV.
 *
 * Each row gets a synthetic id (sha1 of the profile's Google id plus all
 * dimension values) and the profile id, followed by dimension and metric
 * values. A header row is emitted first unless the load is incremental.
 *
 * @param array $data list of Result objects
 * @param Profile $profile
 * @param CsvFile $csv
 * @param bool $incremental suppresses the header row when true
 */
public function saveToCsv(array $data, Profile $profile, CsvFile $csv, $incremental = false)
{
    $rowIndex = 0;
    /** @var Result $result */
    foreach ($data as $result) {
        $metrics = $result->getMetrics();
        $dimensions = $result->getDimensions();

        // Header only once, and only for full (non-incremental) loads.
        if ($rowIndex == 0 && !$incremental) {
            $csv->writeRow(array_merge(array('id', 'idProfile'), array_keys($dimensions), array_keys($metrics)));
        }

        // Normalize the date dimension to Y-m-d.
        if (isset($dimensions['date'])) {
            $dimensions['date'] = date('Y-m-d', strtotime($dimensions['date']));
        }

        $syntheticId = sha1($profile->getGoogleId() . implode('', $dimensions));
        $csv->writeRow(array_merge(
            array($syntheticId, $profile->getGoogleId()),
            array_values($dimensions),
            array_values($metrics)
        ));
        $rowIndex++;
    }
}
/**
 * Writes one output row per transposed column of a CSV row.
 *
 * Columns before the configured pivot point are carried into every output
 * row; each column at or after the pivot becomes a key/value pair.
 *
 * NOTE: the output array is deliberately reused across iterations, so a
 * transpose-header column set for one key persists into later rows whose
 * own transpose-header entry is empty (matches prior behavior).
 *
 * @param array $csvRow
 * @param array $csvHeaderRaw raw header used to label each pivoted column
 * @param array|null $csvTransposeHeader optional extra per-column labels
 */
protected function transpose($csvRow, $csvHeaderRaw, $csvTransposeHeader)
{
    $pivotAt = $this->config['transform']['transpose']['from'];

    // Shared prefix columns + pivoted remainder (keys preserved).
    $outRow = array_slice($csvRow, 0, $pivotAt - 1);
    $pivoted = array_slice($csvRow, $pivotAt - 1, null, true);

    foreach ($pivoted as $columnIndex => $cellValue) {
        $outRow['key'] = $csvHeaderRaw[$columnIndex];
        $outRow['value'] = $cellValue;
        if (!is_null($csvTransposeHeader) && !empty($csvTransposeHeader[$columnIndex])) {
            $outRow[$this->config['header']['transpose']['name']] = $csvTransposeHeader[$columnIndex];
        }
        $this->outputCsv->writeRow($outRow);
    }
}
/**
 * Uploads all interaction rows to the recommender API in batches of 25000.
 *
 * Fix: the original used `break` when the CSV was empty, but that statement
 * sits outside any loop — a fatal "cannot break outside of a loop" error in
 * PHP. It is replaced with an early return.
 *
 * @param DataApi\Client $client
 * @return $this
 * @throws Exception when a row contains an unparseable timestamp
 */
public function upload(DataApi\Client $client)
{
    if (!is_null($this->filePath)) {
        $batchSize = 25000;
        $csvFile = new CsvFile($this->filePath);
        $csvFile->rewind();
        if (!$csvFile->valid()) {
            // Empty file: nothing to upload.
            return $this;
        }
        $header = $csvFile->current();
        $csvFile->next();
        $interactionsBatch = new DataApi\Batch\InteractionsBatch();
        $batchRowsCount = 0;
        while ($csvFile->valid()) {
            $attributes = array_combine($header, $csvFile->current());
            $csvFile->next();
            // process row: numeric timestamps are UNIX epoch seconds.
            try {
                if (is_numeric($attributes['timestamp'])) {
                    $date = new DateTime('@' . $attributes['timestamp']);
                } else {
                    $date = new DateTime($attributes['timestamp']);
                }
            } catch (Exception $e) {
                throw new Exception('Invalid date format in "' . $this->getName() . '" table');
            }
            $interactionsBatch->addInteraction($attributes['user_id'], $attributes['item_id'], $attributes['interaction_id'], $date);
            $batchRowsCount += 1;
            // Flush a full batch, or the final partial batch at end of file.
            if ($batchRowsCount == $batchSize || !$csvFile->valid()) {
                $client->insertInteractions($interactionsBatch);
                $interactionsBatch = new DataApi\Batch\InteractionsBatch();
                $batchRowsCount = 0;
            }
        }
    }
    return $this;
}
/**
 * Streams the result of $query into $csv using a server-side cursor
 * (batched FETCHes inside a transaction).
 *
 * Values matching the configured 'replaceNull' placeholder are replaced
 * before writing. The CSV header is derived from the first row's keys.
 *
 * @param string $query SQL query to export
 * @param CsvFile $csv destination file
 * @return bool true when at least one row was written, false on empty result
 * @throws \PDOException rethrown after a best-effort rollback
 */
protected function executeQuery($query, CsvFile $csv)
{
    // Unique-ish cursor name; collisions possible within the same second.
    $cursorName = 'exdbcursor' . intval(microtime(true));
    $curSql = "DECLARE {$cursorName} CURSOR FOR {$query}";
    try {
        $this->db->beginTransaction(); // cursors require a transaction.
        $stmt = $this->db->prepare($curSql);
        $stmt->execute();
        // Fetch a single row first so the header can be derived from its keys.
        $innerStatement = $this->db->prepare("FETCH 1 FROM {$cursorName}");
        $innerStatement->execute();
        // write header and first line
        $resultRow = $innerStatement->fetch(\PDO::FETCH_ASSOC);
        if (is_array($resultRow) && !empty($resultRow)) {
            $csv->writeRow(array_keys($resultRow));
            if (isset($this->dbConfig['replaceNull'])) {
                $resultRow = $this->replaceNull($resultRow, $this->dbConfig['replaceNull']);
            }
            $csv->writeRow($resultRow);
            // write the rest
            $innerStatement = $this->db->prepare("FETCH 5000 FROM {$cursorName}");
            while ($innerStatement->execute() && count($resultRows = $innerStatement->fetchAll(\PDO::FETCH_ASSOC)) > 0) {
                foreach ($resultRows as $resultRow) {
                    if (isset($this->dbConfig['replaceNull'])) {
                        $resultRow = $this->replaceNull($resultRow, $this->dbConfig['replaceNull']);
                    }
                    $csv->writeRow($resultRow);
                }
            }
            // close the cursor
            $this->db->exec("CLOSE {$cursorName}");
            $this->db->commit();
            return true;
        } else {
            $this->logger->warning("Query returned empty result. Nothing was imported.");
            return false;
        }
    } catch (\PDOException $e) {
        // Best-effort rollback; a rollback failure must not mask the original error.
        try {
            $this->db->rollBack();
        } catch (\Exception $e2) {
        }
        $innerStatement = null;
        $stmt = null;
        throw $e;
    }
}
/**
 * Builds a Snowflake COPY INTO command for loading a CSV from S3.
 *
 * File-format options are assembled from the CsvFile's settings: delimiter
 * always; SKIP_HEADER when ignore-lines is configured; either an optional
 * enclosure (with unenclosed-field escaping disabled) or an explicit
 * unenclosed-field escape character.
 *
 * @param string $tableName
 * @param CsvFile $csvFile
 * @return string the COPY INTO command
 */
private function generateCopyCommand($tableName, CsvFile $csvFile)
{
    $csvOptions = [sprintf('FIELD_DELIMITER = %s', $this->quote($csvFile->getDelimiter()))];

    if ($this->getIgnoreLines()) {
        $csvOptions[] = sprintf('SKIP_HEADER = %d', $this->getIgnoreLines());
    }

    if ($csvFile->getEnclosure()) {
        $csvOptions[] = sprintf("FIELD_OPTIONALLY_ENCLOSED_BY = %s", $this->quote($csvFile->getEnclosure()));
        $csvOptions[] = "ESCAPE_UNENCLOSED_FIELD = NONE";
    } elseif ($csvFile->getEscapedBy()) {
        $csvOptions[] = sprintf("ESCAPE_UNENCLOSED_FIELD = %s", $this->quote($csvFile->getEscapedBy()));
    }

    return sprintf(
        "COPY INTO %s FROM %s \n CREDENTIALS = (AWS_KEY_ID = %s AWS_SECRET_KEY = %s)\n REGION = %s\n FILE_FORMAT = (TYPE=CSV %s)",
        $this->nameWithSchemaEscaped($tableName),
        $this->quote($csvFile->getPathname()),
        $this->quote($this->s3key),
        $this->quote($this->s3secret),
        $this->quote($this->s3region),
        implode(' ', $csvOptions)
    );
}
/**
 * Streams the table's source CSV through a row processor and pushes the
 * resulting events to the client in batches of up to 1000.
 *
 * @param $table
 * @throws \Keboola\Juicer\Exception\ApplicationException
 */
public function write($table)
{
    $tableName = $this->getTableName($table);

    $csv = new CsvFile($this->getSourceFileName($table));
    $csv->next();
    $headerRow = $csv->current();

    // The processor is built from the table's config and its header row.
    $factory = new Processor($this->processorConfig[$tableName]);
    $processRow = $factory->getProcessor($headerRow);

    $csv->next();
    $createdCount = 0;
    while ($csv->current() != null) {
        $chunk = [];
        for ($i = 0; $i < 1000 && $csv->current() != null; $i++) {
            $chunk[] = $processRow($csv->current());
            $csv->next();
        }
        $result = $this->client->addEvents([$tableName => $chunk]);
        $createdCount += count($result[$tableName]);
    }
    Logger::log('info', sprintf('Created %s events.', $createdCount));
}
/**
 * Inserts all data rows of a CSV file into a database table using multi-row
 * INSERT statements, all within a single transaction.
 *
 * NOTE(review): values pass through escapeCsvRow()/encodeCsvRow() (defined
 * elsewhere) — verify those provide proper SQL quoting for the target DB.
 *
 * @param string $sourceFilename path to the source CSV
 * @param string $outputTableName target table (escaped via $this->escape())
 * @param array $table table definition; 'items' drives value encoding
 */
function write($sourceFilename, $outputTableName, $table)
{
    $csv = new CsvFile($sourceFilename);
    // skip the header
    // NOTE(review): two next() calls — assumes the CsvFile cursor starts
    // before the first line, so the first next() loads the header and the
    // second loads the first data row; confirm against the CsvFile version in use.
    $csv->next();
    $csv->next();
    $columnsCount = count($csv->current());
    // NOTE(review): precedence makes this intval((1000 / $columnsCount) - 1);
    // if intval(1000 / $columnsCount) - 1 was intended the difference is at
    // most one row per INSERT — confirm the intent.
    $rowsPerInsert = intval(1000 / $columnsCount - 1);
    $this->db->beginTransaction();
    while ($csv->current() !== false) {
        $sql = "INSERT INTO " . $this->escape($outputTableName) . " VALUES ";
        for ($i = 0; $i < $rowsPerInsert && $csv->current() !== false; $i++) {
            $sql .= sprintf("(%s),", implode(',', $this->encodeCsvRow($this->escapeCsvRow($csv->current()), $table['items'])));
            $csv->next();
        }
        // drop the trailing comma before executing
        $sql = substr($sql, 0, -1);
        Logger::log('debug', sprintf("Executing query '%s'.", $sql));
        $this->db->exec($sql);
    }
    $this->db->commit();
}
/**
 * Runs $query and streams the whole result set into the output CSV, writing
 * the column names as the first row. When any rows exist, a .manifest file
 * describing the destination table is written next to the CSV.
 *
 * @param string $query
 * @param string $outputTable destination table (drives the output file names)
 * @param bool $incremental recorded in the manifest
 * @param mixed|null $primaryKey recorded in the manifest
 * @return CsvFile the output CSV (may be empty when the query had no rows)
 * @throws DbException on query or fetch failure
 */
public function export($query, $outputTable, $incremental = false, $primaryKey = null)
{
    $outFilename = $this->dataDir . "/out/tables/" . $outputTable . ".csv";
    $outManifestFilename = $this->dataDir . "/out/tables/" . $outputTable . ".csv.manifest";
    $csv = new CsvFile($outFilename);

    try {
        $stmt = $this->conn->prepare($query);
        $stmt->execute();
    } catch (\PDOException $e) {
        throw new DbException("DB query failed: " . $e->getMessage(), $e);
    }

    // write header and first line
    try {
        $firstRow = $stmt->fetch(\PDO::FETCH_ASSOC);
    } catch (\PDOException $e) {
        throw new DbException("DB query fetch failed: " . $e->getMessage(), $e);
    }

    // Guard clause: empty result — report and return the (empty) CSV.
    if (!is_array($firstRow) || empty($firstRow)) {
        print_r("Query returned empty result. Nothing was imported.");
        return $csv;
    }

    $csv->writeRow(array_keys($firstRow));
    $csv->writeRow($firstRow);

    // write the rest
    try {
        while ($nextRow = $stmt->fetch(\PDO::FETCH_ASSOC)) {
            $csv->writeRow($nextRow);
        }
    } catch (\PDOException $e) {
        throw new DbException("DB query fetch failed: " . $e->getMessage(), $e);
    } catch (\Exception $e) {
        // catch warnings
        throw new DbException("Db query fetch failed: " . $e->getMessage(), $e);
    }

    // write manifest
    file_put_contents($outManifestFilename, Yaml::dump(['destination' => $outputTable, 'incremental' => $incremental, 'primary_key' => $primaryKey]));

    return $csv;
}
/**
 * Exports the result of an Oracle query into a temp CSV and imports it via
 * writeTable(). The header is derived from the first row's keys; values
 * matching the configured 'null' placeholder are replaced, and doubled
 * quotes are collapsed ("" -> ").
 *
 * NOTE(review): oci_parse()/oci_execute()/oci_fetch_assoc() signal failure by
 * returning false rather than throwing, so these try/catch blocks only fire
 * if a global error handler converts warnings to exceptions — verify that
 * such a handler is installed.
 *
 * @param string $query SQL query to export
 * @param string $outputTable destination table name (also temp-file prefix)
 * @param bool $incremental passed through to writeTable()
 * @param mixed|null $primaryKey passed through to writeTable()
 * @throws DbException on query/fetch failure (see NOTE above)
 * @throws ApplicationException when the produced file has no header
 */
public function export($query, $outputTable, $incremental = false, $primaryKey = null)
{
    $outFilename = tempnam("/tmp/ex-db", $outputTable) . ".csv";
    $csv = new CsvFile($outFilename);
    $this->logger->info("Exporting to " . $outputTable);
    try {
        $stmt = oci_parse($this->conn, $query);
        oci_execute($stmt);
    } catch (\Exception $e) {
        throw new DbException("DB query failed: " . $e->getMessage(), $e);
    }
    // write header and first line
    try {
        $resultRow = oci_fetch_assoc($stmt);
    } catch (\Exception $e) {
        throw new DbException("DB query fetch failed: " . $e->getMessage(), $e);
    }
    if (is_array($resultRow) && !empty($resultRow)) {
        $csv->writeRow(array_keys($resultRow));
        if (isset($this->dbConfig['null'])) {
            $resultRow = $this->replaceNull($resultRow, $this->dbConfig['null']);
        }
        // collapse doubled quotes produced by the source
        $resultRow = str_replace('""', '"', $resultRow);
        $csv->writeRow($resultRow);
        // write the rest
        try {
            while ($resultRow = oci_fetch_assoc($stmt)) {
                if (isset($this->dbConfig['null'])) {
                    $resultRow = $this->replaceNull($resultRow, $this->dbConfig['null']);
                }
                $resultRow = str_replace('""', '"', $resultRow);
                $csv->writeRow($resultRow);
            }
        } catch (\Exception $e) {
            throw new DbException("DB query fetch failed: " . $e->getMessage(), $e);
        }
        // sanity check: a file with no header would break the import
        if (!count($csv->getHeader())) {
            throw new ApplicationException("Trying to upload an empty file");
        }
        $this->writeTable($csv, $outputTable, $incremental, $primaryKey);
    } else {
        $this->logger->warning("Query returned empty result. Nothing was imported.");
    }
}
/**
 * Without caching, two consecutive extractor runs must produce fresh
 * responses: the timestamp extracted from the second run's output must be
 * strictly later than the first's.
 */
public function testNoCache()
{
    $filePath = './tests/data/noCache/out/tables/getPost.get';

    $extracted = [];
    foreach ([1, 2] as $execution) {
        $output = shell_exec('php ./run.php --data=./tests/data/noCache');
        self::assertEquals('Extractor finished successfully.' . PHP_EOL, $output);
        $this->assertFileExists($filePath);

        $csv = new CsvFile($filePath);
        $this->assertEquals(1, $csv->getColumnsCount());
        $csv->next();
        $data = $csv->current();
        unset($csv);
        $extracted[] = new \DateTime($data[0]);

        // Clean up between runs and give the timestamps room to differ.
        if ($execution === 1) {
            $this->rmDir('./tests/data/noCache/out');
            sleep(3);
        }
    }

    $this->assertTrue($extracted[0] < $extracted[1]);
    $this->rmDir('./tests/data/noCache/out');
}
/**
 * Bulk-loads a CSV file into Elasticsearch.
 *
 * Rows are paired with index actions and flushed whenever the bulk size is
 * reached; a final partial bulk is flushed after the loop. When the bulk
 * response reports errors, the first item-level error is logged and false is
 * returned (even when no item-level error message can be located).
 *
 * @param CsvFile $file source CSV; the first row is the header
 * @param LoadOptions $options target index/type and bulk size
 * @param string|null $primaryIndex CSV column used as document _id; when null,
 *        Elasticsearch assigns ids
 * @return bool true on success, false on a missing id column or bulk errors
 */
public function loadFile(CsvFile $file, LoadOptions $options, $primaryIndex = null)
{
    $csvHeader = $file->getHeader();
    $params = ['body' => []];
    $iBulk = 1;
    foreach ($file as $i => $line) {
        // skip header
        if (!$i) {
            continue;
        }
        $lineData = array_combine($csvHeader, $line);
        if ($primaryIndex) {
            if (!array_key_exists($primaryIndex, $lineData)) {
                $this->logger->error(sprintf("CSV error: Missing id column %s on line %s", $primaryIndex, $i + 1));
                return false;
            }
            $params['body'][] = ['index' => ['_index' => $options->getIndex(), '_type' => $options->getType(), '_id' => $lineData[$primaryIndex]]];
        } else {
            $params['body'][] = ['index' => ['_index' => $options->getIndex(), '_type' => $options->getType()]];
        }
        $params['body'][] = $lineData;
        // Flush a full bulk every getBulkSize() data rows.
        if ($i % $options->getBulkSize() == 0) {
            $this->logger->info(sprintf("Write %s batch %d to %s start", $options->getType(), $iBulk, $options->getIndex()));
            $responses = $this->client->bulk($params);
            $this->logger->info(sprintf("Write %s batch %d to %s took %d ms", $options->getType(), $iBulk, $options->getIndex(), $responses['took']));
            $params = ['body' => []];
            if ($responses['errors'] !== false) {
                // Log the first item-level error we can find, then abort.
                if (!empty($responses['items'])) {
                    foreach ($responses['items'] as $itemResult) {
                        if (!empty($itemResult['index']['error'])) {
                            if (is_array($itemResult['index']['error'])) {
                                $this->logger->error(sprintf("ES error: %s", $this->getErrorMessageFromErrorField($itemResult['index']['error'])));
                            } else {
                                $this->logger->error(sprintf("ES error: %s", $itemResult['index']['error']));
                            }
                            return false;
                        }
                    }
                }
                // Errors flagged but no item-level detail found — still abort.
                return false;
            }
            $iBulk++;
            unset($responses);
        }
    }
    // Flush the final partial bulk, if any rows remain unbatched.
    if (!empty($params['body'])) {
        $this->logger->info(sprintf("Write %s batch %d to %s start", $options->getType(), $iBulk, $options->getIndex()));
        $responses = $this->client->bulk($params);
        $this->logger->info(sprintf("Write %s batch %d to %s took %d ms", $options->getType(), $iBulk, $options->getIndex(), $responses['took']));
        if ($responses['errors'] !== false) {
            // Same error handling as the in-loop flush.
            if (!empty($responses['items'])) {
                foreach ($responses['items'] as $itemResult) {
                    if (!empty($itemResult['index']['error'])) {
                        if (is_array($itemResult['index']['error'])) {
                            $this->logger->error(sprintf("ES error: %s", $this->getErrorMessageFromErrorField($itemResult['index']['error'])));
                        } else {
                            $this->logger->error(sprintf("ES error: %s", $itemResult['index']['error']));
                        }
                        return false;
                    }
                }
            }
            return false;
        }
        unset($responses);
    }
    return true;
}
/**
 * Runs the Google Analytics extraction for all (or one selected) account.
 *
 * For each authorized account: ensures the output bucket exists, sets API
 * credentials, writes a profiles CSV, and downloads each configured dataset
 * for each profile within the requested date range. HTTP errors from the GA
 * API are translated into user/application exceptions; a 403 "forbidden" on
 * a profile is logged and skipped rather than aborting the run.
 *
 * @param array|null $options since/until (dates), dataset, account or config id
 * @return array per-account, per-profile, per-table status map ('ok')
 * @throws UserException on unknown account/config/dataset, auth failures,
 *         or client-side GA API errors
 * @throws ApplicationException on unexpected GA API responses
 */
public function run($options = null)
{
    $accounts = $this->configuration->getAccounts();
    $status = array();
    // Default window: 4 days ago through yesterday.
    $dateFrom = isset($options['since']) ? date('Y-m-d', strtotime($options['since'])) : date('Y-m-d', strtotime('-4 days'));
    $dateTo = isset($options['until']) ? date('Y-m-d', strtotime($options['until'])) : date('Y-m-d', strtotime('-1 day'));
    $dataset = isset($options['dataset']) ? $options['dataset'] : null;
    // Narrow down to a single account/config when requested.
    if (isset($options['account'])) {
        if (!isset($accounts[$options['account']])) {
            throw new UserException(sprintf("Account '%s' does not exist.", $options['account']));
        }
        $accounts = array($options['account'] => $accounts[$options['account']]);
    }
    if (isset($options['config'])) {
        if (!isset($accounts[$options['config']])) {
            throw new UserException(sprintf("Config '%s' does not exist.", $options['config']));
        }
        $accounts = array($options['config'] => $accounts[$options['config']]);
    }
    // A dataset filter only makes sense for a single account/config.
    if ($dataset != null && !isset($options['config']) && !isset($options['account'])) {
        throw new UserException("Missing parameter 'config'");
    }
    /** @var Account $account */
    foreach ($accounts as $accountId => $account) {
        // check if account has been authorized
        if (null == $account->getAttribute('accessToken')) {
            continue;
        }
        $this->currAccountId = $accountId;
        // Ensure the output bucket exists (create from the configured name if needed).
        if (null == $account->getAttribute('outputBucket')) {
            $this->configuration->initDataBucket($account->getAccountId());
        } else {
            if (!$this->configuration->getStorageApi()->bucketExists($account->getAttribute('outputBucket'))) {
                $outBucketArr = explode('.', $account->getAttribute('outputBucket'));
                $this->configuration->getStorageApi()->createBucket(str_replace('c-', '', $outBucketArr[1]), $outBucketArr[0], 'Google Analytics data bucket');
            }
        }
        $this->gaApi->getApi()->setCredentials($account->getAccessToken(), $account->getRefreshToken());
        $this->gaApi->getApi()->setRefreshTokenCallback(array($this, 'refreshTokenCallback'));
        // Collect id/name of every profile into a temp CSV for upload at the end.
        $tmpFileInfo = $this->temp->createFile("profiles-" . $accountId . "-" . microtime() . ".csv");
        $profilesCsv = new CsvFile($tmpFileInfo->getPathname());
        $profilesCsv->writeRow(array('id', 'name'));
        /** @var Profile $profile */
        foreach ($account->getProfiles() as $profile) {
            $profilesCsv->writeRow(array($profile->getGoogleId(), $profile->getName()));
            try {
                $configuration = $account->getConfiguration();
                // Download just the dataset specified in request
                if ($dataset != null) {
                    if (!isset($configuration[$dataset])) {
                        throw new UserException(sprintf("Dataset '%s' doesn't exist", $dataset));
                    }
                    $configuration = [$dataset => $configuration[$dataset]];
                }
                foreach ($configuration as $tableName => $cfg) {
                    // Download dataset only for specific profile
                    if (isset($cfg['profile']) && $cfg['profile'] != $profile->getGoogleId()) {
                        continue;
                    }
                    $antisampling = isset($cfg['antisampling']) ? $cfg['antisampling'] : false;
                    $this->getData($account, $profile, $tableName, $dateFrom, $dateTo, $antisampling);
                    $status[$accountId][$profile->getName()][$tableName] = 'ok';
                }
            } catch (RequestException $e) {
                // Map GA API HTTP errors to the appropriate exception type.
                if ($e->getCode() == 401) {
                    throw new UserException("Expired or wrong credentials, please reauthorize.", $e);
                }
                if ($e->getCode() == 403) {
                    if (strtolower($e->getResponse()->getReasonPhrase()) == 'forbidden') {
                        // Inaccessible profile: warn and continue with the next one.
                        $this->logger->warning("You don't have access to Google Analytics resource. Probably you don't have access to profile, or profile doesn't exists anymore.");
                        continue;
                    } else {
                        throw new UserException("Reason: " . $e->getResponse()->getReasonPhrase(), $e);
                    }
                }
                if ($e->getCode() == 400) {
                    throw new UserException($e->getMessage());
                }
                if ($e->getCode() == 503) {
                    throw new UserException("Google API error: " . $e->getMessage(), $e);
                }
                throw new ApplicationException($e->getResponse()->getBody(), $e);
            }
        }
        $this->getDataManager()->uploadCsv($profilesCsv->getPathname(), $this->getOutputTable($account, 'profiles'));
    }
    return $status;
}
/**
 * Data provider for import tests.
 *
 * Builds expected headers/rows from the local fixture CSVs, then yields
 * datasets of the form:
 *   [sourceFiles-or-copyParams, expectedHeader, expectedRows, destTable,
 *    (optional) importType, (optional) importOptions]
 *
 * Source files live in the S3 bucket named by the AWS_S3_BUCKET_ENV variable.
 */
public function tables()
{
    // Expected rows for the escaping fixture (header removed).
    $expectedEscaping = [];
    $file = new \Keboola\Csv\CsvFile(__DIR__ . '/_data/csv-import/escaping/standard-with-enclosures.csv');
    foreach ($file as $row) {
        $expectedEscaping[] = $row;
    }
    $escapingHeader = array_shift($expectedEscaping); // remove header
    $expectedEscaping = array_values($expectedEscaping);

    // Expected rows for the accounts fixture (header removed).
    $expectedAccounts = [];
    $file = new \Keboola\Csv\CsvFile(__DIR__ . '/_data/csv-import/tw_accounts.csv');
    foreach ($file as $row) {
        $expectedAccounts[] = $row;
    }
    $accountsHeader = array_shift($expectedAccounts); // remove header
    $expectedAccounts = array_values($expectedAccounts);

    // Same accounts data with a different column order.
    $file = new \Keboola\Csv\CsvFile(__DIR__ . '/_data/csv-import/tw_accounts.changedColumnsOrder.csv');
    $accountChangedColumnsOrderHeader = $file->getHeader();

    $s3bucket = getenv(self::AWS_S3_BUCKET_ENV);

    return [
        // manifest pointing at no files
        [[new CsvFile("s3://{$s3bucket}/empty.manifest")], $escapingHeader, [], 'out.csv_2Cols', 'manifest'],
        // plain, gzipped, tab-delimited and raw-escaped variants of the escaping fixture
        [[new CsvFile("s3://{$s3bucket}/standard-with-enclosures.csv")], $escapingHeader, $expectedEscaping, 'out.csv_2Cols'],
        [[new CsvFile("s3://{$s3bucket}/gzipped-standard-with-enclosures.csv.gz")], $escapingHeader, $expectedEscaping, 'out.csv_2Cols'],
        [[new CsvFile("s3://{$s3bucket}/standard-with-enclosures.tabs.csv", "\t")], $escapingHeader, $expectedEscaping, 'out.csv_2Cols'],
        [[new CsvFile("s3://{$s3bucket}/raw.rs.csv", "\t", '', '\\')], $escapingHeader, $expectedEscaping, 'out.csv_2Cols'],
        // accounts fixture, including reordered columns and manifest-driven imports
        [[new CsvFile("s3://{$s3bucket}/tw_accounts.changedColumnsOrder.csv")], $accountChangedColumnsOrderHeader, $expectedAccounts, 'accounts'],
        [[new CsvFile("s3://{$s3bucket}/tw_accounts.csv")], $accountsHeader, $expectedAccounts, 'accounts'],
        [[new CsvFile("s3://{$s3bucket}/01_tw_accounts.csv.manifest")], $accountsHeader, $expectedAccounts, 'accounts', 'manifest'],
        [[new CsvFile("s3://{$s3bucket}/03_tw_accounts.csv.gzip.manifest")], $accountsHeader, $expectedAccounts, 'accounts', 'manifest'],
        // table-to-table copy imports
        [['schemaName' => $this->sourceSchemaName, 'tableName' => 'out.csv_2Cols'], $escapingHeader, [['a', 'b'], ['c', 'd']], 'out.csv_2Cols', 'copy'],
        [['schemaName' => $this->sourceSchemaName, 'tableName' => 'types'], $escapingHeader, [['c', '1'], ['d', '0']], 'types', 'copy'],
        // reserved words as identifiers
        [[new CsvFile("s3://{$s3bucket}/reserved-words.csv")], ['column', 'table'], [['table', 'column']], 'table', 'csv'],
        // source data containing a _timestamp column
        [[new CsvFile("s3://{$s3bucket}/with-ts.csv")], ['col1', 'col2', '_timestamp'], [['a', 'b', '2014-11-10 13:12:06'], ['c', 'd', '2014-11-10 14:12:06']], 'out.csv_2Cols'],
        // import with the timestamp feature disabled
        [[new CsvFile("s3://{$s3bucket}/standard-with-enclosures.csv")], $escapingHeader, $expectedEscaping, 'out.no_timestamp_table', 'csv', ['useTimestamp' => false]],
    ];
}
/**
 * Runs the executor end-to-end and verifies that the data written to the
 * `encoding` table matches the source CSV byte-for-byte.
 */
public function testExecutor()
{
    $executor = $this->getExecutor(self::DRIVER);
    $executor->run();

    // Dump the written table back out to a temp CSV.
    $connection = $this->getWriter(self::DRIVER)->getConnection();
    $statement = $connection->query("SELECT * FROM encoding");
    $tableRows = $statement->fetchAll(\PDO::FETCH_ASSOC);

    $dumpFilename = tempnam('/tmp', 'db-wr-test-tmp');
    $dump = new CsvFile($dumpFilename);
    $dump->writeRow(["col1", "col2"]);
    foreach ($tableRows as $tableRow) {
        $dump->writeRow($tableRow);
    }

    $expectedFilename = $this->dataDir . "/" . self::DRIVER . "/in/tables/encoding.csv";
    $this->assertFileEquals($expectedFilename, $dumpFilename);
}
/**
 * Uploads all item rows to the recommender API in batches of 5000.
 *
 * The column named by the manifest's first primary key becomes the entity id
 * and is removed from the attribute set.
 *
 * Fix: the original used `break` when the CSV was empty, but that statement
 * sits outside any loop — a fatal "cannot break outside of a loop" error in
 * PHP. It is replaced with an early return.
 *
 * @param DataApi\Client $client
 * @return $this
 */
public function upload(DataApi\Client $client)
{
    if (!is_null($this->filePath)) {
        $batchSize = 5000;
        $csvFile = new CsvFile($this->filePath);
        $csvFile->rewind();
        if (!$csvFile->valid()) {
            // Empty file: nothing to upload.
            return $this;
        }
        $header = $csvFile->current();
        $csvFile->next();
        $itemsBatch = new DataApi\Batch\EntitiesBatch();
        $batchRowsCount = 0;
        while ($csvFile->valid()) {
            $attributes = array_combine($header, $csvFile->current());
            $csvFile->next();
            // process row - remove id column etc.
            $pk = $this->manifest->getPrimaryKey()[0];
            $id = $attributes[$pk];
            unset($attributes[$pk]);
            $itemsBatch->addEntity($id, $attributes);
            $batchRowsCount += 1;
            // Flush a full batch, or the final partial batch at end of file.
            if ($batchRowsCount == $batchSize || !$csvFile->valid()) {
                $client->insertOrUpdateItems($itemsBatch);
                $itemsBatch = new DataApi\Batch\EntitiesBatch();
                $batchRowsCount = 0;
            }
        }
    }
    return $this;
}
/**
 * An import driven by a manifest referencing a missing file must fail with
 * MANDATORY_FILE_NOT_FOUND and load nothing.
 */
public function testInvalidManifestImport()
{
    $s3bucket = getenv(self::AWS_S3_BUCKET_ENV);
    $sourceCsv = new \Keboola\Csv\CsvFile(__DIR__ . "/_data/csv-import/tw_accounts.csv");
    $invalidManifest = new \Keboola\Csv\CsvFile("s3://{$s3bucket}/02_tw_accounts.csv.invalid.manifest");

    $import = $this->getImport('manifest');
    $import->setIgnoreLines(1);

    try {
        $import->import('accounts-3', $sourceCsv->getHeader(), [$invalidManifest]);
        $this->fail('Manifest should not be uploaded');
    } catch (\Keboola\Db\Import\Exception $e) {
        $this->assertEquals(\Keboola\Db\Import\Exception::MANDATORY_FILE_NOT_FOUND, $e->getCode());
    }
}
/**
 * Downloading a Redshift-backed table with the "s3" staging option must
 * produce a manifest carrying the table id, its attributes and S3 info.
 */
public function testReadTablesS3Redshift()
{
    // Create bucket
    if (!$this->client->bucketExists("in.c-docker-test-redshift")) {
        $this->client->createBucket("docker-test-redshift", Client::STAGE_IN, "Docker Testsuite", "redshift");
    }

    // Create table
    if (!$this->client->tableExists("in.c-docker-test-redshift.test")) {
        $upload = new CsvFile($this->tmpDir . "/upload.csv");
        $upload->writeRow(["Id", "Name"]);
        $upload->writeRow(["test", "test"]);
        $this->client->createTableAsync("in.c-docker-test-redshift", "test", $upload);
        $this->client->setTableAttribute("in.c-docker-test-redshift.test", "attr1", "val2");
    }

    $downloadRoot = $this->tmpDir;
    $reader = new Reader($this->client);
    $configuration = [["source" => "in.c-docker-test-redshift.test", "destination" => "test-redshift.csv"]];
    $reader->downloadTables($configuration, $downloadRoot . "/download", "s3");

    $adapter = new TableManifestAdapter();
    $manifest = $adapter->readFromFile($downloadRoot . "/download/test-redshift.csv.manifest");
    $this->assertEquals("in.c-docker-test-redshift.test", $manifest["id"]);
    $this->assertEquals("val2", $manifest["attributes"][0]["value"]);
    $this->assertS3info($manifest);
}
/**
 * Importing a CSV containing a row over the size limit must fail with
 * ROW_SIZE_TOO_LARGE.
 */
public function testRowTooLongShouldThrowException()
{
    $longRowFile = new CsvFile(__DIR__ . "/_data/csv-import/very-long-row.csv");
    $this->setExpectedException(
        "Keboola\\Db\\Import\\Exception",
        '',
        \Keboola\Db\Import\Exception::ROW_SIZE_TOO_LARGE
    );
    $this->import->import('very-long-row', $longRowFile->getHeader(), [$longRowFile]);
}
/**
 * Appends all data rows of $file2 (its header skipped) to the end of $file1.
 */
protected function mergeFiles(CsvFile $file1, CsvFile $file2)
{
    // CsvFile::getHeader resets it to the first line,
    // so we need to forward it back to the end to append it
    // Also, this is a dirty, dirty hack
    while ($file1->valid()) {
        $file1->next();
    }

    $skippingHeader = true;
    foreach ($file2 as $row) {
        if ($skippingHeader) {
            $skippingHeader = false;
            continue;
        }
        $file1->writeRow($row);
    }
}
/**
 * Parses a CSV backup file and batch-saves its rows as content items.
 *
 * Delimiter detection: tries comma first; falls back to semicolon when the
 * comma parse yields fewer than 3 header columns, or when the first header
 * cell itself contains a semicolon. Each data row is remapped to an
 * associative array keyed by a normalized (lowercased, underscored) header.
 *
 * @param string $filename path to the CSV file
 * @return mixed result of batch_save(), or an array with an 'error' key
 */
public function queue_import_csv($filename)
{
    only_admin_access();
    if (!is_file($filename)) {
        return array('error' => "You have not provided a existing backup to restore.");
    }
    $csv = new \Keboola\Csv\CsvFile($filename);
    $head = $csv->getHeader();
    // Fewer than 3 header columns: assume the delimiter is ';' instead of ','.
    if (!isset($head[2])) {
        $csv = new \Keboola\Csv\CsvFile($filename, ';');
        $head = $csv->getHeader();
    } else {
        // Comma parse "worked" but the first cell still contains semicolons.
        if (isset($head[0]) and stristr($head[0], ';')) {
            $csv = new \Keboola\Csv\CsvFile($filename, ';');
            $head = $csv->getHeader();
        }
    }
    // NOTE(review): empty($csv) is always false for an object — only the
    // empty($head) branch of this guard can ever trigger.
    if (empty($head) or empty($csv)) {
        return array('error' => "CSV file cannot be parsed properly.");
    }
    $rows = array();
    $i = 0;
    foreach ($csv as $row) {
        // skip the header row ($i == 0)
        if ($i > 0) {
            $r = array();
            if (is_array($row)) {
                foreach ($row as $k => $v) {
                    if (isset($head[$k])) {
                        $row[$head[$k]] = $v;
                        // Normalize the header into a key: lowercase,
                        // spaces -> underscores, collapse doubles, trim.
                        $new_k = strtolower($head[$k]);
                        $new_k = str_replace(' ', '_', $new_k);
                        $new_k = str_replace('__', '_', $new_k);
                        // $new_k = preg_replace("/[^a-zA-Z0-9_]+/", "", $new_k);
                        $new_k = rtrim($new_k, '_');
                        $r[$new_k] = $v;
                    }
                }
            }
            $rows[] = $r;
        }
        $i++;
    }
    $content_items = $rows;
    $content_items = $this->map_array($rows);
    return $this->batch_save($content_items);
}
/**
 * Exports the configured tables/files into Elasticsearch.
 *
 * Each entry in $parameters['tables'] is either a Storage table (tableId) or
 * a local file (file); entries without the 'export' flag are skipped. Every
 * non-skipped entry is loaded via $this->writer->loadFile(); any failure
 * aborts the whole action with a UserException.
 *
 * @param array $parameters expects 'path' and 'tables'
 * @throws Exception\UserException on non-csv file, missing file, or failed load
 */
public function runAction($parameters)
{
    $path = $parameters["path"];
    $skipped = 0;
    $processed = 0;
    foreach ($parameters['tables'] as $table) {
        $sourceType = !empty($table['tableId']) ? 'table' : 'file';
        if ($sourceType == 'table') {
            $logPrefix = sprintf('Table %s - ', $table['tableId']);
        } else {
            $logPrefix = sprintf('File %s - ', $table['file']);
        }
        if (empty($table['export'])) {
            $this->logger->info($logPrefix . 'Skipped');
            $skipped++;
            continue;
        }
        $this->logger->info($logPrefix . 'Export start');
        // load options
        $options = new Options\LoadOptions();
        $options->setIndex($table['index'])->setType($table['type']);
        // NOTE(review): $config is never defined in this method, so this
        // condition is always false and a configured bulkSize is never
        // applied — was $parameters (or a class property) meant here? Verify.
        if (!empty($config['elastic']['bulkSize'])) {
            $options->setBulkSize($config['elastic']['bulkSize']);
        }
        $idColumn = !empty($table['id']) ? $table['id'] : null;
        // source file
        if (!empty($table['tableId'])) {
            $file = new CsvFile(sprintf('%s/%s.csv', $path, $table['tableId']));
        } else {
            $file = new CsvFile(sprintf('%s/%s', $path, $table['file']));
            if (mb_strtolower($file->getExtension()) !== 'csv') {
                throw new Exception\UserException($logPrefix . 'Export failed. Only csv files are supported');
            }
        }
        if (!$file->isFile()) {
            throw new Exception\UserException($logPrefix . 'Export failed. Missing csv file');
        }
        $result = $this->writer->loadFile($file, $options, $idColumn);
        if (!$result) {
            throw new Exception\UserException($logPrefix . 'Export failed');
        } else {
            $this->logger->info($logPrefix . 'Export finished', array());
        }
        $processed++;
    }
    $this->logger->info(sprintf("Exported %d tables. %d was skipped", $processed, $skipped));
}
/**
 * Counts every row of the CSV file (header included), leaving the cursor
 * rewound to the start afterwards.
 *
 * @param CsvFile $csvFile
 * @return int number of rows
 */
protected function countLines(CsvFile $csvFile)
{
    $total = 0;
    foreach ($csvFile as $line) {
        $total++;
    }
    $csvFile->rewind();
    return $total;
}