/**
 * Loads CSV data into an Oracle table, inserting rows one at a time in
 * chunks of up to 1000 per outer iteration.
 *
 * @param string $sourceFilename path to the source CSV file
 * @param string $outputTableName name of the target table
 * @param array $table table definition; items with type 'IGNORE' are skipped
 * @throws UserException when parsing or executing an INSERT fails
 */
public function write($sourceFilename, $outputTableName, $table)
{
    $csv = new CsvFile($sourceFilename);

    // Build the target column list, skipping ignored columns.
    $header = [];
    foreach ($table['items'] as $item) {
        if ($item['type'] != 'IGNORE') {
            $header[] = $item['dbName'];
        }
    }

    // Position the iterator on the first data row.
    $csv->getHeader();
    $csv->next();

    while ($csv->current() != null) {
        for ($i = 0; $i < 1000 && $csv->current() != null; $i++) {
            // Bind values through placeholders instead of concatenating them
            // into the SQL string. The previous implementation quoted values by
            // string concatenation, which breaks on values containing quotes
            // and is an SQL injection vector.
            $binds = [];
            $placeholders = [];
            foreach (array_values($csv->current()) as $idx => $col) {
                $placeholders[] = ':p' . $idx;
                $binds[':p' . $idx] = $col;
            }
            $sql = sprintf(
                "INSERT INTO {$outputTableName} (%s) VALUES (%s)",
                implode(',', $header),
                implode(',', $placeholders)
            );

            // oci_parse()/oci_execute() report failure via a false return
            // value, not exceptions — the original try/catch was dead code.
            $stmt = oci_parse($this->db, $sql);
            if ($stmt === false) {
                $error = oci_error($this->db);
                $message = isset($error['message']) ? $error['message'] : 'unknown OCI error';
                throw new UserException("Query failed: " . $message, null, ['query' => $sql]);
            }
            foreach ($binds as $name => $value) {
                // Bind by reference to the array element; values stay alive
                // until oci_execute() below.
                oci_bind_by_name($stmt, $name, $binds[$name]);
            }
            if (oci_execute($stmt) === false) {
                $error = oci_error($stmt);
                $message = isset($error['message']) ? $error['message'] : 'unknown OCI error';
                throw new UserException("Query failed: " . $message, null, ['query' => $sql]);
            }

            $csv->next();
        }
    }
}
/**
 * Loads CSV data into the target table using multi-row INSERT statements
 * executed inside a single transaction.
 *
 * @param string $sourceFilename path to the source CSV file
 * @param string $outputTableName name of the target table
 * @param array $table table definition used to encode each row ('items' key)
 */
function write($sourceFilename, $outputTableName, $table)
{
    $csv = new CsvFile($sourceFilename);

    // Prime the iterator, then skip the header line.
    $csv->next();
    $csv->next();

    // Nothing to do for a file with no data rows. Without this guard,
    // count() below would operate on false and we would build a malformed
    // INSERT statement.
    if ($csv->current() === false) {
        return;
    }

    $columnsCount = count($csv->current());
    // Cap the batch so a single statement stays under ~1000 values, but always
    // insert at least one row per statement — the original expression could
    // evaluate to 0 for very wide tables, producing an infinite loop and a
    // broken "INSERT INTO t VALUES" statement.
    $rowsPerInsert = max(1, intval(1000 / $columnsCount - 1));

    $this->db->beginTransaction();
    try {
        while ($csv->current() !== false) {
            $sql = "INSERT INTO " . $this->escape($outputTableName) . " VALUES ";
            for ($i = 0; $i < $rowsPerInsert && $csv->current() !== false; $i++) {
                $sql .= sprintf(
                    "(%s),",
                    implode(',', $this->encodeCsvRow($this->escapeCsvRow($csv->current()), $table['items']))
                );
                $csv->next();
            }
            // Drop the trailing comma left by the loop above.
            $sql = substr($sql, 0, -1);
            Logger::log('debug', sprintf("Executing query '%s'.", $sql));
            $this->db->exec($sql);
        }
        $this->db->commit();
    } catch (\Exception $e) {
        // Leave the database unchanged if any statement fails; the original
        // left the transaction dangling open.
        $this->db->rollBack();
        throw $e;
    }
}
/**
 * Uploads all rows of a table as events, in batches of up to 1000.
 *
 * @param $table
 * @throws \Keboola\Juicer\Exception\ApplicationException
 */
public function write($table)
{
    $tableName = $this->getTableName($table);
    $csv = new CsvFile($this->getSourceFileName($table));

    // Read the header row and build a row processor for it.
    $csv->next();
    $header = $csv->current();
    $processor = (new Processor($this->processorConfig[$tableName]))->getProcessor($header);

    // Stream the remaining rows to the client in batches.
    $csv->next();
    $created = 0;
    while ($csv->current()) {
        $events = [];
        $remaining = 1000;
        while ($remaining-- > 0 && $csv->current()) {
            $events[] = $processor($csv->current());
            $csv->next();
        }
        $response = $this->client->addEvents([$tableName => $events]);
        $created += count($response[$tableName]);
    }
    Logger::log('info', sprintf('Created %s events.', $created));
}
/**
 * Uploads all interaction data from the CSV file to the recommender API
 * in batches of 25000 rows.
 *
 * @param DataApi\Client $client API client used to insert the interactions
 * @return $this
 * @throws Exception when a row contains an unparseable timestamp
 */
public function upload(DataApi\Client $client)
{
    if (!is_null($this->filePath)) {
        $batchSize = 25000;

        $csvFile = new CsvFile($this->filePath);
        $csvFile->rewind();
        // Bug fix: the original used `break;` here, which is a fatal error
        // outside a loop/switch context. An empty file means nothing to upload.
        if (!$csvFile->valid()) {
            return $this;
        }

        $header = $csvFile->current();
        $csvFile->next();

        $interactionsBatch = new DataApi\Batch\InteractionsBatch();
        $batchRowsCount = 0;
        while ($csvFile->valid()) {
            // NOTE(review): assumes every row has the same column count as the
            // header — array_combine() fails otherwise; confirm upstream validation.
            $attributes = array_combine($header, $csvFile->current());
            $csvFile->next();

            // Accept both unix timestamps and textual date formats.
            try {
                if (is_numeric($attributes['timestamp'])) {
                    $date = new DateTime('@' . $attributes['timestamp']);
                } else {
                    $date = new DateTime($attributes['timestamp']);
                }
            } catch (Exception $e) {
                throw new Exception('Invalid date format in "' . $this->getName() . '" table');
            }

            $interactionsBatch->addInteraction(
                $attributes['user_id'],
                $attributes['item_id'],
                $attributes['interaction_id'],
                $date
            );
            $batchRowsCount += 1;

            // Flush a full batch, or the final partial batch at end of file.
            if ($batchRowsCount == $batchSize || !$csvFile->valid()) {
                $client->insertInteractions($interactionsBatch);
                $interactionsBatch = new DataApi\Batch\InteractionsBatch();
                $batchRowsCount = 0;
            }
        }
    }
    return $this;
}
/**
 * Runs the extractor twice with caching disabled and verifies that the
 * second run produces a newer timestamp than the first, i.e. the response
 * was actually re-fetched and not served from a cache.
 */
public function testNoCache()
{
    $filePath = './tests/data/noCache/out/tables/getPost.get';

    $firstDateTime = $this->runExtractionAndReadDateTime($filePath);
    // Make the two timestamps distinguishable.
    sleep(3);
    $secondDateTime = $this->runExtractionAndReadDateTime($filePath);

    self::assertTrue($firstDateTime < $secondDateTime);
}

/**
 * Executes the extractor, asserts it finished successfully, and returns the
 * DateTime parsed from the first column of the first data row of the output
 * table. Removes the output directory afterwards so the next run starts clean.
 *
 * @param string $filePath path to the expected output table
 * @return \DateTime
 */
private function runExtractionAndReadDateTime($filePath)
{
    $output = shell_exec('php ./run.php --data=./tests/data/noCache');
    self::assertEquals('Extractor finished successfully.' . PHP_EOL, $output);
    self::assertFileExists($filePath);

    $csv = new CsvFile($filePath);
    self::assertEquals(1, $csv->getColumnsCount());
    $csv->next();
    $data = $csv->current();
    unset($csv);

    $this->rmDir('./tests/data/noCache/out');

    return new \DateTime($data[0]);
}
/**
 * Uploads all item data from the CSV file to the recommender API in
 * batches of 5000 rows.
 *
 * @param DataApi\Client $client API client used to insert/update the items
 * @return $this
 */
public function upload(DataApi\Client $client)
{
    if (!is_null($this->filePath)) {
        $batchSize = 5000;

        $csvFile = new CsvFile($this->filePath);
        $csvFile->rewind();
        // Bug fix: the original used `break;` here, which is a fatal error
        // outside a loop/switch context. An empty file means nothing to upload.
        if (!$csvFile->valid()) {
            return $this;
        }

        $header = $csvFile->current();
        $csvFile->next();

        $itemsBatch = new DataApi\Batch\EntitiesBatch();
        $batchRowsCount = 0;
        while ($csvFile->valid()) {
            // NOTE(review): assumes every row has the same column count as the
            // header — array_combine() fails otherwise; confirm upstream validation.
            $attributes = array_combine($header, $csvFile->current());
            $csvFile->next();

            // Use the first primary-key column as the entity id and strip it
            // from the attribute payload.
            $pk = $this->manifest->getPrimaryKey()[0];
            $id = $attributes[$pk];
            unset($attributes[$pk]);

            $itemsBatch->addEntity($id, $attributes);
            $batchRowsCount += 1;

            // Flush a full batch, or the final partial batch at end of file.
            if ($batchRowsCount == $batchSize || !$csvFile->valid()) {
                $client->insertOrUpdateItems($itemsBatch);
                $itemsBatch = new DataApi\Batch\EntitiesBatch();
                $batchRowsCount = 0;
            }
        }
    }
    return $this;
}
/**
 * Appends every data row of $file2 (excluding its header) to $file1.
 */
protected function mergeFiles(CsvFile $file1, CsvFile $file2)
{
    // CsvFile::getHeader() rewinds the file, so advance $file1 to its end
    // before appending. Still a dirty, dirty hack.
    while ($file1->valid()) {
        $file1->next();
    }

    // Skip the header of $file2, then copy the remaining rows.
    $file2->rewind();
    $file2->next();
    while ($file2->valid()) {
        $file1->writeRow($file2->current());
        $file2->next();
    }
}
/**
 * Verifies that CsvFile implements the Iterator protocol: the header row is
 * current after rewind(), each next() advances one line, and the iterator
 * becomes invalid after the last line of the fixture file.
 */
public function testIterator()
{
    $csvFile = new CsvFile(__DIR__ . '/_data/test-input.csv');

    // Short array syntax instead of the legacy array() form.
    $expected = [
        "id",
        "idAccount",
        "date",
        "totalFollowers",
        "followers",
        "totalStatuses",
        "statuses",
        "kloutScore",
        "timestamp",
    ];

    // Header line.
    $csvFile->rewind();
    $this->assertEquals($expected, $csvFile->current());

    // First data line.
    $csvFile->next();
    $this->assertTrue($csvFile->valid());

    // Second data line.
    $csvFile->next();
    $this->assertTrue($csvFile->valid());

    // End of file.
    $csvFile->next();
    $this->assertFalse($csvFile->valid());
}