Ejemplo n.º 1
0
 /**
  * Inserts every data row of a CSV file into a database table, one INSERT per row.
  *
  * Column names are taken from $table['items'] (entries whose 'type' is
  * 'IGNORE' are left out). Row values are sent as bind variables instead of
  * being spliced into the SQL text, so a quote inside a CSV value can neither
  * break nor alter the statement.
  *
  * @param string $sourceFilename  Path of the CSV file to load.
  * @param string $outputTableName Destination table; interpolated verbatim into the SQL.
  * @param array  $table           Table definition; only $table['items'] (each with 'type' and 'dbName') is read.
  * @throws UserException When a statement cannot be parsed, bound or executed.
  */
 public function write($sourceFilename, $outputTableName, $table)
 {
     $csv = new CsvFile($sourceFilename);

     $header = [];
     foreach ($table['items'] as $item) {
         if ($item['type'] != 'IGNORE') {
             $header[] = $item['dbName'];
         }
     }
     $columnList = implode(',', $header);

     // Same positioning calls as before: getHeader(), then one next() to
     // reach the first row that gets inserted.
     $csv->getHeader();
     $csv->next();

     while ($csv->current() != null) {
         // Re-index the row so placeholder names are contiguous (:c0, :c1, ...).
         $values = array_values($csv->current());
         $placeholders = [];
         foreach (array_keys($values) as $idx) {
             $placeholders[] = ':c' . $idx;
         }
         $sql = sprintf("INSERT INTO {$outputTableName} (%s) VALUES (%s)", $columnList, implode(',', $placeholders));

         $stmt = false;
         try {
             // The OCI8 functions signal failure by returning false, so each
             // result is checked and turned into an exception explicitly.
             $stmt = oci_parse($this->db, $sql);
             if ($stmt === false) {
                 $error = oci_error($this->db);
                 throw new \RuntimeException($error ? $error['message'] : 'oci_parse() failed');
             }
             foreach ($placeholders as $idx => $placeholder) {
                 // oci_bind_by_name() binds by reference: bind the array slot
                 // itself, never a loop-local copy, and keep $values alive
                 // until oci_execute() has run.
                 if (!oci_bind_by_name($stmt, $placeholder, $values[$idx])) {
                     $error = oci_error($stmt);
                     throw new \RuntimeException($error ? $error['message'] : 'oci_bind_by_name() failed');
                 }
             }
             if (!oci_execute($stmt)) {
                 $error = oci_error($stmt);
                 throw new \RuntimeException($error ? $error['message'] : 'oci_execute() failed');
             }
         } catch (\Exception $e) {
             if ($stmt) {
                 oci_free_statement($stmt);
             }
             throw new UserException("Query failed: " . $e->getMessage(), $e, ['query' => $sql]);
         }
         oci_free_statement($stmt);

         $csv->next();
     }
 }
Ejemplo n.º 2
0
 /**
  * Loads the data rows of a CSV file into a table using multi-row INSERTs.
  *
  * Rows are grouped into statements of at most $rowsPerInsert rows (derived
  * from the column count of the first data row) and all statements run
  * inside one transaction on $this->db.
  *
  * @param string $sourceFilename  Path of the CSV file to load.
  * @param string $outputTableName Destination table; passed through $this->escape().
  * @param array  $table           Table definition; $table['items'] is handed to encodeCsvRow().
  */
 function write($sourceFilename, $outputTableName, $table)
 {
     $csv = new CsvFile($sourceFilename);
     // skip the header and stop on the first data row
     $csv->next();
     $csv->next();

     $firstRow = $csv->current();
     if (!is_array($firstRow)) {
         // Header-only (or empty) file: nothing to insert, and count() must
         // not be called on a non-array.
         return;
     }

     // Guard the divisor, and never let the batch size drop below one row:
     // a value <= 0 would leave the inner loop unable to advance the reader,
     // so the outer loop would never terminate.
     $columnsCount = max(1, count($firstRow));
     $rowsPerInsert = max(1, intval(1000 / $columnsCount - 1));

     $this->db->beginTransaction();
     while ($csv->current() !== false) {
         $sql = "INSERT INTO " . $this->escape($outputTableName) . " VALUES ";
         for ($i = 0; $i < $rowsPerInsert && $csv->current() !== false; $i++) {
             $sql .= sprintf("(%s),", implode(',', $this->encodeCsvRow($this->escapeCsvRow($csv->current()), $table['items'])));
             $csv->next();
         }
         // drop the trailing comma left by the last value tuple
         $sql = substr($sql, 0, -1);
         Logger::log('debug', sprintf("Executing query '%s'.", $sql));
         $this->db->exec($sql);
     }
     $this->db->commit();
 }
Ejemplo n.º 3
0
 /**
  * Reads the source CSV of a table and sends its rows to the client as
  * events, in batches of at most 1000 rows, then logs how many events the
  * client reported back.
  *
  * @param mixed $table Table descriptor; only passed on to getTableName() and getSourceFileName().
  * @throws \Keboola\Juicer\Exception\ApplicationException
  */
 public function write($table)
 {
     $tableName = $this->getTableName($table);
     $csv = new CsvFile($this->getSourceFileName($table));

     // First row is the header; it selects the row processor.
     $csv->next();
     $header = $csv->current();
     $processorFactory = new Processor($this->processorConfig[$tableName]);
     $processor = $processorFactory->getProcessor($header);

     // Move on to the first data row.
     $csv->next();

     $eventsCnt = 0;
     while ($csv->current() != null) {
         // Collect up to 1000 processed rows, stopping early at end of file.
         $batch = [];
         do {
             $batch[] = $processor($csv->current());
             $csv->next();
         } while (count($batch) < 1000 && $csv->current() != null);

         $result = $this->client->addEvents([$tableName => $batch]);
         $eventsCnt += count($result[$tableName]);
     }

     Logger::log('info', sprintf('Created %s events.', $eventsCnt));
 }
 /**
  * Upload all interactions from the CSV file to the recommender.
  *
  * The first CSV row is used as the header; every following row is turned
  * into an interaction (user_id, item_id, interaction_id, timestamp) and sent
  * through the client in batches of up to 25000 rows. Nothing is sent when no
  * file path is set or the file has no rows.
  *
  * @param DataApi\Client $client Client used to insert the interaction batches.
  * @return $this
  * @throws Exception When a row's timestamp cannot be parsed as a date.
  */
 public function upload(DataApi\Client $client)
 {
     if (is_null($this->filePath)) {
         return $this;
     }

     $batchSize = 25000;
     $csvFile = new CsvFile($this->filePath);
     $csvFile->rewind();
     if (!$csvFile->valid()) {
         // Empty file: nothing to upload. (A bare `break` is not allowed
         // here because there is no enclosing loop or switch.)
         return $this;
     }

     $header = $csvFile->current();
     $csvFile->next();

     $interactionsBatch = new DataApi\Batch\InteractionsBatch();
     $batchRowsCount = 0;
     while ($csvFile->valid()) {
         $attributes = array_combine($header, $csvFile->current());
         // Advance now so valid() below tells us whether this was the last row.
         $csvFile->next();

         // Numeric timestamps are treated as Unix epoch seconds ('@' prefix),
         // anything else is handed to the DateTime parser as-is.
         try {
             if (is_numeric($attributes['timestamp'])) {
                 $date = new DateTime('@' . $attributes['timestamp']);
             } else {
                 $date = new DateTime($attributes['timestamp']);
             }
         } catch (Exception $e) {
             // Keep the original parser error attached as the previous exception.
             throw new Exception('Invalid date format in "' . $this->getName() . '" table', 0, $e);
         }

         $interactionsBatch->addInteraction($attributes['user_id'], $attributes['item_id'], $attributes['interaction_id'], $date);
         $batchRowsCount += 1;

         // Flush when the batch is full or the file is exhausted.
         if ($batchRowsCount == $batchSize || !$csvFile->valid()) {
             $client->insertInteractions($interactionsBatch);
             $interactionsBatch = new DataApi\Batch\InteractionsBatch();
             $batchRowsCount = 0;
         }
     }

     return $this;
 }
Ejemplo n.º 5
0
 /**
  * Runs the extractor twice against the noCache fixture and checks that the
  * date written by the second run is later than the one from the first run.
  */
 public function testNoCache()
 {
     $outputFile = './tests/data/noCache/out/tables/getPost.get';
     $outputDir = './tests/data/noCache/out';

     // One extractor run: execute, verify the output file, and return the
     // date found in the first column of the row reached by one next() call.
     $runAndReadDate = function () use ($outputFile) {
         $stdout = shell_exec('php ./run.php --data=./tests/data/noCache');
         self::assertEquals('Extractor finished successfully.' . PHP_EOL, $stdout);
         $this->assertFileExists($outputFile);

         $reader = new CsvFile($outputFile);
         $this->assertEquals(1, $reader->getColumnsCount());
         $reader->next();
         $row = $reader->current();
         // Release the reader before the output directory gets removed.
         unset($reader);

         return new \DateTime($row[0]);
     };

     // first execution
     $firstDateTime = $runAndReadDate();
     $this->rmDir($outputDir);

     // Wait so the two runs cannot produce the same timestamp.
     sleep(3);

     // second execution
     $secondDateTime = $runAndReadDate();
     $this->assertTrue($firstDateTime < $secondDateTime);
     $this->rmDir($outputDir);
 }
Ejemplo n.º 6
0
 /**
  * Upload all items from the CSV file to the recommender.
  *
  * The first CSV row is used as the header; every following row becomes an
  * entity whose id is the value of the first primary-key column from the
  * manifest and whose attributes are the remaining columns. Entities are sent
  * through the client in batches of up to 5000 rows. Nothing is sent when no
  * file path is set or the file has no rows.
  *
  * @param DataApi\Client $client Client used to insert or update the item batches.
  * @return $this
  */
 public function upload(DataApi\Client $client)
 {
     if (is_null($this->filePath)) {
         return $this;
     }

     $batchSize = 5000;
     $csvFile = new CsvFile($this->filePath);
     $csvFile->rewind();
     if (!$csvFile->valid()) {
         // Empty file: nothing to upload. (A bare `break` is not allowed
         // here because there is no enclosing loop or switch.)
         return $this;
     }

     $header = $csvFile->current();
     $csvFile->next();

     $itemsBatch = new DataApi\Batch\EntitiesBatch();
     $batchRowsCount = 0;
     while ($csvFile->valid()) {
         $attributes = array_combine($header, $csvFile->current());
         // Advance now so valid() below tells us whether this was the last row.
         $csvFile->next();

         // Split the primary-key column off: it is the entity id, not an attribute.
         $pk = $this->manifest->getPrimaryKey()[0];
         $id = $attributes[$pk];
         unset($attributes[$pk]);

         $itemsBatch->addEntity($id, $attributes);
         $batchRowsCount += 1;

         // Flush when the batch is full or the file is exhausted.
         if ($batchRowsCount == $batchSize || !$csvFile->valid()) {
             $client->insertOrUpdateItems($itemsBatch);
             $itemsBatch = new DataApi\Batch\EntitiesBatch();
             $batchRowsCount = 0;
         }
     }

     return $this;
 }
Ejemplo n.º 7
0
 /**
  * Appends every row of $file2 except its first (header) row to $file1.
  *
  * @param CsvFile $file1 File that receives the rows.
  * @param CsvFile $file2 File whose rows after the header are copied.
  */
 protected function mergeFiles(CsvFile $file1, CsvFile $file2)
 {
     // CsvFile::getHeader resets the file to its first line, so the target
     // has to be forwarded to its end again before anything is appended.
     // This is admittedly a hack.
     while ($file1->valid()) {
         $file1->next();
     }

     $headerSkipped = false;
     foreach ($file2 as $row) {
         if (!$headerSkipped) {
             // The first row of the source is its header; do not copy it.
             $headerSkipped = true;
             continue;
         }
         $file1->writeRow($row);
     }
 }
Ejemplo n.º 8
0
 /**
  * Walks the test-input.csv fixture with the iterator API: the header row,
  * two further lines that must be valid, and then the end of the file.
  */
 public function testIterator()
 {
     $csv = new CsvFile(__DIR__ . '/_data/test-input.csv');
     $headerColumns = [
         "id",
         "idAccount",
         "date",
         "totalFollowers",
         "followers",
         "totalStatuses",
         "statuses",
         "kloutScore",
         "timestamp",
     ];

     // After rewind() the current row is the header line.
     $csv->rewind();
     $this->assertEquals($headerColumns, $csv->current());

     // The first and second line after the header must both be readable.
     for ($line = 1; $line <= 2; $line++) {
         $csv->next();
         $this->assertTrue($csv->valid());
     }

     // One more step moves past the end of the file.
     $csv->next();
     $this->assertFalse($csv->valid());
 }