/**
 * Exports the result of a query to local file(s), using S3 as an intermediate staging area.
 *
 * The query result is UNLOADed to S3 with a temporary STS credential, then each resulting
 * part file is streamed down to the local filesystem and deleted from S3.
 *
 * @param string $path                   relative path (and filename prefix) for the exported file(s)
 * @param string $query                  SQL query whose result set will be exported
 * @param bool   $gzip                   when true, exported files are gzip compressed (".gz" suffix)
 * @param bool   $isParallel             when true, UNLOAD writes multiple part files in parallel
 * @param bool   $overwriteExistingFiles when true, existing files under $path (remote and local) are
 *                                       deleted first; otherwise a RuntimeException is thrown if any exist
 *
 * @throws \RuntimeException         when the target path is not empty and overwrite is not allowed
 * @throws \InvalidArgumentException rethrown from the finder unless it is the benign
 *                                   "directory does not exist" case
 */
public function exportToFile($path, $query, $gzip = false, $isParallel = true, $overwriteExistingFiles = false)
{
    // normalize: collapse duplicate slashes and strip the leading one
    $path   = ltrim(preg_replace('#/+#', "/", $path), "/");
    $s3path = $this->s3Fs->getRealpath($path);
    mdebug("export file to temp s3, prefix = %s", $s3path);

    $suffix       = $gzip ? "\\.gz" : "";
    $clearPattern = "#^" . preg_quote($path, "#") . "#";
    // UNLOAD part files are named "<prefix><slice>" or "<prefix>_part_<nn>", optionally gzipped
    $downloadPattern = "#^" . preg_quote($path, "#") . "([0-9]{2,})?(_part_[0-9]{2,})?{$suffix}\$#";
    mdebug("Clear pattern for %s is %s", $path, $clearPattern);
    mdebug("Download pattern for %s is %s", $path, $downloadPattern);

    // clear remote path
    try {
        $finder = $this->s3Fs->getFinder();
        $finder->path($clearPattern);
        if ($finder->count() > 0) {
            if ($overwriteExistingFiles) {
                foreach ($finder as $splFileInfo) {
                    $this->s3Fs->delete($splFileInfo->getRelativePathname());
                }
            }
            else {
                throw new \RuntimeException(
                    sprintf("The path is not empty on remote end, path = %s", $path)
                );
            }
        }
    } catch (\InvalidArgumentException $e) {
        // a missing directory simply means there is nothing to clear
        if (strpos($e->getMessage(), "directory does not exist") === false) {
            throw $e;
        }
    }

    // clear local path
    $finder = $this->localFs->getFinder();
    $finder->path($clearPattern);
    if ($finder->count() > 0) {
        if ($overwriteExistingFiles) {
            foreach ($finder as $splFileInfo) {
                $this->localFs->delete($splFileInfo->getRelativePathname());
            }
        }
        else {
            throw new \RuntimeException(sprintf("The path is not empty locally, path = %s", $path));
        }
    }

    $tempCredential = $this->sts->getTemporaryCredential();
    $this->connection->unloadToS3($query, $s3path, $tempCredential, true, $gzip, $isParallel);

    // download each unloaded part file locally, then remove it from S3
    $finder = $this->s3Fs->getFinder();
    $finder->path($downloadPattern);
    foreach ($finder as $splFileInfo) {
        $partName = $splFileInfo->getRelativePathname();
        $fh       = $this->s3Fs->readStream($partName);
        $this->localFs->putStream($partName, $fh);
        fclose($fh);
        $this->s3Fs->delete($partName);
    }
}
/**
 * Imports local file(s) into a Redshift table, using S3 as an intermediate staging area.
 *
 * Each local file matching the export naming pattern is uploaded to S3 under a unique
 * timestamp-based suffix, COPYed into the target table with a temporary STS credential,
 * and then removed from S3.
 *
 * @param string   $path             relative path (and filename prefix) of the files to import
 * @param string   $table            target Redshift table name
 * @param string[] $columns          column list for the COPY command
 * @param bool     $gzip             when true, source files are expected to be gzip compressed
 * @param bool     $overwriteS3Files when true, existing staged files on S3 are deleted first;
 *                                   otherwise a RuntimeException is thrown if any exist
 *
 * @throws \RuntimeException         when no local import files are found, or when the S3
 *                                   staging path is not empty and overwrite is not allowed
 * @throws \InvalidArgumentException rethrown from the finder unless it is the benign
 *                                   "directory does not exist" case
 */
public function importFromFile($path, $table, $columns, $gzip = false, $overwriteS3Files = false)
{
    // unique per-invocation suffix so concurrent imports do not collide on S3
    $timestamp = microtime(true) . getmypid();
    // normalize: collapse duplicate slashes and strip the leading one
    $path   = ltrim(preg_replace('#/+#', "/", $path), "/");
    $suffix = $gzip ? "\\.gz" : "";
    // matches files produced by exportToFile(): "<prefix><slice>" or "<prefix>_part_<nn>"
    $uploadPattern = "#^" . preg_quote($path, "#") . "([0-9]{2,})?(_part_[0-9]{2,})?{$suffix}\$#";
    // FIX: $timestamp must be quoted too — it contains a "." from microtime(true), which
    // would otherwise act as a regex wildcard in the clear pattern
    $clearPattern = "#^" . preg_quote($path, "#") . ".*/" . preg_quote($timestamp, "#") . "\$#";
    mdebug("Upload pattern is %s", $uploadPattern);
    mdebug("Clear pattern is %s", $clearPattern);

    $localFinder = $this->localFs->getFinder();
    $localFinder->path($uploadPattern);
    if ($localFinder->count() == 0) {
        throw new \RuntimeException(
            sprintf("No import files found at path: %s, pattern = %s", $path, $uploadPattern)
        );
    }

    // clear any previously staged files on S3
    try {
        $s3Finder = $this->s3Fs->getFinder();
        $s3Finder->path($clearPattern);
        if ($s3Finder->count() > 0) {
            if ($overwriteS3Files) {
                foreach ($s3Finder as $splFileInfo) {
                    $this->s3Fs->delete($splFileInfo->getRelativePathname());
                }
            }
            else {
                throw new \RuntimeException(
                    sprintf("The path is not empty on remote end, path = %s", $path)
                );
            }
        }
    } catch (\InvalidArgumentException $e) {
        // a missing directory simply means there is nothing to clear
        if (strpos($e->getMessage(), "directory does not exist") === false) {
            throw $e;
        }
    }

    $uploaded = [];
    foreach ($localFinder as $splFileInfo) {
        $relativePathname = $splFileInfo->getRelativePathname();
        $remoteName       = $relativePathname . "/" . $timestamp;
        $fh               = $this->localFs->readStream($relativePathname);
        // IMPORTANT: use write stream so that s3fs doesn't check for key existence,
        // which in turn would break the strong consistency of s3
        $this->s3Fs->writeStream($remoteName, $fh);
        fclose($fh);
        $uploaded[] = $remoteName;
        mdebug("Uploaded %s to %s", $relativePathname, $remoteName);
    }

    $s3path = $this->s3Fs->getRealpath($path);
    mdebug("uploaded file to temp s3, path = %s", $s3path);

    $tempCredential = $this->sts->getTemporaryCredential();
    $this->connection->copyFromS3($table, $columns, $s3path, $this->s3Region, $tempCredential, true, $gzip);

    // clean up the staged files after a successful COPY
    foreach ($uploaded as $relativePathname) {
        $this->s3Fs->delete($relativePathname);
    }
}
/**
 * End-to-end round trip: export with gzip + parallel UNLOAD, verify the exported
 * record count by decoding each part file, then re-import the same part files and
 * verify the table row count.
 */
public function testDataImportExportWithGzipAndParallel()
{
    $exportPrefix = "redshift_ut_" . time();
    $this->testDataImport(true);

    $exporter = new RedshiftExporter(self::$rs, self::$localFs, self::$s3Fs, self::$s3Region, self::$sts);
    $exporter->exportToFile($exportPrefix, "SELECT * FROM php_redshift_test", true, true, true);

    $exportedCount = 0;
    $finder        = self::$localFs->getFinder();
    $finder->path("#^" . preg_quote($exportPrefix, "#") . "#");
    $unloaded = [];
    foreach ($finder as $splFileInfo) {
        $relativePathname = $splFileInfo->getRelativePathname();
        $unloaded[]       = $relativePathname;
        $content          = self::$localFs->read($relativePathname);
        // decode once and reuse (original called gzdecode() twice per file)
        $decoded = gzdecode($content);
        mdebug($decoded);
        $fh = fopen('php://memory', 'r+');
        fwrite($fh, $decoded);
        rewind($fh);
        $reader = new DrdStreamReader($fh, self::FIELDS);
        while ($reader->readRecord()) {
            $exportedCount++;
        }
        fclose($fh);
    }
    self::assertEquals(5, $exportedCount);

    // test import of parallel data
    $importer = new RedshiftImporter(self::$rs, self::$localFs, self::$s3Fs, self::$s3Region, self::$sts);
    $importer->importFromFile($exportPrefix, 'php_redshift_test', self::FIELDS, true, true);
    $stmt = self::$rs->prepare("SELECT COUNT(*) FROM php_redshift_test");
    $stmt->execute();
    $result = $stmt->fetchColumn();
    self::assertEquals(10, $result);

    // clean up the exported local part files
    foreach ($unloaded as $relativePathname) {
        self::$localFs->delete($relativePathname);
    }
}
* Date: 2016-01-11 * Time: 17:52 */ require_once 'vendor/autoload.php'; use Oasis\Mlib\AwsWrappers\S3Client; use Oasis\Mlib\AwsWrappers\StsClient; use Oasis\Mlib\FlysystemWrappers\ExtendedAwsS3Adapter; use Oasis\Mlib\FlysystemWrappers\ExtendedFilesystem; use Oasis\Mlib\FlysystemWrappers\ExtendedLocal; use Oasis\Mlib\Redshift\DrdStreamWriter; use Oasis\Mlib\Redshift\RedshiftConnection; use Oasis\Mlib\Redshift\RedshiftExporter; use Oasis\Mlib\Redshift\RedshiftImporter; $awsConfig = ['profile' => "oasis-minhao", 'region' => 'ap-northeast-1']; $sts = new StsClient(['profile' => 'oasis-minhao', 'region' => 'ap-northeast-1']); $rs = RedshiftConnection::getConnection(["host" => "oas-dmp-test.cikskyn4dlgm.ap-northeast-1.redshift.amazonaws.com", "port" => 5439, "dbname" => "oasdmp", "user" => "oasdmp", "password" => "NU9qEG3nR8"]); $localFs = new ExtendedFilesystem(new ExtendedLocal('/tmp')); $s3Fs = new ExtendedFilesystem(new ExtendedAwsS3Adapter(new S3Client($awsConfig), "minhao-dev", "/tmp")); $importer = new RedshiftImporter($rs, $localFs, $s3Fs, 'ap-northeast-1', $sts); $exporter = new RedshiftExporter($rs, $localFs, $s3Fs, 'ap-northeast-1', $sts); $columns = explode(",", "a1,a2,a3,a4,a5,a6,a7"); $dataPath = 'data'; $localFs->put($dataPath, ''); $drd_os = $localFs->appendStream($dataPath); $writer = new DrdStreamWriter($drd_os, $columns); for ($i = 0; $i < 10; ++$i) { $data = []; for ($j = 0; $j < 7; ++$j) { $data['a' . ($j + 1)] = mt_rand(1, 10) + $j * 10; } $writer->writeRecord($data);