Ejemplo n.º 1
0
 /**
  * Unloads the result of a query to S3 and moves the resulting files to the local filesystem.
  *
  * Steps:
  *  1. Normalise $path and make sure nothing with that prefix exists remotely or locally
  *     (or delete what exists when $overwriteExistingFiles is true).
  *  2. Call unloadToS3() on the connection with temporary credentials.
  *  3. Copy every remote file matching the download pattern to the local filesystem
  *     under the same relative name, then delete the remote copy.
  *
  * @param string $path                   Relative path prefix; repeated slashes are collapsed and
  *                                       leading slashes are stripped before use.
  * @param string $query                  Query forwarded to unloadToS3().
  * @param bool   $gzip                   Forwarded to unloadToS3(); when true only files ending in
  *                                       ".gz" are downloaded.
  * @param bool   $isParallel             Forwarded to unloadToS3().
  * @param bool   $overwriteExistingFiles When true, existing remote/local files starting with $path
  *                                       are deleted; when false their presence is an error.
  *
  * @throws \RuntimeException         If files with the prefix already exist and overwriting is disabled.
  * @throws \InvalidArgumentException Rethrown from the remote finder unless its message contains
  *                                   "directory does not exist".
  */
 public function exportToFile($path, $query, $gzip = false, $isParallel = true, $overwriteExistingFiles = false)
 {
     // Collapse "//" runs and drop leading slashes so the same relative path works on both filesystems.
     $path = ltrim(preg_replace('#/+#', "/", $path), "/");
     $s3path = $this->s3Fs->getRealpath($path);
     mdebug("export file to temp s3, prefix = %s", $s3path);
     $suffix = $gzip ? "\\.gz" : "";
     // Clear pattern: anything starting with $path. Download pattern: $path followed by an optional
     // numeric slice suffix, an optional "_part_NN" suffix and (for gzip) ".gz".
     $clearPattern = "#^" . preg_quote($path, "#") . "#";
     $downloadPattern = "#^" . preg_quote($path, "#") . "([0-9]{2,})?(_part_[0-9]{2,})?{$suffix}\$#";
     mdebug("Clear pattern for %s is %s", $path, $clearPattern);
     mdebug("Download pattern for %s is %s", $path, $downloadPattern);

     // clear remote path
     try {
         $finder = $this->s3Fs->getFinder();
         $finder->path($clearPattern);
         if ($finder->count() > 0) {
             if ($overwriteExistingFiles) {
                 foreach ($finder as $splFileInfo) {
                     $this->s3Fs->delete($splFileInfo->getRelativePathname());
                 }
             } else {
                 throw new \RuntimeException(sprintf("The path is not empty on remote end, path = %s", $path));
             }
         }
     } catch (\InvalidArgumentException $e) {
         // A missing remote directory simply means there is nothing to clear; anything else is a real error.
         if (strpos($e->getMessage(), "directory does not exist") === false) {
             throw $e;
         }
     }

     // clear local path
     $finder = $this->localFs->getFinder();
     $finder->path($clearPattern);
     if ($finder->count() > 0) {
         if ($overwriteExistingFiles) {
             foreach ($finder as $splFileInfo) {
                 $this->localFs->delete($splFileInfo->getRelativePathname());
             }
         } else {
             throw new \RuntimeException(sprintf("The path is not empty locally, path = %s", $path));
         }
     }

     $tempCredential = $this->sts->getTemporaryCredential();
     $this->connection->unloadToS3($query, $s3path, $tempCredential, true, $gzip, $isParallel);

     // Move every unloaded part from S3 to the local filesystem.
     $finder = $this->s3Fs->getFinder();
     $finder->path($downloadPattern);
     foreach ($finder as $splFileInfo) {
         $partName = $splFileInfo->getRelativePathname();
         $fh = $this->s3Fs->readStream($partName);
         try {
             $this->localFs->putStream($partName, $fh);
         } finally {
             // Always release the handle, even when the local write fails; skip the close when
             // readStream() did not hand back a (still open) resource.
             if (is_resource($fh)) {
                 fclose($fh);
             }
         }
         // Only reached when the local copy succeeded, so a failed download never loses the remote part.
         $this->s3Fs->delete($partName);
     }
 }
Ejemplo n.º 2
0
 /**
  * Uploads local data files to S3 under temporary names and loads them into a table via copyFromS3().
  *
  * Every local file matching the upload pattern is written to S3 as "<relative name>/<timestamp>",
  * where the timestamp is microtime(true) concatenated with the process id. The temporary S3
  * objects are removed again once the copy has finished or failed.
  *
  * @param string $path             Relative path prefix of the local files; repeated slashes are
  *                                 collapsed and leading slashes are stripped before use.
  * @param string $table            Table name forwarded to copyFromS3().
  * @param array  $columns          Column list forwarded to copyFromS3().
  * @param bool   $gzip             Forwarded to copyFromS3(); when true only local files ending in
  *                                 ".gz" are uploaded.
  * @param bool   $overwriteS3Files When true, remote files matching the clear pattern are deleted;
  *                                 when false their presence is an error.
  *
  * @throws \RuntimeException         If no local file matches, or remote files exist and overwriting is disabled.
  * @throws \InvalidArgumentException Rethrown from the remote finder unless its message contains
  *                                   "directory does not exist".
  */
 public function importFromFile($path, $table, $columns, $gzip = false, $overwriteS3Files = false)
 {
     $timestamp = microtime(true) . getmypid();
     // Collapse "//" runs and drop leading slashes so the same relative path works on both filesystems.
     $path = ltrim(preg_replace('#/+#', "/", $path), "/");
     $suffix = $gzip ? "\\.gz" : "";
     $uploadPattern = "#^" . preg_quote($path, "#") . "([0-9]{2,})?(_part_[0-9]{2,})?{$suffix}\$#";
     // The timestamp contains a "." (from microtime), so it has to be quoted like the path;
     // otherwise the dot would match any character.
     $clearPattern = "#^" . preg_quote($path, "#") . ".*/" . preg_quote($timestamp, "#") . "\$#";
     mdebug("Upload pattern is %s", $uploadPattern);
     mdebug("Clear pattern is %s", $clearPattern);

     $localFinder = $this->localFs->getFinder();
     $localFinder->path($uploadPattern);
     if ($localFinder->count() == 0) {
         throw new \RuntimeException(sprintf("No import files found at path: %s, pattern = %s", $path, $uploadPattern));
     }

     try {
         $s3Finder = $this->s3Fs->getFinder();
         $s3Finder->path($clearPattern);
         if ($s3Finder->count() > 0) {
             if ($overwriteS3Files) {
                 foreach ($s3Finder as $splFileInfo) {
                     $this->s3Fs->delete($splFileInfo->getRelativePathname());
                 }
             } else {
                 throw new \RuntimeException(sprintf("The path is not empty on remote end, path = %s", $path));
             }
         }
     } catch (\InvalidArgumentException $e) {
         // A missing remote directory simply means there is nothing to clear; anything else is a real error.
         if (strpos($e->getMessage(), "directory does not exist") === false) {
             throw $e;
         }
     }

     $uploaded = [];
     try {
         foreach ($localFinder as $splFileInfo) {
             $relativePathname = $splFileInfo->getRelativePathname();
             $remoteName = $relativePathname . "/" . $timestamp;
             $fh = $this->localFs->readStream($relativePathname);
             try {
                 // IMPORTANT: use writeStream so that s3fs doesn't check for key existence, which in
                 // turn would break the strong consistency of S3.
                 $this->s3Fs->writeStream($remoteName, $fh);
             } finally {
                 // Release the local handle even when the upload fails; skip the close when the
                 // handle is not (or no longer) an open resource.
                 if (is_resource($fh)) {
                     fclose($fh);
                 }
             }
             $uploaded[] = $remoteName;
             mdebug("Uploaded %s to %s", $relativePathname, $remoteName);
         }

         $s3path = $this->s3Fs->getRealpath($path);
         mdebug("uploaded file to temp s3, path = %s", $s3path);
         $tempCredential = $this->sts->getTemporaryCredential();
         $this->connection->copyFromS3($table, $columns, $s3path, $this->s3Region, $tempCredential, true, $gzip);
     } finally {
         // Remove the temporary S3 objects whether or not the upload/copy succeeded, so a failed
         // import does not leave stray files behind.
         foreach ($uploaded as $remoteName) {
             $this->s3Fs->delete($remoteName);
         }
     }
 }
 /**
  * Round-trip test: export the test table gzipped and in parallel, count the exported
  * records, re-import the exported files and check the resulting row count.
  */
 public function testDataImportExportWithGzipAndParallel()
 {
     $prefix = "redshift_ut_" . time();
     $this->testDataImport(true);

     // Export with gzip, parallel unload and overwrite all switched on.
     $redshiftExporter = new RedshiftExporter(self::$rs, self::$localFs, self::$s3Fs, self::$s3Region, self::$sts);
     $redshiftExporter->exportToFile($prefix, "SELECT * FROM php_redshift_test", true, true, true);

     // Walk every local file produced by the export and count the records it holds.
     $localFiles = self::$localFs->getFinder();
     $localFiles->path("#^" . preg_quote($prefix, "#") . "#");
     $recordCount = 0;
     $exportedFiles = [];
     foreach ($localFiles as $fileInfo) {
         $fileName = $fileInfo->getRelativePathname();
         $exportedFiles[] = $fileName;
         $plain = gzdecode(self::$localFs->read($fileName));
         mdebug($plain);
         // Feed the decompressed payload to the reader through an in-memory stream.
         $memory = fopen('php://memory', 'r+');
         fwrite($memory, $plain);
         rewind($memory);
         $recordReader = new DrdStreamReader($memory, self::FIELDS);
         while ($recordReader->readRecord()) {
             ++$recordCount;
         }
         fclose($memory);
     }
     self::assertEquals(5, $recordCount);

     // test import of parallel data
     $redshiftImporter = new RedshiftImporter(self::$rs, self::$localFs, self::$s3Fs, self::$s3Region, self::$sts);
     $redshiftImporter->importFromFile($prefix, 'php_redshift_test', self::FIELDS, true, true);
     $countQuery = self::$rs->prepare("SELECT COUNT(*) FROM php_redshift_test");
     $countQuery->execute();
     $rowCount = $countQuery->fetchColumn();
     self::assertEquals(10, $rowCount);

     // Remove the exported files from the local filesystem.
     foreach ($exportedFiles as $fileName) {
         self::$localFs->delete($fileName);
     }
 }
Ejemplo n.º 4
0
 * Date: 2016-01-11
 * Time: 17:52
 */
require_once 'vendor/autoload.php';
use Oasis\Mlib\AwsWrappers\S3Client;
use Oasis\Mlib\AwsWrappers\StsClient;
use Oasis\Mlib\FlysystemWrappers\ExtendedAwsS3Adapter;
use Oasis\Mlib\FlysystemWrappers\ExtendedFilesystem;
use Oasis\Mlib\FlysystemWrappers\ExtendedLocal;
use Oasis\Mlib\Redshift\DrdStreamWriter;
use Oasis\Mlib\Redshift\RedshiftConnection;
use Oasis\Mlib\Redshift\RedshiftExporter;
use Oasis\Mlib\Redshift\RedshiftImporter;
// Demo script setup: builds the AWS/Redshift clients, the two filesystems and the
// importer/exporter pair, then opens a writer on a fresh local data file.
$awsConfig = ['profile' => "oasis-minhao", 'region' => 'ap-northeast-1'];
$sts = new StsClient(['profile' => 'oasis-minhao', 'region' => 'ap-northeast-1']);
// NOTE(review): host, user and password are hard-coded in source here — these should come from
// configuration or the environment, and the exposed password should be rotated.
$rs = RedshiftConnection::getConnection(["host" => "oas-dmp-test.cikskyn4dlgm.ap-northeast-1.redshift.amazonaws.com", "port" => 5439, "dbname" => "oasdmp", "user" => "oasdmp", "password" => "NU9qEG3nR8"]);
// Local filesystem; presumably rooted at /tmp — confirm against ExtendedLocal.
$localFs = new ExtendedFilesystem(new ExtendedLocal('/tmp'));
// S3 filesystem; presumably bucket "minhao-dev" with key prefix "/tmp" — confirm against ExtendedAwsS3Adapter.
$s3Fs = new ExtendedFilesystem(new ExtendedAwsS3Adapter(new S3Client($awsConfig), "minhao-dev", "/tmp"));
$importer = new RedshiftImporter($rs, $localFs, $s3Fs, 'ap-northeast-1', $sts);
$exporter = new RedshiftExporter($rs, $localFs, $s3Fs, 'ap-northeast-1', $sts);
// Seven column names: a1 .. a7.
$columns = explode(",", "a1,a2,a3,a4,a5,a6,a7");
$dataPath = 'data';
// Write empty content to the data file first, then open an append stream on it for the writer.
$localFs->put($dataPath, '');
$drd_os = $localFs->appendStream($dataPath);
$writer = new DrdStreamWriter($drd_os, $columns);
for ($i = 0; $i < 10; ++$i) {
    $data = [];
    for ($j = 0; $j < 7; ++$j) {
        $data['a' . ($j + 1)] = mt_rand(1, 10) + $j * 10;
    }
    $writer->writeRecord($data);