/**
 * Unloads the result of a query to S3 and downloads the resulting part files
 * to the local filesystem.
 *
 * Anything already stored under the normalized $path prefix, remotely or
 * locally, is either deleted first ($overwriteExistingFiles = true) or causes
 * the export to be refused. Each part is removed from S3 once it has been
 * stored locally.
 *
 * @param string $path                   Path prefix of the export; repeated and leading slashes are stripped
 * @param string $query                  Query handed to the connection's unloadToS3()
 * @param bool   $gzip                   Forwarded to unloadToS3(); when true only parts ending in ".gz" are downloaded
 * @param bool   $isParallel             Forwarded to unloadToS3()
 * @param bool   $overwriteExistingFiles Delete pre-existing files under the prefix instead of failing
 *
 * @throws \RuntimeException         If files already exist under the prefix and overwriting is disabled,
 *                                   or if a downloaded part could not be stored locally
 * @throws \InvalidArgumentException Rethrown from the remote finder unless its message says the directory does not exist
 */
public function exportToFile($path, $query, $gzip = false, $isParallel = true, $overwriteExistingFiles = false)
{
    $path   = ltrim(preg_replace('#/+#', "/", $path), "/");
    $s3path = $this->s3Fs->getRealpath($path);
    mdebug("export file to temp s3, prefix = %s", $s3path);

    $suffix          = $gzip ? "\\.gz" : "";
    $clearPattern    = "#^" . preg_quote($path, "#") . "#";
    $downloadPattern = "#^" . preg_quote($path, "#") . "([0-9]{2,})?(_part_[0-9]{2,})?{$suffix}\$#";
    mdebug("Clear pattern for %s is %s", $path, $clearPattern);
    mdebug("Download pattern for %s is %s", $path, $downloadPattern);

    // clear remote path
    try {
        $finder = $this->s3Fs->getFinder();
        $finder->path($clearPattern);
        if ($finder->count() > 0) {
            if ($overwriteExistingFiles) {
                foreach ($finder as $splFileInfo) {
                    $this->s3Fs->delete($splFileInfo->getRelativePathname());
                }
            } else {
                throw new \RuntimeException(sprintf("The path is not empty on remote end, path = %s", $path));
            }
        }
    } catch (\InvalidArgumentException $e) {
        // A missing remote directory simply means there is nothing to clear.
        if (strpos($e->getMessage(), "directory does not exist") === false) {
            throw $e;
        }
    }

    // clear local path
    $finder = $this->localFs->getFinder();
    $finder->path($clearPattern);
    if ($finder->count() > 0) {
        if ($overwriteExistingFiles) {
            foreach ($finder as $splFileInfo) {
                $this->localFs->delete($splFileInfo->getRelativePathname());
            }
        } else {
            throw new \RuntimeException(sprintf("The path is not empty locally, path = %s", $path));
        }
    }

    $tempCredential = $this->sts->getTemporaryCredential();
    $this->connection->unloadToS3($query, $s3path, $tempCredential, true, $gzip, $isParallel);

    // download every unloaded part, then drop it from S3
    $finder = $this->s3Fs->getFinder();
    $finder->path($downloadPattern);
    foreach ($finder as $splFileInfo) {
        $partName = $splFileInfo->getRelativePathname();
        $fh       = $this->s3Fs->readStream($partName);
        try {
            // NOTE(review): assumes the Flysystem-style contract where putStream()
            // reports failure by returning false. The remote part must survive in
            // that case, otherwise the exported data would be lost.
            if ($this->localFs->putStream($partName, $fh) === false) {
                throw new \RuntimeException(
                    sprintf("Failed to store exported part locally, path = %s", $partName)
                );
            }
        } finally {
            // Release the download stream even when storing the part failed.
            if (is_resource($fh)) {
                fclose($fh);
            }
        }
        $this->s3Fs->delete($partName);
    }
}
/**
 * Uploads local import files to S3 under per-run temporary names and copies
 * them into a table through the connection's copyFromS3().
 *
 * Each local file matching the upload pattern is written to
 * "<relative pathname>/<timestamp>" on S3, where the timestamp is built from
 * microtime() and the process id. The temporary objects are deleted again
 * once the copy has finished or failed.
 *
 * @param string $path             Path prefix of the import files; repeated and leading slashes are stripped
 * @param string $table            Target table, forwarded to copyFromS3()
 * @param mixed  $columns          Column list, forwarded to copyFromS3()
 * @param bool   $gzip             Forwarded to copyFromS3(); when true only local files ending in ".gz" are uploaded
 * @param bool   $overwriteS3Files Delete remote files matching the clear pattern instead of failing
 *
 * @throws \RuntimeException         If no local file matches, if remote files exist and overwriting is disabled,
 *                                   or if an upload fails
 * @throws \InvalidArgumentException Rethrown from the remote finder unless its message says the directory does not exist
 */
public function importFromFile($path, $table, $columns, $gzip = false, $overwriteS3Files = false)
{
    $timestamp = microtime(true) . getmypid();
    $path      = ltrim(preg_replace('#/+#', "/", $path), "/");

    $suffix        = $gzip ? "\\.gz" : "";
    $uploadPattern = "#^" . preg_quote($path, "#") . "([0-9]{2,})?(_part_[0-9]{2,})?{$suffix}\$#";
    // The timestamp normally contains a "." (from microtime), so it has to be
    // quoted to be matched literally rather than as "any character".
    $clearPattern  = "#^" . preg_quote($path, "#") . ".*/" . preg_quote($timestamp, "#") . "\$#";
    mdebug("Upload pattern is %s", $uploadPattern);
    mdebug("Clear pattern is %s", $clearPattern);

    $localFinder = $this->localFs->getFinder();
    $localFinder->path($uploadPattern);
    if ($localFinder->count() === 0) {
        throw new \RuntimeException(
            sprintf("No import files found at path: %s, pattern = %s", $path, $uploadPattern)
        );
    }

    try {
        $s3Finder = $this->s3Fs->getFinder();
        $s3Finder->path($clearPattern);
        if ($s3Finder->count() > 0) {
            if ($overwriteS3Files) {
                foreach ($s3Finder as $splFileInfo) {
                    $this->s3Fs->delete($splFileInfo->getRelativePathname());
                }
            } else {
                throw new \RuntimeException(sprintf("The path is not empty on remote end, path = %s", $path));
            }
        }
    } catch (\InvalidArgumentException $e) {
        // A missing remote directory simply means there is nothing to clear.
        if (strpos($e->getMessage(), "directory does not exist") === false) {
            throw $e;
        }
    }

    $uploaded = [];
    try {
        foreach ($localFinder as $splFileInfo) {
            $relativePathname = $splFileInfo->getRelativePathname();
            $remoteName       = $relativePathname . "/" . $timestamp;

            $fh = $this->localFs->readStream($relativePathname);
            try {
                // IMPORTANT: use write stream so that s3fs doesn't check for key existence,
                // which in turn would break the strong consistency of s3
                // NOTE(review): assumes the Flysystem-style contract where writeStream()
                // reports failure by returning false.
                if ($this->s3Fs->writeStream($remoteName, $fh) === false) {
                    throw new \RuntimeException(
                        sprintf("Failed to upload %s to %s", $relativePathname, $remoteName)
                    );
                }
            } finally {
                // Release the local read stream even when the upload failed.
                if (is_resource($fh)) {
                    fclose($fh);
                }
            }

            $uploaded[] = $remoteName;
            mdebug("Uploaded %s to %s", $relativePathname, $remoteName);
        }

        $s3path = $this->s3Fs->getRealpath($path);
        mdebug("uploaded file to temp s3, path = %s", $s3path);

        $tempCredential = $this->sts->getTemporaryCredential();
        $this->connection->copyFromS3($table, $columns, $s3path, $this->s3Region, $tempCredential, true, $gzip);
    } finally {
        // Remove the temporary objects whether or not the upload/copy succeeded,
        // so a failed import does not leave stray files on S3.
        foreach ($uploaded as $uploadedName) {
            $this->s3Fs->delete($uploadedName);
        }
    }
}
/**
 * Round-trip check: export the test table as gzipped, parallel parts, count
 * the records found in the downloaded parts, then import those same parts
 * back and verify the resulting row count.
 */
public function testDataImportExportWithGzipAndParallel()
{
    $prefix = "redshift_ut_" . time();
    $this->testDataImport(true);

    $exporter = new RedshiftExporter(self::$rs, self::$localFs, self::$s3Fs, self::$s3Region, self::$sts);
    $exporter->exportToFile($prefix, "SELECT * FROM php_redshift_test", true, true, true);

    $localFinder = self::$localFs->getFinder();
    $localFinder->path(sprintf("#^%s#", preg_quote($prefix, "#")));

    $recordCount   = 0;
    $exportedFiles = [];
    foreach ($localFinder as $fileInfo) {
        $pathname        = $fileInfo->getRelativePathname();
        $exportedFiles[] = $pathname;

        // Decompress the part once; the same text is logged and then parsed.
        $decoded = gzdecode(self::$localFs->read($pathname));
        mdebug($decoded);

        // Feed the decoded text to the reader through an in-memory stream.
        $memory = fopen('php://memory', 'r+');
        fwrite($memory, $decoded);
        rewind($memory);

        $reader = new DrdStreamReader($memory, self::FIELDS);
        while ($reader->readRecord()) {
            ++$recordCount;
        }
        fclose($memory);
    }
    self::assertEquals(5, $recordCount);

    // test import of parallel data
    $importer = new RedshiftImporter(self::$rs, self::$localFs, self::$s3Fs, self::$s3Region, self::$sts);
    $importer->importFromFile($prefix, 'php_redshift_test', self::FIELDS, true, true);

    $countStmt = self::$rs->prepare("SELECT COUNT(*) FROM php_redshift_test");
    $countStmt->execute();
    $rowCount = $countStmt->fetchColumn();
    self::assertEquals(10, $rowCount);

    // Remove the exported part files from the local filesystem.
    foreach ($exportedFiles as $pathname) {
        self::$localFs->delete($pathname);
    }
}
/**
 * Checks that a finder rooted at "a" applies a path() regex filter: of the
 * five files written below, only a/b/c.txt and a/b/d.txt match the pattern.
 *
 * @depends testAppendStreamOnNewFile
 *
 * @param ExtendedFilesystem $fs
 *
 * @return ExtendedFilesystem
 */
public function testFinder(ExtendedFilesystem $fs)
{
    $fs->put('a/b/c.txt', 'aaa');
    $fs->put('a/b/d.txt', 'aaa');
    $fs->put('a/b/d.jpg', 'aaa');
    $fs->put('a/b/x.txt', 'aaa');
    $fs->put('a/c/e.txt', 'aaa');

    $finder = $fs->getFinder('a');
    $finder->path('#b/[cd]\\.txt#');

    // Expected value goes first so a failure reports expected/actual correctly.
    $this->assertCount(2, $finder);

    // Hand the filesystem on, as the @return annotation promises, so a test
    // that @depends on this one receives it instead of null.
    return $fs;
}