Example #1
0
 /**
  * Execute a SOAP call, retrying with exponential backoff on SoapFault.
  *
  * Before every retry the method sleeps for 2^attempt seconds. Once
  * $this->backoffTryCount failed attempts have been made, the last fault is
  * wrapped in a UserException (code 400) that carries the fault details.
  *
  * @param RequestInterface $request provides the function name, params, options and input header
  * @return mixed whatever $this->client->__soapCall() returned (may legitimately be null)
  * @throws UserException when the call still fails on the last allowed attempt
  */
 public function download(RequestInterface $request)
 {
     $backoffTry = 0;
     $outputHeaders = null;
     do {
         if ($backoffTry > 0) {
             sleep(pow(2, $backoffTry));
         }
         // TODO refresh request may come here
         try {
             // Return straight from the successful call: a null result must not
             // be mistaken for a failure, otherwise the request would be re-sent
             // forever (and without any backoff, since no fault was counted).
             return $this->client->__soapCall(
                 $request->getFunction(),
                 $request->getParams(),
                 $request->getOptions(),
                 $request->getInputHeader(),
                 $outputHeaders
             );
         } catch (\SoapFault $e) {
             $backoffTry++;
             $errData = [
                 "code" => $e->getCode(),
                 "message" => $e->getMessage(),
                 "faultcode" => isset($e->faultcode) ? $e->faultcode : null,
                 "faultstring" => isset($e->faultstring) ? $e->faultstring : null,
                 "detail" => isset($e->detail) ? (array) $e->detail : null,
             ];
             // Do not retry if max. retry count is reached OR the error isn't on server(TODO?):  || $errData["faultcode"] == "SOAP-ENV:Client"
             if ($backoffTry >= $this->backoffTryCount) {
                 $userException = new UserException("Soap call failed:" . $e->getCode() . ": " . $e->getMessage(), 400, $e);
                 $userException->setData($errData);
                 throw $userException;
             }
             Logger::log("debug", "Soap call error, retrying:" . $e->getCode() . ": " . $e->getMessage(), $errData);
         }
     } while (true);
 }
Example #2
0
 /**
  * Processing an empty data set is expected to leave a single trace:
  * a debug record saying that no data was returned for the given type.
  */
 public function testProcessNoData()
 {
     $testHandler = new \Monolog\Handler\TestHandler();
     $monolog = new \Monolog\Logger('test', [$testHandler]);
     Logger::setLogger($monolog);

     $jsonParser = new Json(Parser::create($monolog));
     $jsonParser->process([], 'empty');

     $wasLogged = $testHandler->hasDebug("No data returned in 'empty'");
     self::assertTrue($wasLogged);
 }
Example #3
0
 /**
  * Hand the data over to the wrapped parser.
  *
  * A NoDataException is only logged at debug level; a JsonParserException is
  * rethrown as a UserException (code 500) together with the original data.
  *
  * @param array $data shall be the response body
  * @param string $type data type
  * @param mixed $parentId forwarded to the wrapped parser unchanged
  * @throws UserException when the wrapped parser reports a JSON parsing error
  */
 public function process(array $data, $type, $parentId = null)
 {
     try {
         $this->parser->process($data, $type, $parentId);
     } catch (NoDataException $noData) {
         Logger::log('debug', "No data returned in '{$type}'");
     } catch (JsonParserException $parseError) {
         $message = "Error parsing response JSON: " . $parseError->getMessage();
         throw new UserException($message, 500, $parseError, $parseError->getData());
     }
 }
 /**
  * Try to find the data array within $response.
  *
  * Resolution order:
  *  1. a non-empty 'dataField' in the job config (a path string, or an array with a 'path' key),
  *  2. the response itself when it is an array,
  *  3. the only array-valued property of a response object.
  *
  * @param array|object $response
  * @param JobConfig $jobConfig its config array is read for 'dataField' and 'endpoint'
  * @return array
  * @throws UserException on an invalid 'dataField', an ambiguous response or an unknown response type
  * @todo support array of dataFields
  *     - would return object with results, changing the class' API
  *     - parse would just have to loop through if it returns an object
  *     - and append $type with the dataField
  * @deprecated Use response module
  */
 public function process($response, JobConfig $jobConfig)
 {
     $config = $jobConfig->getConfig();

     // An explicitly configured dataField takes precedence over any guessing
     if (!empty($config['dataField'])) {
         $dataField = $config['dataField'];
         if (is_array($dataField)) {
             if (empty($dataField['path'])) {
                 throw new UserException("'dataField.path' must be set!");
             }
             $path = $dataField['path'];
         } elseif (is_scalar($dataField)) {
             $path = $dataField;
         } else {
             throw new UserException("'dataField' must be either a path string or an object with 'path' attribute.");
         }

         $found = Utils::getDataFromPath($path, $response, ".");
         if (empty($found)) {
             Logger::log('warning', "dataField '{$path}' contains no data!");
             return [];
         }
         // In case of a single object being returned, wrap it into a list
         return is_array($found) ? $found : [$found];
     }

     // Simplest case, the response is just the dataset
     if (is_array($response)) {
         return $response;
     }

     if (!is_object($response)) {
         $unknown = new UserException('Unknown response from API.');
         $unknown->setData(['response' => json_encode($response)]);
         throw $unknown;
     }

     // Collect every array-valued property in the response root
     $arrays = [];
     foreach ($response as $key => $value) {
         if (is_array($value)) {
             $arrays[$key] = $value;
         }
         // TODO else {$this->metadata[$key] = json_encode($value);} ? return [$data,$metadata];
     }
     $arrayNames = array_keys($arrays);
     $arraysFound = count($arrays);

     if ($arraysFound == 0) {
         Logger::log('warning', "No data array found in response! (endpoint: {$config['endpoint']})", ['response' => json_encode($response)]);
         return [];
     }
     if ($arraysFound == 1) {
         return $arrays[$arrayNames[0]];
     }

     $ambiguous = new UserException("More than one array found in response! Use 'dataField' parameter to specify a key to the data array. (endpoint: {$config['endpoint']}, arrays in response root: " . join(", ", $arrayNames) . ")");
     $ambiguous->setData(['response' => json_encode($response), 'arrays found' => $arrayNames]);
     throw $ambiguous;
 }
 /**
  * No change to JSON parser structure should happen when nothing is parsed!
  *
  * The extractor is run with an empty config; afterwards the
  * 'json_parser.struct' metadata must equal what was set before the run
  * and the 'time' key must still be present.
  */
 public function testRunMetadataUpdate()
 {
     $logger = $this->getLogger('test', true);
     Logger::setLogger($logger);

     $parserStruct = [
         'tickets.via' => ['channel' => 'scalar', 'source' => 'object'],
     ];
     $metadataBefore = [
         'json_parser.struct' => $parserStruct,
         'time' => ['previousStart' => 123],
     ];

     $config = new Config('testApp', 'testCfg', []);
     $api = Api::create(['baseUrl' => 'http://example.com'], $config);

     $extractor = new GenericExtractor(new Temp());
     $extractor->setLogger($logger);
     $extractor->setApi($api);
     $extractor->setMetadata($metadataBefore);
     $extractor->run($config);

     $metadataAfter = $extractor->getMetadata();
     self::assertEquals($parserStruct, $metadataAfter['json_parser.struct']);
     self::assertArrayHasKey('time', $metadataAfter);
 }
Example #6
0
 /**
  * Send the rows of a table's CSV file to the client as events.
  *
  * The first CSV row is used as the header from which the row processor is
  * built. The remaining rows are converted by that processor and submitted
  * through $this->client->addEvents() in batches of at most 1000 rows; the
  * number of returned results is summed up and logged at the end.
  *
  * @param $table
  * @throws \Keboola\Juicer\Exception\ApplicationException
  */
 public function write($table)
 {
     $tableName = $this->getTableName($table);
     $csv = new CsvFile($this->getSourceFileName($table));

     // First row: header
     $csv->next();
     $header = $csv->current();

     $factory = new Processor($this->processorConfig[$tableName]);
     $processor = $factory->getProcessor($header);

     // Move on to the first data row
     $csv->next();

     $createdTotal = 0;
     while (null != $csv->current()) {
         $batch = [];
         $rowsInBatch = 0;
         while ($rowsInBatch < 1000 && null != $csv->current()) {
             $batch[] = $processor($csv->current());
             $csv->next();
             $rowsInBatch++;
         }
         $response = $this->client->addEvents([$tableName => $batch]);
         $createdTotal += count($response[$tableName]);
     }

     Logger::log('info', sprintf('Created %s events.', $createdTotal));
 }
Example #7
0
 /**
  * Load the rows of a CSV file into a database table with multi-row INSERTs.
  *
  * The header row is skipped. The number of rows per statement is derived
  * from the column count of the first data row (1000 / columns - 1) and is
  * never allowed to drop below one, so wide tables still make progress.
  * All statements run inside a single transaction on $this->db.
  *
  * NOTE(review): the statement is assembled by string concatenation and relies
  * entirely on escape(), escapeCsvRow() and encodeCsvRow() for quoting — confirm
  * those helpers are sufficient for the target database.
  *
  * @param string $sourceFilename path of the CSV file to read
  * @param string $outputTableName destination table name (escaped before use)
  * @param array $table table definition; its 'items' entry is passed to encodeCsvRow()
  */
 public function write($sourceFilename, $outputTableName, $table)
 {
     $csv = new CsvFile($sourceFilename);
     // skip the header
     $csv->next();
     $csv->next();

     $firstRow = $csv->current();
     if (!is_array($firstRow)) {
         // Header-only (or empty) file: there is nothing to insert, and
         // count() must not be called on a non-array value.
         return;
     }

     $columnsCount = max(1, count($firstRow));
     // For more than 500 columns the formula yields 0, which would leave the
     // inner loop without progress and produce an INSERT with no rows.
     $rowsPerInsert = max(1, intval(1000 / $columnsCount - 1));

     $this->db->beginTransaction();
     while ($csv->current() !== false) {
         $sql = "INSERT INTO " . $this->escape($outputTableName) . " VALUES ";
         for ($i = 0; $i < $rowsPerInsert && $csv->current() !== false; $i++) {
             $sql .= sprintf("(%s),", implode(',', $this->encodeCsvRow($this->escapeCsvRow($csv->current()), $table['items'])));
             $csv->next();
         }
         // drop the trailing comma left by the last row
         $sql = substr($sql, 0, -1);
         Logger::log('debug', sprintf("Executing query '%s'.", $sql));
         $this->db->exec($sql);
     }
     $this->db->commit();
 }
 /**
  * {@inheritdoc}
  *
  * Downloads page after page until there is no next request. Each response is
  * hashed and compared with the hash of the previous one; an identical
  * response ends the job (and resets the scroller) to prevent an infinite
  * loop on awkward pagination APIs.
  */
 public function run()
 {
     $this->buildParams($this->config);
     $parentId = $this->getParentId();

     $request = $this->firstPage($this->config);
     while (false !== $request) {
         $response = $this->download($request);
         $currentHash = sha1(serialize($response));

         if ($currentHash == $this->lastResponseHash) {
             Logger::log("DEBUG", sprintf("Job '%s' finished when last response matched the previous!", $this->getJobId()));
             $this->scroller->reset();
             break;
         }

         $moduleOutput = $this->runResponseModules($response, $this->config);
         $data = $this->filterResponse($this->config, $moduleOutput);
         $this->parse($data, $parentId);
         $this->lastResponseHash = $currentHash;

         $request = $this->nextPage($this->config, $response, $data);
     }
 }
Example #9
0
 /**
  * Write every configured table to the database.
  *
  * For each entry of config['parameters']['tables'] that the writer accepts,
  * the destination table is dropped and re-created; unless the writer is
  * asynchronous, the CSV from "<dataDir>/in/tables/<tableId>.csv" is then
  * written into it. Any exception raised while doing so is rethrown as a
  * UserException with code 400.
  *
  * @return array ['status' => 'ok', 'uploaded' => list of written source table ids]
  * @throws UserException
  */
 public function run()
 {
     $factory = new WriterFactory();
     /** @var WriterInterface $writer */
     $writer = $factory->get($this->config['parameters']['db']);

     $uploaded = [];
     $tableDefinitions = $this->config['parameters']['tables'];
     foreach ($tableDefinitions as $table) {
         // NOTE(review): $options is not defined anywhere in this method, so this
         // is always false — confirm where the 'table' option was meant to come from.
         $ignoreExport = isset($options['table']);

         if (!$writer->isTableValid($table, $ignoreExport)) {
             Logger::log('warning', sprintf("Table '%s' not exported", $table["tableId"]));
             continue;
         }

         $sourceTableId = $table['tableId'];
         $outputTableName = $table['dbName'];
         // @todo: select not ignored columns in csv somehow - during data preparation
         $sourceFilename = $this->dataDir . "/in/tables/" . $sourceTableId . ".csv";

         try {
             $isAsync = $writer->isAsync();
             $writer->drop($outputTableName);
             $writer->create($table);
             if (!$isAsync) {
                 $writer->write($sourceFilename, $outputTableName, $table);
             }
             // For async writers nothing is uploaded here; the writeAsync() call is disabled.
         } catch (\Exception $e) {
             throw new UserException($e->getMessage(), 400, $e);
         }

         $uploaded[] = $sourceTableId;
     }

     return ['status' => 'ok', 'uploaded' => $uploaded];
 }
Example #10
0
 /**
  * Cannot use dataProvider because that gets set up before all tests
  * and the delay causes issues
  *
  * Both scenarios request a host name that is expected not to resolve
  * (curl error 6). In the first one, code 6 is configured as retryable, so
  * one debug record per retry is expected; in the second one only code 77 is
  * retryable, so the request must fail without any retry being logged.
  *
  * Only UserException is caught: the failure raised by $this->fail() must
  * propagate instead of being swallowed by a generic catch and then reported
  * through unrelated assertions.
  */
 public function testCurlBackoff()
 {
     // mapped curl error
     $retries = 3;
     $handler = new \Monolog\Handler\TestHandler();
     $logger = new \Monolog\Logger("test", [$handler]);
     Logger::setLogger($logger);
     $client = RestClient::create([], ['maxRetries' => $retries, 'curl' => ['codes' => [6]]]);
     try {
         $client->download(new RestRequest('http://keboolakeboolakeboola.com'));
         $this->fail("Request should fail");
     } catch (\Keboola\Juicer\Exception\UserException $e) {
         $this->assertCount($retries, $handler->getRecords());
         foreach ($handler->getRecords() as $record) {
             // 100 is the Monolog DEBUG level
             $this->assertEquals(100, $record['level']);
             $this->assertRegExp('/retrying/ui', $record['message']);
             $this->assertRegExp('/curl error 6\\:/ui', $record['context']['message']);
         }
         $this->assertRegExp('/curl error 6\\:/ui', $e->getMessage());
     }
     // non-mapped curl error
     $retries = 3;
     $handler = new \Monolog\Handler\TestHandler();
     $logger = new \Monolog\Logger("test", [$handler]);
     Logger::setLogger($logger);
     $client = RestClient::create([], ['maxRetries' => $retries, 'curl' => ['codes' => [77]]]);
     try {
         $client->download(new RestRequest('http://keboolakeboolakeboola.com'));
         $this->fail("Request should fail");
     } catch (\Keboola\Juicer\Exception\UserException $e) {
         $this->assertCount(0, $handler->getRecords());
         $this->assertRegExp('/curl error 6\\:/ui', $e->getMessage());
     }
 }
Example #11
0
 /**
  * Rows written to the 'tags' table from two sources — the nested 'tags'
  * arrays of the 'first' data type and a separate 'tags' data type — are
  * expected to end up in one result file with a single header.
  */
 public function testMergeResults()
 {
     Logger::setLogger($this->getLogger('testMergeResults', true));
     $configFirst = JobConfig::create(['endpoint' => '1st', 'dataType' => 'first']);
     $configTags = JobConfig::create(['endpoint' => '2nd', 'dataType' => 'tags']);
     $config = new Config('ex', 'test', []);

     // The same column mapping is used for the nested table and the standalone data type
     $tagColumns = [
         'user' => ['mapping' => ['destination' => 'user', 'primaryKey' => true]],
         'tag' => ['mapping' => ['destination' => 'tag', 'primaryKey' => true]],
     ];
     $config->setAttributes([
         'mappings' => [
             'first' => [
                 'id' => ['type' => 'column', 'mapping' => ['destination' => 'item_id']],
                 'tags' => [
                     'type' => 'table',
                     'destination' => 'tags',
                     'tableMapping' => $tagColumns,
                     'parentKey' => ['disable' => true],
                 ],
             ],
             'tags' => $tagColumns,
         ],
     ]);

     $firstData = json_decode('[
         {
             "id": 1,
             "arr": [1,2,3]
         },
         {
             "id": 2,
             "arr": ["a","b","c"],
             "tags": [
                 {
                     "user": "asd",
                     "tag": "tag1"
                 },
                 {
                     "user": "asd",
                     "tag": "tag2"
                 }
             ]
         }
     ]');
     $secondData = json_decode('[
         {
             "user": "asd",
             "tag": "tag3"
         },
         {
             "user": "asd",
             "tag": "tag4"
         }
     ]');

     $parser = JsonMap::create($config);
     $parser->process($firstData, $configFirst->getDataType());
     $parser->process($secondData, $configTags->getDataType());

     $expectedLines = [
         '"user","tag"' . PHP_EOL,
         '"asd","tag1"' . PHP_EOL,
         '"asd","tag2"' . PHP_EOL,
         '"asd","tag3"' . PHP_EOL,
         '"asd","tag4"' . PHP_EOL,
     ];
     self::assertEquals($expectedLines, file($parser->getResults()['tags']));
 }
Example #12
0
 /**
  * Create exponential backoff for GuzzleClient
  *
  * options
  *  - maxRetries: (integer) max retries count, defaults to 10
  *  - http
  *      - retryHeader (string) header containing retry time header, defaults to 'Retry-After'
  *      - codes (array) list of status codes to retry on
  *  - curl
  *      - codes (array) list of error codes to retry on
  *
  * Every retry is logged at DEBUG level together with the HTTP code, response
  * body, URL and (for error events) the exception message.
  *
  * @param array $options
  * @return RetrySubscriber
  */
 private static function createBackoff(array $options)
 {
     $headerName = 'Retry-After';
     if (isset($options['http']['retryHeader'])) {
         $headerName = $options['http']['retryHeader'];
     }

     $httpRetryCodes = [500, 502, 503, 504, 408, 420, 429];
     if (isset($options['http']['codes'])) {
         $httpRetryCodes = $options['http']['codes'];
     }

     $maxRetries = 10;
     if (isset($options['maxRetries'])) {
         $maxRetries = (int) $options['maxRetries'];
     }

     $curlRetryCodes = [CURLE_OPERATION_TIMEOUTED, CURLE_COULDNT_RESOLVE_HOST, CURLE_COULDNT_CONNECT, CURLE_SSL_CONNECT_ERROR, CURLE_GOT_NOTHING];
     if (isset($options['curl']['codes'])) {
         $curlRetryCodes = $options['curl']['codes'];
     }

     $retryFilter = RetrySubscriber::createChainFilter([
         RetrySubscriber::createStatusFilter($httpRetryCodes),
         RetrySubscriber::createCurlFilter($curlRetryCodes),
     ]);

     $delayCallback = function ($retries, AbstractTransferEvent $event) use ($headerName) {
         $delay = self::getRetryDelay($retries, $event, $headerName);

         $httpCode = null;
         if (!empty($event->getTransferInfo()['http_code'])) {
             $httpCode = $event->getTransferInfo()['http_code'];
         }

         $body = null;
         if (!is_null($event->getResponse())) {
             $body = (string) $event->getResponse()->getBody();
         }

         if (!empty($event->getTransferInfo()['url'])) {
             $url = $event->getTransferInfo()['url'];
         } else {
             $url = $event->getRequest()->getUrl();
         }

         $errData = ["http_code" => $httpCode, "body" => $body, "url" => $url];
         if ($event instanceof ErrorEvent) {
             $errData["message"] = $event->getException()->getMessage();
         }
         Logger::log("DEBUG", "Http request failed, retrying in {$delay}s", $errData);

         // the delay is computed in seconds, the subscriber is given milliseconds
         return 1000 * $delay;
     };

     return new RetrySubscriber([
         'filter' => $retryFilter,
         'max' => $maxRetries,
         'delay' => $delayCallback,
     ]);
 }
Example #13
0
 /**
  * Switch the shared Logger out of strict mode.
  *
  * Presumably a PHPUnit fixture that runs before each test of the
  * enclosing class (the class itself is not visible here).
  */
 public function setUp()
 {
     Logger::setStrict(false);
 }
Example #14
0
 /**
  * Log the request carried by the event, cast to string, at DEBUG level.
  *
  * @param BeforeEvent $event event whose request is logged
  */
 public function onBefore(BeforeEvent $event)
 {
     Logger::log("DEBUG", (string) $event->getRequest());
 }
Example #15
0
<?php

use Keboola\Juicer\Common\Logger;
use Keboola\Juicer\Exception\ApplicationException;
use Keboola\Juicer\Exception\UserException;
use Keboola\MetricsWriter\Application;
use Symfony\Component\Yaml\Yaml;
// Entry point: reads <data folder>/config.yml, runs the application and maps
// the outcome to a process exit code (0 = success, 1 = user error,
// 2 or the exception code = application error, 2 = any other exception).
require_once dirname(__FILE__) . "/bootstrap.php";
Logger::initLogger(APP_NAME);
try {
    $arguments = getopt("d::", ["data::"]);

    // Both the long (--data) and the short (-d) option are declared above, so
    // accept either one. getopt() yields false for an option given without a
    // value, which must not be treated as a folder name.
    $dataFolder = null;
    foreach (["data", "d"] as $optionName) {
        if (isset($arguments[$optionName]) && is_string($arguments[$optionName]) && $arguments[$optionName] !== '') {
            $dataFolder = $arguments[$optionName];
            break;
        }
    }
    if ($dataFolder === null) {
        throw new UserException('Data folder not set.');
    }

    // Report a missing configuration file as a user error instead of letting
    // file_get_contents() emit a warning and pass false on to the YAML parser.
    $configFile = $dataFolder . "/config.yml";
    if (!is_file($configFile) || !is_readable($configFile)) {
        throw new UserException("Configuration file '{$configFile}' not found or not readable.");
    }

    $config = Yaml::parse(file_get_contents($configFile));
    $config['dataFolder'] = $dataFolder;
    $app = new Application($config);
    $app->run();
} catch (UserException $e) {
    Logger::log('error', $e->getMessage(), (array) $e->getData());
    exit(1);
} catch (ApplicationException $e) {
    Logger::log('error', $e->getMessage(), (array) $e->getData());
    // never exit with 0 or 1 for an application error
    exit($e->getCode() > 1 ? $e->getCode() : 2);
} catch (\Exception $e) {
    Logger::log('error', $e->getMessage(), ['errFile' => $e->getFile(), 'errLine' => $e->getLine(), 'trace' => $e->getTrace()]);
    exit(2);
}
Logger::log('info', "Writer finished successfully.");
exit(0);
Example #16
0
/**
 * Log a user error and terminate the process with exit code 1.
 *
 * The exception message is logged at 'error' level with the exception's
 * data (cast to an array) as context. This function never returns.
 *
 * @param UserException $e the error to report
 */
function userError(UserException $e)
{
    Logger::log('error', $e->getMessage(), (array) $e->getData());
    exit(1);
}
 /**
  * Initialise the shared Logger under the name 'ex-generic_test'.
  *
  * Presumably a PHPUnit fixture that runs before each test of the
  * enclosing class (the class itself is not visible here).
  */
 public function setUp()
 {
     Logger::initLogger('ex-generic_test');
 }
Example #18
0
 /**
  * Run the extractor for every configuration found in the data folder.
  *
  * Steps, in order:
  *  - read the data folder from the --data command line option,
  *  - load the configs, stored metadata, modules, authorization and cache storage,
  *  - run one GenericExtractor per config, carrying the metadata from one run
  *    to the next and sharing one parser among configs with the same output bucket,
  *  - store the results per bucket and flatten "<data>/out/tables/<folder>/<file>"
  *    into "<data>/out/tables/<folder>.<file>",
  *  - save the metadata with 'time.previousStart' set to the start time of this run.
  *
  * @throws UserException when the data folder option is not set
  */
 public function run()
 {
     $temp = new Temp(APP_NAME);
     Logger::initLogger(APP_NAME);
     // NOTE(review): the short option "d" is declared but only "data" is read below
     $arguments = getopt("d::", ["data::"]);
     if (!isset($arguments["data"])) {
         throw new UserException('Data folder not set.');
     }
     $configuration = new Configuration($arguments['data'], APP_NAME, $temp);
     $configs = $configuration->getMultipleConfigs();
     $metadata = $configuration->getConfigMetadata() ?: [];
     // Default the previous start to 0 when missing/empty and remember when this run started
     $metadata['time']['previousStart'] = empty($metadata['time']['previousStart']) ? 0 : $metadata['time']['previousStart'];
     $metadata['time']['currentStart'] = time();
     $modules = $this->loadModules($configuration);
     $authorization = $configuration->getAuthorization();
     $cacheStorage = $this->initCacheStorage($configuration);
     // Keyed by output bucket; each entry holds the 'parser' and the 'incremental' flag
     $results = [];
     foreach ($configs as $config) {
         // Reinitialize logger depending on debug status
         if ($config->getAttribute('debug')) {
             Logger::initLogger(APP_NAME, true);
         } else {
             Logger::initLogger(APP_NAME);
         }
         $api = $configuration->getApi($config, $authorization);
         // Bucket name: explicit attribute, else derived from API + config name, else a placeholder
         if (!empty($config->getAttribute('outputBucket'))) {
             $outputBucket = $config->getAttribute('outputBucket');
         } elseif (!empty($config->getConfigName())) {
             $outputBucket = 'ex-api-' . $api->getName() . "-" . $config->getConfigName();
         } else {
             $outputBucket = "__kbc_default";
         }
         $extractor = new GenericExtractor($temp);
         $extractor->setLogger(Logger::getLogger());
         if ($cacheStorage) {
             $extractor->enableCache($cacheStorage);
         }
         // Reuse the parser of an earlier config that writes to the same bucket
         if (!empty($results[$outputBucket])) {
             $extractor->setParser($results[$outputBucket]['parser']);
         }
         $extractor->setApi($api);
         $extractor->setMetadata($metadata);
         $extractor->setModules($modules);
         $extractor->run($config);
         // Metadata returned by this run is handed to the next config's extractor
         $metadata = $extractor->getMetadata();
         $results[$outputBucket]['parser'] = $extractor->getParser();
         // The last config run for a bucket decides its 'incremental' flag
         $results[$outputBucket]['incremental'] = $config->getAttribute('incrementalOutput');
     }
     foreach ($results as $bucket => $result) {
         Logger::log('debug', "Processing results for {$bucket}.");
         // The placeholder bucket name is translated back to null
         $configuration->storeResults($result['parser']->getResults(), $bucket == "__kbc_default" ? null : $bucket, true, $result['incremental']);
         // move files and flatten file structure
         // NOTE(review): this scans the whole out/tables folder on every bucket
         // iteration, not only the folder of the current bucket — confirm intended.
         $folderFinder = new Finder();
         $fs = new Filesystem();
         $folders = $folderFinder->directories()->in($arguments['data'] . "/out/tables")->depth(0);
         foreach ($folders as $folder) {
             //$files = $finder->files()->in($folder->getPathname())->depth(0);
             $filesFinder = new Finder();
             $files = $filesFinder->files()->in($folder->getPathname())->depth(0);
             foreach ($files as $file) {
                 // "<folder>/<file>" becomes "<folder>.<file>" directly under out/tables
                 $destination = $arguments['data'] . "/out/tables/" . basename($folder->getPathname()) . "." . basename($file->getPathname());
                 // maybe move will be better?
                 $fs->copy($file->getPathname(), $destination);
                 $fs->remove($file);
             }
         }
         $fs->remove($folders);
     }
     // This run's start time becomes the next run's 'previousStart'
     $metadata['time']['previousStart'] = $metadata['time']['currentStart'];
     unset($metadata['time']['currentStart']);
     $configuration->saveConfigMetadata($metadata);
 }
Example #19
0
 /**
  * Create a child job with current client and parser
  *
  * The child gets one parameter per entry of the config's 'placeholders',
  * resolved through getPlaceholder() against the parent results. Parameters
  * of this (parent) job are added as well; a placeholder defined again in
  * the child overrides the parent's value.
  *
  * @param JobConfig $config
  * @param array $parentResults results used to resolve placeholders, also handed to the child
  * @return static
  */
 protected function createChild(JobConfig $config, array $parentResults)
 {
     // Clone the config to prevent overwriting the placeholder(s) in endpoint
     $child = new static(clone $config, $this->client, $this->parser);

     $placeholders = [];
     if (!empty($config->getConfig()['placeholders'])) {
         $placeholders = $config->getConfig()['placeholders'];
     } else {
         Logger::log("WARNING", "No 'placeholders' set for '" . $config->getConfig()['endpoint'] . "'");
     }

     $childParams = [];
     foreach ($placeholders as $placeholder => $field) {
         $childParams[$placeholder] = $this->getPlaceholder($placeholder, $field, $parentResults);
     }

     // Add parent params as well (for 'tagging' child-parent data)
     // Same placeholder in deeper nesting replaces parent value
     if (!empty($this->parentParams)) {
         $childParams = array_replace($this->parentParams, $childParams);
     }

     $child->setParams($childParams);
     $child->setParentResults($parentResults);

     return $child;
 }
Example #20
0
 /**
  * Merge newly produced result files into the already collected results.
  *
  * A file whose name is not in $results yet is simply added. A file whose
  * name already exists is merged into the existing one through mergeFiles(),
  * provided both have exactly the same header.
  *
  * @param array $results already collected files, keyed by table name
  * @param array $files new files to add, keyed by table name
  * @return array the updated $results
  * @throws UserException when two files for the same table have different headers;
  *     its data lists the columns present in only one of the two headers
  */
 protected function mergeResults(array $results, array $files)
 {
     foreach ($files as $name => $file) {
         if (!array_key_exists($name, $results)) {
             $results[$name] = $file;
             continue;
         }

         Logger::log('debug', "Merging results for '{$name}'.");
         $existingHeader = $results[$name]->getHeader();
         $newHeader = $file->getHeader();
         if ($existingHeader !== $newHeader) {
             // Report the difference in both directions: a one-way array_diff()
             // comes back empty when only the new file has additional columns.
             $differentColumns = array_merge(
                 array_diff($existingHeader, $newHeader),
                 array_diff($newHeader, $existingHeader)
             );
             throw new UserException("Multiple results for '{$name}' table have different columns!", 0, null, ['differentColumns' => $differentColumns]);
         }
         $this->mergeFiles($results[$name], $file);
     }
     return $results;
 }