/**
 * Execute the SOAP call described by $request, retrying on SoapFault with
 * exponential backoff (sleeps 2^attempt seconds before each retry).
 *
 * The loop ends on the first call that completes without a SoapFault, even
 * when the SOAP function legitimately returns null (e.g. a void operation).
 *
 * @param RequestInterface $request provides function name, params, options and input header
 * @return mixed whatever SoapClient::__soapCall() returned
 * @throws UserException (code 400) once $this->backoffTryCount failed attempts were made;
 *         the fault details are attached through setData()
 */
public function download(RequestInterface $request)
{
    $backoffTry = 0;
    $response = null;
    // Explicit success flag: a null response must not be mistaken for "not called yet"
    $succeeded = false;

    do {
        if ($backoffTry > 0) {
            sleep(pow(2, $backoffTry));
        }

        // TODO refresh request may come here
        try {
            $response = $this->client->__soapCall(
                $request->getFunction(),
                $request->getParams(),
                $request->getOptions(),
                $request->getInputHeader(),
                $outputHeaders
            );
            $succeeded = true;
        } catch (\SoapFault $e) {
            $backoffTry++;

            $errData = [
                "code" => $e->getCode(),
                "message" => $e->getMessage(),
                "faultcode" => isset($e->faultcode) ? $e->faultcode : null,
                "faultstring" => isset($e->faultstring) ? $e->faultstring : null,
                "detail" => isset($e->detail) ? (array) $e->detail : null
            ];

            // Do not retry if max. retry count is reached
            // OR the error isn't on server(TODO?): || $errData["faultcode"] == "SOAP-ENV:Client"
            if ($backoffTry >= $this->backoffTryCount) {
                $userException = new UserException(
                    "Soap call failed:" . $e->getCode() . ": " . $e->getMessage(),
                    400,
                    $e
                );
                $userException->setData($errData);
                throw $userException;
            }

            Logger::log(
                "debug",
                "Soap call error, retrying:" . $e->getCode() . ": " . $e->getMessage(),
                $errData
            );
        }
    } while (!$succeeded);

    return $response;
}
/**
 * Processing an empty data set has to leave a debug record naming the data type.
 */
public function testProcessNoData()
{
    $testHandler = new \Monolog\Handler\TestHandler();
    $monolog = new \Monolog\Logger('test', [$testHandler]);
    Logger::setLogger($monolog);

    $jsonParser = new Json(Parser::create($monolog));
    $jsonParser->process([], 'empty');

    $debugLogged = $testHandler->hasDebug("No data returned in 'empty'");
    self::assertTrue($debugLogged);
}
/**
 * Hand the data over to the wrapped parser.
 *
 * A NoDataException raised by the parser is only reported at debug level;
 * a JsonParserException is converted to a UserException (code 500) that
 * keeps the original exception as "previous" and carries its data.
 *
 * @param array $data shall be the response body
 * @param string $type data type
 * @param mixed $parentId forwarded to the wrapped parser unchanged
 * @throws UserException when the wrapped parser fails with a JsonParserException
 */
public function process(array $data, $type, $parentId = null)
{
    try {
        $this->parser->process($data, $type, $parentId);
    } catch (NoDataException $noData) {
        Logger::log('debug', "No data returned in '{$type}'");
    } catch (JsonParserException $parseError) {
        $message = "Error parsing response JSON: " . $parseError->getMessage();

        throw new UserException($message, 500, $parseError, $parseError->getData());
    }
}
/**
 * Locate the data array inside $response.
 *
 * Resolution order:
 *  1. a non-empty 'dataField' in the job config (a path string, or an array
 *     with a 'path' key) is looked up in the response using "." as delimiter;
 *     a non-array result is wrapped into a one-item array,
 *  2. an array response is returned as it is,
 *  3. for an object response its array-valued members are collected and the
 *     only one is returned (none: warning + empty array, several: exception).
 *
 * @param array|object $response
 * @param JobConfig $jobConfig its getConfig() array supplies 'dataField' and 'endpoint'
 * @return array
 * @throws UserException on an invalid 'dataField', an ambiguous object response
 *         or a response that is neither an array nor an object
 * @todo support array of dataFields
 *     - would return object with results, changing the class' API
 *     - parse would just have to loop through if it returns an object
 *     - and append $type with the dataField
 * @deprecated Use response module
 */
public function process($response, JobConfig $jobConfig)
{
    $config = $jobConfig->getConfig();

    // An explicit dataField tells where the data is
    if (!empty($config['dataField'])) {
        $dataField = $config['dataField'];

        if (is_array($dataField)) {
            if (empty($dataField['path'])) {
                throw new UserException("'dataField.path' must be set!");
            }
            $path = $dataField['path'];
        } elseif (is_scalar($dataField)) {
            $path = $dataField;
        } else {
            throw new UserException("'dataField' must be either a path string or an object with 'path' attribute.");
        }

        $found = Utils::getDataFromPath($path, $response, ".");
        if (empty($found)) {
            Logger::log('warning', "dataField '{$path}' contains no data!");
            return [];
        }

        // In case of a single object being returned, wrap it
        return is_array($found) ? $found : [$found];
    }

    // Simplest case, the response is just the dataset
    if (is_array($response)) {
        return $response;
    }

    if (!is_object($response)) {
        $unknown = new UserException('Unknown response from API.');
        $unknown->setData(['response' => json_encode($response)]);
        throw $unknown;
    }

    // No dataField given: try to find the arrays in the response
    $arrays = [];
    foreach ($response as $key => $value) {
        if (is_array($value)) {
            $arrays[$key] = $value;
        }
        // TODO else {$this->metadata[$key] = json_encode($value);} ? return [$data,$metadata];
    }
    $arrayNames = array_keys($arrays);

    switch (count($arrays)) {
        case 1:
            return $arrays[$arrayNames[0]];
        case 0:
            Logger::log(
                'warning',
                "No data array found in response! (endpoint: {$config['endpoint']})",
                ['response' => json_encode($response)]
            );
            return [];
        default:
            $ambiguous = new UserException("More than one array found in response! Use 'dataField' parameter to specify a key to the data array. (endpoint: {$config['endpoint']}, arrays in response root: " . join(", ", $arrayNames) . ")");
            $ambiguous->setData(['response' => json_encode($response), 'arrays found' => $arrayNames]);
            throw $ambiguous;
    }
}
/**
 * The stored JSON parser structure has to survive a run in which nothing
 * gets parsed, and the 'time' metadata key has to be kept as well.
 */
public function testRunMetadataUpdate()
{
    $testLogger = $this->getLogger('test', true);
    Logger::setLogger($testLogger);

    $parserStruct = [
        'tickets.via' => [
            'channel' => 'scalar',
            'source' => 'object'
        ]
    ];
    $metadataBefore = [
        'json_parser.struct' => $parserStruct,
        'time' => ['previousStart' => 123]
    ];

    $config = new Config('testApp', 'testCfg', []);
    $api = Api::create(['baseUrl' => 'http://example.com'], $config);

    $extractor = new GenericExtractor(new Temp());
    $extractor->setLogger($testLogger);
    $extractor->setApi($api);
    $extractor->setMetadata($metadataBefore);
    $extractor->run($config);

    $metadataAfter = $extractor->getMetadata();

    self::assertEquals($metadataBefore['json_parser.struct'], $metadataAfter['json_parser.struct']);
    self::assertArrayHasKey('time', $metadataAfter);
}
/**
 * Read the table's source CSV and pass its rows to the client as events,
 * at most 1000 rows per addEvents() call.
 *
 * The first CSV row is used as the header from which the row processor is
 * built; every following row goes through that processor before sending.
 * The number of items returned for the table is summed up and logged.
 *
 * @param $table
 * @throws \Keboola\Juicer\Exception\ApplicationException
 */
public function write($table)
{
    $tableName = $this->getTableName($table);
    $csv = new CsvFile($this->getSourceFileName($table));

    // First row: header
    $csv->next();
    $header = $csv->current();

    $factory = new Processor($this->processorConfig[$tableName]);
    $rowProcessor = $factory->getProcessor($header);

    // Move on to the first data row
    $csv->next();

    $createdTotal = 0;
    while ($csv->current() != null) {
        // The outer condition guarantees at least one row for this batch
        $batch = [];
        do {
            $batch[] = $rowProcessor($csv->current());
            $csv->next();
        } while (count($batch) < 1000 && $csv->current() != null);

        $result = $this->client->addEvents([$tableName => $batch]);
        $createdTotal += count($result[$tableName]);
    }

    Logger::log('info', sprintf('Created %s events.', $createdTotal));
}
/**
 * Load the rows of a CSV file into a database table using multi-row INSERT
 * statements executed inside a single transaction.
 *
 * The number of rows per statement is derived from the column count of the
 * first data row (roughly 1000 values per statement) and is never lower
 * than one, so very wide tables still make progress. A file without any data
 * row (header only or empty) is a no-op.
 *
 * @param string $sourceFilename path of the CSV file; its first row is the header
 * @param string $outputTableName target table name, escaped via $this->escape()
 * @param array $table table definition; $table['items'] is passed to encodeCsvRow()
 */
public function write($sourceFilename, $outputTableName, $table)
{
    $csv = new CsvFile($sourceFilename);

    // skip the header
    $csv->next();
    $csv->next();

    $firstRow = $csv->current();
    if (!is_array($firstRow)) {
        // No data rows at all: nothing to insert (and count() must not see a non-array)
        return;
    }

    $columnsCount = max(1, count($firstRow));
    // At least one row per statement, otherwise the INSERT would carry no values
    $rowsPerInsert = max(1, intval(1000 / $columnsCount - 1));

    // NOTE(review): there is no explicit rollback on failure; presumably the caller
    // discards the connection when an exception propagates - confirm.
    $this->db->beginTransaction();

    while ($csv->current() !== false) {
        $sql = "INSERT INTO " . $this->escape($outputTableName) . " VALUES ";

        for ($i = 0; $i < $rowsPerInsert && $csv->current() !== false; $i++) {
            $sql .= sprintf(
                "(%s),",
                implode(',', $this->encodeCsvRow($this->escapeCsvRow($csv->current()), $table['items']))
            );
            $csv->next();
        }

        // strip the trailing comma
        $sql = substr($sql, 0, -1);

        Logger::log('debug', sprintf("Executing query '%s'.", $sql));
        $this->db->exec($sql);
    }

    $this->db->commit();
}
/**
 * {@inheritdoc}
 *
 * Downloads page after page until no further request is produced. The hash
 * of each response is compared with the previous one; an identical response
 * resets the scroller and ends the job, which prevents an infinite loop on
 * awkward pagination APIs.
 *
 * The hashes are compared strictly: sha1() yields hex strings and a loose
 * comparison would treat two different numeric-looking digests ("0e...")
 * as equal.
 */
public function run()
{
    $this->buildParams($this->config);

    $parentId = $this->getParentId();

    $request = $this->firstPage($this->config);
    while ($request !== false) {
        $response = $this->download($request);

        $responseHash = sha1(serialize($response));
        if ($responseHash === $this->lastResponseHash) {
            Logger::log(
                "DEBUG",
                sprintf("Job '%s' finished when last response matched the previous!", $this->getJobId())
            );
            $this->scroller->reset();
            break;
        }

        $data = $this->runResponseModules($response, $this->config);
        $data = $this->filterResponse($this->config, $data);
        $this->parse($data, $parentId);

        $this->lastResponseHash = $responseHash;

        $request = $this->nextPage($this->config, $response, $data);
    }
}
/**
 * Export every configured table through the writer built from the 'db' parameters.
 *
 * Tables the writer rejects via isTableValid() are skipped with a warning.
 * For the others the target table is dropped and re-created, and - unless
 * the writer reports itself as async - filled from the table's CSV file in
 * "<dataDir>/in/tables/<tableId>.csv".
 *
 * @return array ['status' => 'ok', 'uploaded' => list of exported tableIds]
 * @throws UserException (code 400) wrapping any exception raised by the writer calls
 */
public function run()
{
    $writerFactory = new WriterFactory();

    /** @var WriterInterface $writer */
    $writer = $writerFactory->get($this->config['parameters']['db']);

    $uploaded = [];
    $tables = $this->config['parameters']['tables'];

    foreach ($tables as $table) {
        // The original consulted an $options variable that is never defined in this
        // method, so the flag could only ever be false; that dead check is dropped.
        $ignoreExport = false;

        if (!$writer->isTableValid($table, $ignoreExport)) {
            Logger::log('warning', sprintf("Table '%s' not exported", $table["tableId"]));
            continue;
        }

        $sourceTableId = $table['tableId'];
        $outputTableName = $table['dbName'];

        // @todo: select not ignored columns in csv somehow - during data preparation
        $sourceFilename = $this->dataDir . "/in/tables/" . $sourceTableId . ".csv";

        try {
            $isAsync = $writer->isAsync();

            $writer->drop($outputTableName);
            $writer->create($table);

            if (!$isAsync) {
                $writer->write($sourceFilename, $outputTableName, $table);
            }
            // Async writers only get the table re-created for now, the upload call is disabled:
            // $writer->writeAsync($fileInfo, $outputTableName);
        } catch (\Exception $e) {
            throw new UserException($e->getMessage(), 400, $e);
        }

        $uploaded[] = $sourceTableId;
    }

    return [
        'status' => 'ok',
        'uploaded' => $uploaded
    ];
}
/**
 * Checks the retry behaviour for a curl error that is on the retry list
 * and for one that is not.
 *
 * Cannot use dataProvider because that gets set up before all tests
 * and the delay causes issues
 */
public function testCurlBackoff()
{
    // Scenario 1: mapped curl error - curl code 6 is on the retry list
    $maxRetries = 3;
    $testHandler = new \Monolog\Handler\TestHandler();
    Logger::setLogger(new \Monolog\Logger("test", [$testHandler]));

    $restClient = RestClient::create([], ['maxRetries' => $maxRetries, 'curl' => ['codes' => [6]]]);

    try {
        $restClient->download(new RestRequest('http://keboolakeboolakeboola.com'));
        $this->fail("Request shoul fail");
    } catch (\Exception $e) {
        // one debug record per retry
        $this->assertCount($maxRetries, $testHandler->getRecords());
        foreach ($testHandler->getRecords() as $logRecord) {
            $this->assertEquals(100, $logRecord['level']);
            $this->assertRegExp('/retrying/ui', $logRecord['message']);
            $this->assertRegExp('/curl error 6\\:/ui', $logRecord['context']['message']);
        }

        $this->assertRegExp('/curl error 6\\:/ui', $e->getMessage());
        $this->assertTrue($e instanceof \Keboola\Juicer\Exception\UserException);
    }

    // Scenario 2: non-mapped curl error - only code 77 is on the retry list, so nothing is retried
    $maxRetries = 3;
    $testHandler = new \Monolog\Handler\TestHandler();
    Logger::setLogger(new \Monolog\Logger("test", [$testHandler]));

    $restClient = RestClient::create([], ['maxRetries' => $maxRetries, 'curl' => ['codes' => [77]]]);

    try {
        $restClient->download(new RestRequest('http://keboolakeboolakeboola.com'));
        $this->fail("Request shoul fail");
    } catch (\Exception $e) {
        $this->assertCount(0, $testHandler->getRecords());
        $this->assertRegExp('/curl error 6\\:/ui', $e->getMessage());
        $this->assertTrue($e instanceof \Keboola\Juicer\Exception\UserException);
    }
}
/**
 * Rows for the 'tags' table produced by two separate process() calls - once
 * as a child table of 'first', once as the top-level 'tags' data type - must
 * end up merged in a single result file.
 *
 * The fixture users are "asd", matching the rows asserted at the end.
 */
public function testMergeResults()
{
    Logger::setLogger($this->getLogger('testMergeResults', true));

    $configFirst = JobConfig::create(['endpoint' => '1st', 'dataType' => 'first']);
    $configTags = JobConfig::create(['endpoint' => '2nd', 'dataType' => 'tags']);

    // Both the nested table mapping and the standalone 'tags' mapping use the same columns
    $tagColumns = [
        'user' => ['mapping' => ['destination' => 'user', 'primaryKey' => true]],
        'tag' => ['mapping' => ['destination' => 'tag', 'primaryKey' => true]]
    ];

    $config = new Config('ex', 'test', []);
    $config->setAttributes([
        'mappings' => [
            'first' => [
                'id' => [
                    'type' => 'column',
                    'mapping' => ['destination' => 'item_id']
                ],
                'tags' => [
                    'type' => 'table',
                    'destination' => 'tags',
                    'tableMapping' => $tagColumns,
                    'parentKey' => ['disable' => true]
                ]
            ],
            'tags' => $tagColumns
        ]
    ]);

    $firstData = json_decode('[
        {
            "id": 1,
            "arr": [1,2,3]
        },
        {
            "id": 2,
            "arr": ["a","b","c"],
            "tags": [
                { "user": "asd", "tag": "tag1" },
                { "user": "asd", "tag": "tag2" }
            ]
        }
    ]');

    $secondData = json_decode('[
        { "user": "asd", "tag": "tag3" },
        { "user": "asd", "tag": "tag4" }
    ]');

    $parser = JsonMap::create($config);
    $parser->process($firstData, $configFirst->getDataType());
    $parser->process($secondData, $configTags->getDataType());

    self::assertEquals(
        [
            '"user","tag"' . PHP_EOL,
            '"asd","tag1"' . PHP_EOL,
            '"asd","tag2"' . PHP_EOL,
            '"asd","tag3"' . PHP_EOL,
            '"asd","tag4"' . PHP_EOL
        ],
        file($parser->getResults()['tags'])
    );
}
/**
 * Build the retry subscriber (exponential backoff) for the Guzzle client.
 *
 * Recognised options and the defaults used when they are missing:
 * - maxRetries: (integer) max retries count, default 10
 * - http
 *   - retryHeader (string) header containing the retry time, default 'Retry-After'
 *   - codes (array) list of status codes to retry on,
 *     default 500, 502, 503, 504, 408, 420, 429
 * - curl
 *   - codes (array) list of curl error codes to retry on, default: operation
 *     timeout, couldn't resolve host, couldn't connect, SSL connect error, got nothing
 *
 * Each retry is logged at DEBUG level together with the HTTP code, response
 * body, URL and - for error events - the exception message.
 *
 * @param array $options
 * @return RetrySubscriber
 */
private static function createBackoff(array $options)
{
    $headerName = 'Retry-After';
    if (isset($options['http']['retryHeader'])) {
        $headerName = $options['http']['retryHeader'];
    }

    $httpRetryCodes = [500, 502, 503, 504, 408, 420, 429];
    if (isset($options['http']['codes'])) {
        $httpRetryCodes = $options['http']['codes'];
    }

    $maxRetries = 10;
    if (isset($options['maxRetries'])) {
        $maxRetries = (int) $options['maxRetries'];
    }

    $curlRetryCodes = [
        CURLE_OPERATION_TIMEOUTED,
        CURLE_COULDNT_RESOLVE_HOST,
        CURLE_COULDNT_CONNECT,
        CURLE_SSL_CONNECT_ERROR,
        CURLE_GOT_NOTHING
    ];
    if (isset($options['curl']['codes'])) {
        $curlRetryCodes = $options['curl']['codes'];
    }

    // Retry when either the HTTP status filter or the curl error filter matches
    $retryFilter = RetrySubscriber::createChainFilter([
        RetrySubscriber::createStatusFilter($httpRetryCodes),
        RetrySubscriber::createCurlFilter($curlRetryCodes)
    ]);

    $delayCallback = function ($retries, AbstractTransferEvent $event) use ($headerName) {
        $delay = self::getRetryDelay($retries, $event, $headerName);

        $transferInfo = $event->getTransferInfo();
        $response = $event->getResponse();

        $errData = [
            "http_code" => !empty($transferInfo['http_code']) ? $transferInfo['http_code'] : null,
            "body" => is_null($response) ? null : (string) $response->getBody(),
            "url" => !empty($transferInfo['url']) ? $transferInfo['url'] : $event->getRequest()->getUrl()
        ];
        if ($event instanceof ErrorEvent) {
            $errData["message"] = $event->getException()->getMessage();
        }

        Logger::log("DEBUG", "Http request failed, retrying in {$delay}s", $errData);

        // ms > s
        return 1000 * $delay;
    };

    return new RetrySubscriber([
        'filter' => $retryFilter,
        'max' => $maxRetries,
        'delay' => $delayCallback
    ]);
}
/**
 * Fixture hook: turns the shared Logger's strict mode off.
 */
public function setUp()
{
    Logger::setStrict(false);
}
/**
 * Log the string form of the request carried by the event at DEBUG level.
 *
 * @param BeforeEvent $event
 */
public function onBefore(BeforeEvent $event)
{
    $request = $event->getRequest();

    Logger::log("DEBUG", (string) $request);
}
<?php

use Keboola\Juicer\Common\Logger;
use Keboola\Juicer\Exception\ApplicationException;
use Keboola\Juicer\Exception\UserException;
use Keboola\MetricsWriter\Application;
use Symfony\Component\Yaml\Yaml;

require_once dirname(__FILE__) . "/bootstrap.php";

Logger::initLogger(APP_NAME);

// Entry point: reads "<data folder>/config.yml", runs the application and maps
// the outcome to an exit code (0 success, 1 user error, >= 2 application error).
try {
    $arguments = getopt("d::", ["data::"]);

    // The data folder may be given as --data=<dir> or through the short option -d<dir>
    // that getopt() is asked for as well; the long form wins when both are present.
    $dataFolder = null;
    if (isset($arguments["data"])) {
        $dataFolder = $arguments["data"];
    } elseif (isset($arguments["d"])) {
        $dataFolder = $arguments["d"];
    }

    // getopt() yields false for an option passed without a value and an array for a
    // repeated option; neither is a usable folder.
    if (!is_string($dataFolder) || $dataFolder === '') {
        throw new UserException('Data folder not set.');
    }

    $configFile = $dataFolder . "/config.yml";
    if (!is_readable($configFile)) {
        throw new UserException(sprintf("Configuration file '%s' not found or not readable.", $configFile));
    }

    $config = Yaml::parse(file_get_contents($configFile));
    $config['dataFolder'] = $dataFolder;

    $app = new Application($config);
    $app->run();
} catch (UserException $e) {
    Logger::log('error', $e->getMessage(), (array) $e->getData());
    exit(1);
} catch (ApplicationException $e) {
    Logger::log('error', $e->getMessage(), (array) $e->getData());
    // Exit code 1 is reserved for user errors
    exit($e->getCode() > 1 ? $e->getCode() : 2);
} catch (\Exception $e) {
    Logger::log('error', $e->getMessage(), [
        'errFile' => $e->getFile(),
        'errLine' => $e->getLine(),
        'trace' => $e->getTrace()
    ]);
    exit(2);
}

Logger::log('info', "Writer finished successfully.");
exit(0);
/**
 * Log a user error (message plus the exception data as context) and
 * terminate the script with exit code 1.
 *
 * @param UserException $e
 */
function userError(UserException $e)
{
    $message = $e->getMessage();
    $context = (array) $e->getData();

    Logger::log('error', $message, $context);

    exit(1);
}
/**
 * Fixture hook: initializes the shared Logger under the 'ex-generic_test' name.
 */
public function setUp()
{
    Logger::initLogger('ex-generic_test');
}
/**
 * Run the extractor for every configuration found in the data folder.
 *
 * Steps:
 *  - resolve the data folder from the command line (--data=<dir> or -d<dir>),
 *  - load configs, stored metadata, modules, authorization and cache storage,
 *  - run a GenericExtractor per config; configs that resolve to the same
 *    output bucket share one parser,
 *  - store the results per bucket and flatten "<data>/out/tables/<folder>/<file>"
 *    into "<data>/out/tables/<folder>.<file>",
 *  - save the metadata with 'time.previousStart' set to the start of this run.
 *
 * @throws UserException when no usable data folder is given
 */
public function run()
{
    $temp = new Temp(APP_NAME);

    Logger::initLogger(APP_NAME);

    $arguments = getopt("d::", ["data::"]);

    // Accept the short option -d that getopt() is asked for as well; the long form wins.
    $dataDir = null;
    if (isset($arguments["data"])) {
        $dataDir = $arguments["data"];
    } elseif (isset($arguments["d"])) {
        $dataDir = $arguments["d"];
    }

    // getopt() yields false for an option without a value and an array for a repeated one
    if (!is_string($dataDir) || $dataDir === '') {
        throw new UserException('Data folder not set.');
    }

    $configuration = new Configuration($dataDir, APP_NAME, $temp);

    $configs = $configuration->getMultipleConfigs();

    $metadata = $configuration->getConfigMetadata() ?: [];
    $metadata['time']['previousStart'] = empty($metadata['time']['previousStart'])
        ? 0
        : $metadata['time']['previousStart'];
    $metadata['time']['currentStart'] = time();

    $modules = $this->loadModules($configuration);

    $authorization = $configuration->getAuthorization();
    $cacheStorage = $this->initCacheStorage($configuration);

    $results = [];
    foreach ($configs as $config) {
        // Reinitialize logger depending on debug status
        if ($config->getAttribute('debug')) {
            Logger::initLogger(APP_NAME, true);
        } else {
            Logger::initLogger(APP_NAME);
        }

        $api = $configuration->getApi($config, $authorization);

        // Output bucket: explicit attribute, else derived from API and config name, else default
        if (!empty($config->getAttribute('outputBucket'))) {
            $outputBucket = $config->getAttribute('outputBucket');
        } elseif (!empty($config->getConfigName())) {
            $outputBucket = 'ex-api-' . $api->getName() . "-" . $config->getConfigName();
        } else {
            $outputBucket = "__kbc_default";
        }

        $extractor = new GenericExtractor($temp);
        $extractor->setLogger(Logger::getLogger());
        if ($cacheStorage) {
            $extractor->enableCache($cacheStorage);
        }

        // Reuse the parser of an earlier config writing to the same bucket
        if (!empty($results[$outputBucket])) {
            $extractor->setParser($results[$outputBucket]['parser']);
        }

        $extractor->setApi($api);
        $extractor->setMetadata($metadata);
        $extractor->setModules($modules);

        $extractor->run($config);

        // Metadata is carried over from one config's run to the next
        $metadata = $extractor->getMetadata();

        $results[$outputBucket]['parser'] = $extractor->getParser();
        $results[$outputBucket]['incremental'] = $config->getAttribute('incrementalOutput');
    }

    foreach ($results as $bucket => $result) {
        Logger::log('debug', "Processing results for {$bucket}.");

        $configuration->storeResults(
            $result['parser']->getResults(),
            $bucket == "__kbc_default" ? null : $bucket,
            true,
            $result['incremental']
        );

        // move files and flatten file structure
        $folderFinder = new Finder();
        $fs = new Filesystem();
        $folders = $folderFinder->directories()->in($dataDir . "/out/tables")->depth(0);
        foreach ($folders as $folder) {
            $filesFinder = new Finder();
            $files = $filesFinder->files()->in($folder->getPathname())->depth(0);
            foreach ($files as $file) {
                $destination = $dataDir . "/out/tables/" . basename($folder->getPathname())
                    . "." . basename($file->getPathname());
                // maybe move will be better?
                $fs->copy($file->getPathname(), $destination);
                $fs->remove($file);
            }
        }
        $fs->remove($folders);
    }

    // The start of this run becomes the "previous start" of the next one
    $metadata['time']['previousStart'] = $metadata['time']['currentStart'];
    unset($metadata['time']['currentStart']);
    $configuration->saveConfigMetadata($metadata);
}
/**
 * Create a child job sharing this job's client and parser.
 *
 * The child's params are built from the 'placeholders' of the job config
 * (each resolved against the parent results) and merged over this job's
 * parent params, so a placeholder repeated in deeper nesting replaces the
 * value coming from the parent.
 *
 * @param JobConfig $config
 * @param array $parentResults results handed to the child and used to resolve placeholders
 * @return static
 */
protected function createChild(JobConfig $config, array $parentResults)
{
    // Clone the config to prevent overwriting the placeholder(s) in endpoint
    $job = new static(clone $config, $this->client, $this->parser);

    $jobConfig = $config->getConfig();

    $placeholders = [];
    if (!empty($jobConfig['placeholders'])) {
        $placeholders = $jobConfig['placeholders'];
    }

    if (empty($placeholders)) {
        Logger::log("WARNING", "No 'placeholders' set for '" . $jobConfig['endpoint'] . "'");
    }

    $params = [];
    foreach ($placeholders as $placeholder => $field) {
        $params[$placeholder] = $this->getPlaceholder($placeholder, $field, $parentResults);
    }

    // Add parent params as well (for 'tagging' child-parent data)
    // Same placeholder in deeper nesting replaces parent value
    if (!empty($this->parentParams)) {
        $params = array_replace($this->parentParams, $params);
    }

    $job->setParams($params);
    $job->setParentResults($parentResults);

    return $job;
}
/**
 * Merge newly produced files into the existing results, keyed by name.
 *
 * A file with a new name is simply added. A file whose name already exists
 * must have exactly the same header (same columns in the same order) and is
 * then merged via mergeFiles().
 *
 * On a header mismatch the exception data describes the difference from both
 * sides: 'differentColumns' (in the existing header only), 'unexpectedColumns'
 * (in the new header only) and both full headers, so a pure ordering
 * difference or extra columns in the new file can be diagnosed as well.
 *
 * @param array $results existing result files indexed by name
 * @param array $files new files indexed by name
 * @return array the merged results
 * @throws UserException when headers of equally named files differ
 */
protected function mergeResults(array $results, array $files)
{
    foreach ($files as $name => $file) {
        if (!array_key_exists($name, $results)) {
            $results[$name] = $file;
            continue;
        }

        Logger::log('debug', "Merging results for '{$name}'.");

        $existingHeader = $results[$name]->getHeader();
        $newHeader = $file->getHeader();

        if ($existingHeader !== $newHeader) {
            throw new UserException(
                "Multiple results for '{$name}' table have different columns!",
                0,
                null,
                [
                    'differentColumns' => array_diff($existingHeader, $newHeader),
                    'unexpectedColumns' => array_diff($newHeader, $existingHeader),
                    'existingHeader' => $existingHeader,
                    'newHeader' => $newHeader
                ]
            );
        }

        $this->mergeFiles($results[$name], $file);
    }

    return $results;
}