/**
 * Evaluates the given task against the testing datasource using a remote scorer.
 *
 * A scorer is first created on the server from the PMML prepared for the task. The rows
 * of the testing datasource are then sent to that scorer in batches of
 * self::ROWS_PER_TEST rows and the partial results are merged into one result.
 *
 * @param Task $task
 * @param Datasource $testingDatasource
 * @return ScoringResult
 * @throws \Exception when the scorer cannot be created (code 500, the original error is
 *                    attached as the previous exception) or when a batch response does
 *                    not report the code 200
 */
public function evaluateTask(Task $task, Datasource $testingDatasource) {
  #region build the PMML and create the scorer
  $pmml = $this->prepareTaskPmml($task);
  $url = $this->serverUrl . '/scorer';
  try {
    $response = self::curlRequestResponse($url, $pmml, '', ['Content-Type' => 'application/xml; charset=utf-8']);
    $response = Json::decode($response, Json::FORCE_ARRAY);
    //the response is remote input - verify its shape explicitly instead of suppressing errors with "@"
    if (is_array($response) && isset($response['code']) && $response['code'] == 201 && !empty($response['id'])) {
      $scorerId = $response['id'];
    } else {
      $description = 'Unexpected scorer response!';
      if (is_array($response) && isset($response['description']) && is_scalar($response['description'])) {
        $description = (string)$response['description'];
      }
      throw new \Exception($description);
    }
  } catch (\Exception $e) {
    //every failure of this phase is reported uniformly, the cause stays available as the previous exception
    throw new \Exception('Scorer creation failed!', 500, $e);
  }
  #endregion build the PMML and create the scorer

  #region send the rows of the testing DB table step by step
  $database = $this->databaseFactory->getDatabaseInstance($testingDatasource->getDbConnection(), $task->miner->user);
  $dbDatasource = $database->getDbDatasource($testingDatasource->dbDatasourceId > 0 ? $testingDatasource->dbDatasourceId : $testingDatasource->dbTable);
  $dbRowsCount = $dbDatasource->size;
  $testedRowsCount = 0;
  /** @var ScoringResult[] $partialResults */
  $partialResults = [];
  $url .= '/' . $scorerId;
  $jsonHeaders = ['Content-Type' => 'application/json; charset=utf-8'];

  //export the individual batches of rows from the DB and test them
  while ($testedRowsCount < $dbRowsCount) {
    //prepare the JSON and send it
    $dbValuesRows = $database->getDbValuesRows($dbDatasource, $testedRowsCount, self::ROWS_PER_TEST);
    $json = Json::encode($dbValuesRows->getRowsAsArray());
    $response = self::curlRequestResponse($url, $json, '', $jsonHeaders);
    $response = Json::decode($response, Json::FORCE_ARRAY);
    if (!is_array($response) || !isset($response['code']) || $response['code'] != 200) {
      throw new \Exception('Invalid scorer response!');
    }

    //build the result object for this batch
    $scoringResult = new EasyMinerScoringResult($response);
    $partialResults[] = $scoringResult->getScoringConfusionMatrix()->getScoringResult(true);
    //TODO processing of the whole contingency table can be added here in the future

    //advance to the next batch and release the memory
    unset($scoringResult);
    $testedRowsCount += self::ROWS_PER_TEST;
  }
  #endregion send the rows of the testing DB table step by step

  #region build the overall result and return it
  return ScoringResult::merge($partialResults);
  #endregion build the overall result and return it
}
/**
 * Evaluates the rules of the given task against the testing datasource using a remote scorer.
 *
 * The rules are serialized to an XML file in the temp directory. The testing data are then
 * exported in batches of self::ROWS_PER_TEST rows to temporary CSV files; for every batch
 * the scorer is called with the URLs of both files and its XML response is converted to
 * a partial result. All partial results are merged at the end.
 *
 * @param Task $task
 * @param Datasource $testingDatasource
 * @return ScoringResult
 * @throws \Exception when a temporary file cannot be written or when the scorer response
 *                    cannot be parsed as XML
 */
public function evaluateTask(Task $task, Datasource $testingDatasource) {
  //explicit check instead of the "@" error suppression; a missing setting yields an empty prefix as before
  $tempDirectory = isset($this->params['tempDirectory']) ? $this->params['tempDirectory'] : '';

  $rulesXmlFileName = $task->taskId . '.xml';
  /** @var string $rulesXmlFilePath - path of the XML file with the rules */
  $rulesXmlFilePath = $tempDirectory . '/' . $rulesXmlFileName;

  $associationRulesXmlSerializer = new AssociationRulesXmlSerializer($task->rules);
  $rulesXml = $associationRulesXmlSerializer->getXml()->asXML();
  if (file_put_contents($rulesXmlFilePath, $rulesXml) === false) {
    throw new \Exception('Unable to write the rules XML file!');
  }

  $database = $this->databaseFactory->getDatabaseInstance($testingDatasource->getDbConnection(), $task->miner->user);
  $dbDatasource = $database->getDbDatasource($testingDatasource->dbDatasourceId > 0 ? $testingDatasource->dbDatasourceId : $testingDatasource->dbTable);
  $dbRowsCount = $dbDatasource->size;
  $testedRowsCount = 0;
  /** @var ScoringResult[] $partialResults */
  $partialResults = [];

  //export the individual batches of rows from the DB and test them
  while ($testedRowsCount < $dbRowsCount) {
    $csv = CsvSerializer::prepareCsvFromDatabase($database, $dbDatasource, $testedRowsCount, self::ROWS_PER_TEST, ';', '"');
    $csvFileName = $testingDatasource->datasourceId . '-' . $testedRowsCount . '-' . self::ROWS_PER_TEST . '.csv';
    /** @var string $csvFilePath - path of the CSV file with the current part of the data */
    $csvFilePath = $tempDirectory . '/' . $csvFileName;
    if (file_put_contents($csvFilePath, $csv) === false) {
      throw new \Exception('Unable to write the CSV file with the testing data!');
    }

    $url = $this->serverUrl . '?rulesXml=' . $this->getTempFileUrl($rulesXmlFileName) . '&dataCsv=' . $this->getTempFileUrl($csvFileName);

    try {
      $response = self::curlRequestResponse($url);
    } catch (\Exception $e) {
      //do not leave the temporary CSV file behind when the request fails
      if (is_file($csvFilePath)) {
        unlink($csvFilePath);
      }
      throw $e;
    }
    //the CSV file is not needed once the scorer has answered
    unlink($csvFilePath);

    $xml = simplexml_load_string($response);
    if ($xml === false) {
      //an unparsable response must not be merged into the result as a batch of empty values
      throw new \Exception('Invalid scorer response!');
    }

    $partialResult = new ScoringResult();
    $partialResult->truePositive = (string)$xml->truePositive;
    $partialResult->falsePositive = (string)$xml->falsePositive;
    $partialResult->rowsCount = (string)$xml->rowsCount;
    $partialResults[] = $partialResult;

    $testedRowsCount += self::ROWS_PER_TEST;
  }

  //NOTE(review): the rules XML file is not removed here - its removal was commented out in the original code; confirm whether that is intentional

  //build the overall result
  return ScoringResult::merge($partialResults);
}
/**
 * Synchronizes the stored info about the datasource and its columns with the database.
 *
 * The size and name of the datasource are updated from the database datasource. Every
 * database field is then matched to an existing column - by the field ID first, by the
 * name otherwise - and the column is updated and (re)activated; fields without a matching
 * column get a new column. Existing columns that matched no field are deactivated.
 *
 * @param Datasource &$datasource passed by reference - it is replaced with the instance
 *                                reloaded using findDatasource() at the end
 * @param User $user user passed to the database factory
 */
public function updateDatasourceColumns(Datasource &$datasource, User $user) {
  $database = $this->databaseFactory->getDatabaseInstance($datasource->getDbConnection(), $user);
  $dbDatasource = $database->getDbDatasource($datasource->dbDatasourceId ? $datasource->dbDatasourceId : $datasource->name);

  if ($datasource->size != $dbDatasource->size) {
    $datasource->size = $dbDatasource->size;
  }
  if ($dbDatasource->name != $datasource->name) {
    $datasource->name = $dbDatasource->name;
  }
  if ($datasource->isModified()) {
    $this->saveDatasource($datasource);
  }

  $dbFields = $database->getDbFields($dbDatasource);

  #region prepare the lists of currently existing datasource columns
  /** @var DatasourceColumn[] $existingDatasourceColumnsByDbDatasourceFieldId */
  $existingDatasourceColumnsByDbDatasourceFieldId = [];
  /** @var DatasourceColumn[] $existingDatasourceColumnsByName */
  $existingDatasourceColumnsByName = [];
  /** @var DatasourceColumn[] $datasourceColumns */
  $datasourceColumns = $datasource->datasourceColumns;
  if (!empty($datasourceColumns)) {
    //iterate by value: a by-reference loop variable would stay bound to the last element
    //and the assignments to $datasourceColumn below would overwrite that element
    foreach ($datasourceColumns as $datasourceColumn) {
      if (!empty($datasourceColumn->dbDatasourceFieldId)) {
        $existingDatasourceColumnsByDbDatasourceFieldId[$datasourceColumn->dbDatasourceFieldId] = $datasourceColumn;
      } else {
        $existingDatasourceColumnsByName[$datasourceColumn->name] = $datasourceColumn;
      }
    }
  }
  #endregion

  #region update the columns using the fields loaded from the DB
  if (!empty($dbFields)) {
    foreach ($dbFields as $dbField) {
      if (!empty($dbField->id) && is_int($dbField->id) && isset($existingDatasourceColumnsByDbDatasourceFieldId[$dbField->id])) {
        //a column with the given field ID already exists
        $datasourceColumn = $existingDatasourceColumnsByDbDatasourceFieldId[$dbField->id];
        $modified = false;
        if ($datasourceColumn->name != $dbField->name) {
          $datasourceColumn->name = $dbField->name;
          $modified = true;
        }
        if ($datasourceColumn->type != $dbField->type) {
          $datasourceColumn->type = $dbField->type;
          $modified = true;
        }
        if (!$datasourceColumn->active) {
          $datasourceColumn->active = true;
          $modified = true;
        }
        if ($modified) {
          $this->datasourceColumnsRepository->persist($datasourceColumn);
        }
        //matched columns are removed, so only the columns missing in the DB remain in the list
        unset($existingDatasourceColumnsByDbDatasourceFieldId[$dbField->id]);
      } elseif (!empty($dbField->name) && isset($existingDatasourceColumnsByName[$dbField->name])) {
        //the column is found by its name
        $datasourceColumn = $existingDatasourceColumnsByName[$dbField->name];
        $modified = false;
        if ($datasourceColumn->type != $dbField->type) {
          $datasourceColumn->type = $dbField->type;
          $modified = true;
        }
        if (!$datasourceColumn->active) {
          $datasourceColumn->active = true;
          $modified = true;
        }
        if ($modified) {
          $this->datasourceColumnsRepository->persist($datasourceColumn);
        }
        unset($existingDatasourceColumnsByName[$dbField->name]);
      } else {
        //this is a new data column
        $datasourceColumn = new DatasourceColumn();
        $datasourceColumn->datasource = $datasource;
        $datasourceColumn->name = $dbField->name;
        if (is_int($dbField->id)) {
          $datasourceColumn->dbDatasourceFieldId = $dbField->id;
        }
        $datasourceColumn->active = true;
        $datasourceColumn->type = $dbField->type;
        $this->datasourceColumnsRepository->persist($datasourceColumn);
      }
    }
  }
  #endregion

  #region deactivate the columns that no longer exist in the DB
  $missingDatasourceColumns = array_merge(
    array_values($existingDatasourceColumnsByDbDatasourceFieldId),
    array_values($existingDatasourceColumnsByName)
  );
  foreach ($missingDatasourceColumns as $missingDatasourceColumn) {
    if ($missingDatasourceColumn->active) {
      $missingDatasourceColumn->active = false;
      $this->datasourceColumnsRepository->persist($missingDatasourceColumn);
    }
  }
  #endregion

  //reload the datasource; the caller receives the fresh instance through the reference parameter
  $datasource = $this->findDatasource($datasource->datasourceId);
}
/**
 * Exports an array with the info from the DataDictionary and the TransformationDictionary.
 *
 * The returned array has the keys "dataDictionary" (column name => 'string'|'integer'),
 * "transformationDictionary" (attribute name => ['choices' => values]) and "recordCount"
 * (count of rows in the datasource table).
 *
 * @param Datasource $datasource
 * @param Metasource|null $metasource
 * @return array
 */
public function exportDictionariesArr(Datasource $datasource, Metasource $metasource = null) {
  $this->databasesFacade->openDatabase($datasource->getDbConnection());
  $recordCount = $this->databasesFacade->getRowsCount($datasource->dbTable);

  #region data fields
  $dataDictionary = [];
  foreach ($datasource->datasourceColumns as $column) {
    //TODO check which data types make sense to return
    if ($column->type == DatasourceColumn::TYPE_STRING) {
      $dataType = 'string';
    } else {
      $dataType = 'integer';
    }
    $dataDictionary[$column->name] = $dataType;
  }
  #endregion data fields

  #region attributes
  $transformationDictionary = [];
  if (!empty($metasource) && !empty($metasource->attributes)) {
    $this->databasesFacade->openDatabase($metasource->getDbConnection());
    foreach ($metasource->attributes as $attribute) {
      $choices = [];
      try {
        $statistics = $this->databasesFacade->getColumnValuesStatistic($metasource->attributesTable, $attribute->name, true);
        if (!empty($statistics->valuesArr)) {
          //only the values (keys) are exported, their counts are not used
          foreach ($statistics->valuesArr as $value => $count) {
            $choices[] = $value;
          }
        }
      } catch (\Exception $e) {
        //failures while reading the value statistics are ignored - the attribute is exported with the choices collected so far
      }
      $transformationDictionary[$attribute->name] = ['choices' => $choices];
    }
  }
  #endregion attributes

  return [
    'dataDictionary' => $dataDictionary,
    'transformationDictionary' => $transformationDictionary,
    'recordCount' => $recordCount
  ];
}