/**
 * Inserts an implementation described by an XML element, then its tags,
 * components, bibliographical references and parameters.
 *
 * The configuration is split in two with array_custom_filter(): entries of type
 * 'string'/'csv' become the flat record handed to Implementation->insert()
 * (merged on top of $implementation_base), entries of type 'plain'/'array' are
 * kept as XML objects and processed after the main insert.
 *
 * Components that have no equivalent according to Implementation->compareToXml()
 * are inserted by calling this function recursively.
 *
 * @param mixed $xml                 XML element holding the implementation description.
 * @param array $configuration       Field configuration passed to array_custom_filter().
 * @param array $implementation_base Pre-filled fields; 'uploader' is read from it when
 *                                   tagging, and from the merged record when recursing.
 *
 * @return mixed The value returned by Implementation->insert() (presumably the new
 *               implementation id - confirm against the model), or false when the
 *               'implements' value is unsupported, the insert fails, or a component
 *               insert fails.
 */
function insertImplementationFromXML($xml, $configuration, $implementation_base = array()) {
    $ci =& get_instance();

    $implementation_objects = all_tags_from_xml($xml, array_custom_filter($configuration, array('plain', 'array')));
    $implementation = all_tags_from_xml($xml, array_custom_filter($configuration, array('string', 'csv')), $implementation_base);

    // insert the implementation itself
    $version = $ci->Implementation->incrementVersionNumber($implementation['name']);
    $implementation['fullName'] = $implementation['name'] . '(' . $version . ')';
    $implementation['version'] = $version;

    // Without an explicit external version, fall back to the source checksum,
    // or to the binary checksum when no source checksum was supplied.
    if (array_key_exists('source_md5', $implementation)) {
        if (array_key_exists('external_version', $implementation) === false) {
            $implementation['external_version'] = $implementation['source_md5'];
        }
    } elseif (array_key_exists('binary_md5', $implementation)) {
        if (array_key_exists('external_version', $implementation) === false) {
            $implementation['external_version'] = $implementation['binary_md5'];
        }
    }

    // Reject an 'implements' value that is neither a supported metric nor a
    // supported algorithm. (The second in_array() previously received a boolean
    // as its haystack because of a misplaced parenthesis.)
    if (array_key_exists('implements', $implementation)) {
        if (in_array($implementation['implements'], $ci->supportedMetrics) == false
            && in_array($implementation['implements'], $ci->supportedAlgorithms) == false) {
            return false;
        }
    }

    // information illegal to insert
    unset($implementation['source_md5']);
    unset($implementation['binary_md5']);

    // tags are not insertable either, but they are handled differently:
    // they are attached through tag_item() once the record exists.
    $tags = array();
    if (array_key_exists('tag', $implementation)) {
        $tags = str_getcsv($implementation['tag']);
        unset($implementation['tag']);
    }

    $res = $ci->Implementation->insert($implementation);
    if ($res === false) {
        return false;
    }

    foreach ($tags as $tag) {
        $error = -1;
        tag_item('implementation', $res, $tag, $implementation_base['uploader'], $error);
    }

    // add to elastic search index.
    $ci->elasticsearch->index('flow', $res);

    // insert all important "components"
    foreach ($implementation_objects as $key => $value) {
        if ($key == 'component') {
            foreach ($value as $entry) {
                $component = $entry->implementation->children('oml', true);
                $similarComponent = $ci->Implementation->compareToXml($entry->implementation);

                if ($similarComponent === false) {
                    // No equivalent found: insert the component as its own implementation.
                    $component->version = $ci->Implementation->incrementVersionNumber($component->name);
                    $succes = insertImplementationFromXML(
                        $component,
                        $configuration,
                        array('uploadDate' => now(), 'uploader' => $implementation['uploader'])
                    );
                    if ($succes == false) {
                        return false;
                    }
                    $ci->Implementation->addComponent($res, $succes, trim($entry->identifier));
                } else {
                    // Link the match reported by compareToXml() instead of inserting again.
                    $ci->Implementation->addComponent($res, $similarComponent, trim($entry->identifier));
                }
            }
        } elseif ($key == 'bibliographical_reference') {
            foreach ($value as $entry) {
                $children = $entry->children('oml', true);
                $children->implementation_id = $res;
                $succes = $ci->Bibliographical_reference->insert($children);
            }
        } elseif ($key == 'parameter') {
            foreach ($value as $entry) {
                $children = $entry->children('oml', true);
                $succes = $ci->Input->insert(array(
                    'fullName' => $implementation['fullName'] . '_' . $children->name,
                    'implementation_id' => $res,
                    'name' => trim($children->name),
                    'defaultValue' => property_exists($children, 'default_value') ? trim($children->default_value) : null,
                    'description' => property_exists($children, 'description') ? trim($children->description) : null,
                    'dataType' => property_exists($children, 'data_type') ? trim($children->data_type) : null,
                    // Read the same tag that is tested for; the value was previously
                    // read from a differently named property (recommendedRange).
                    'recommendedRange' => property_exists($children, 'recommended_range') ? trim($children->recommended_range) : null
                ));
            }
        }
    }

    return $res;
}
/**
 * Handles a dataset upload: creates a new dataset or updates an existing one.
 *
 * The XML description is taken from the 'description' POST field or, failing
 * that, from the uploaded 'description' file, and validated against the
 * 'openml.data.upload' XSD. A description carrying an oml:id is treated as an
 * update of that dataset; otherwise a new dataset is inserted.
 *
 * The data itself comes either from an uploaded 'dataset' file or from an
 * oml:url in the description (supplying both is an error); an update may omit
 * both.
 *
 * Every failure is reported through returnError() and ends the method early.
 * On success the 'data-upload' XML response is rendered with the dataset id.
 *
 * NOTE(review): nothing in this method verifies that the current user owns the
 * dataset being updated - confirm whether that is enforced elsewhere.
 *
 * @return void
 */
private function data_upload() {
    // get correct description
    if ($this->input->post('description')) {
        // get description from string upload
        $description = $this->input->post('description', false);
        if (validateXml($description, xsd('openml.data.upload', $this->controller, $this->version), $xmlErrors, false) == false) {
            $this->returnError(131, $this->version, $this->openmlGeneralErrorCode, $xmlErrors);
            return;
        }
        $xml = simplexml_load_string($description);
    } elseif (isset($_FILES['description']) && check_uploaded_file($_FILES['description']) == true) {
        // get description from file upload
        $description = $_FILES['description'];
        if (validateXml($description['tmp_name'], xsd('openml.data.upload', $this->controller, $this->version), $xmlErrors) == false) {
            $this->returnError(131, $this->version, $this->openmlGeneralErrorCode, $xmlErrors);
            return;
        }
        $xml = simplexml_load_file($description['tmp_name']);
    } else {
        $this->returnError(135, $this->version);
        return;
    }

    // check if this is an update or a new dataset
    $update = false;
    if ($xml->children('oml', true)->{'id'}) {
        $update = true;
    }

    if (!$this->ion_auth->in_group($this->groups_upload_rights, $this->user_id)) {
        $this->returnError(104, $this->version);
        return;
    }

    // check and register the data files, return url
    $file_id = null;
    $datasetUrlProvided = property_exists($xml->children('oml', true), 'url');
    $datasetFileProvided = isset($_FILES['dataset']);

    if ($datasetUrlProvided && $datasetFileProvided) {
        // a url and a file are mutually exclusive
        $this->returnError(140, $this->version);
        return;
    } elseif ($datasetFileProvided) {
        $message = '';
        if (!check_uploaded_file($_FILES['dataset'], false, $message)) {
            $this->returnError(130, $this->version, $this->openmlGeneralErrorCode, 'File dataset: ' . $message);
            return;
        }

        // Anything other than an explicit 'public' visibility is stored as private.
        // NOTE(review): this includes a description without a visibility tag - confirm intended.
        $access_control = 'public';
        $access_control_option = $xml->children('oml', true)->{'visibility'};
        if ($access_control_option != 'public') {
            $access_control = 'private';
        }

        $file_id = $this->File->register_uploaded_file($_FILES['dataset'], $this->data_folders['dataset'], $this->user_id, 'dataset', $access_control);
        if ($file_id === false) {
            $this->returnError(132, $this->version);
            return;
        }
        $file_record = $this->File->getById($file_id);
        $destinationUrl = $this->data_controller . 'download/' . $file_id . '/' . $file_record->filename_original;
    } elseif ($datasetUrlProvided) {
        $destinationUrl = '' . $xml->children('oml', true)->url;
    } elseif ($update) {
        // an update may leave the data itself untouched
        $destinationUrl = false;
    } else {
        $this->returnError(141, $this->version);
        return;
    }

    // build dataset object with new fields to be stored
    if (!$update) {
        // ***** NEW DATASET *****
        $name = '' . $xml->children('oml', true)->{'name'};
        $version = $this->Dataset->incrementVersionNumber($name);

        $dataset = array(
            'name' => $name,
            'version' => $version,
            'url' => $destinationUrl,
            'upload_date' => now(),
            'last_update' => now(),
            'uploader' => $this->user_id,
            'isOriginal' => 'true',
            'file_id' => $file_id
        );
        // extract all other necessary info from the XML description
        $dataset = all_tags_from_xml($xml->children('oml', true), $this->xml_fields_dataset, $dataset);
    } else {
        // ***** UPDATED DATASET *****
        $id = $xml->children('oml', true)->{'id'};
        $dataset = $this->Dataset->getById($id);
        if ($dataset === false) {
            $this->returnError(144, $this->version);
            return;
        }

        // The fetched record only proves existence; the update starts from a fresh array.
        if ($destinationUrl) {
            $dataset = array('last_update' => now(), 'url' => $destinationUrl, 'file_id' => $file_id);
        } else {
            $dataset = array('last_update' => now());
        }
        // extract all other necessary info from the XML description
        $dataset = all_tags_from_xml($xml->children('oml', true), $this->xml_fields_dataset_update, $dataset);
    }

    // handle tags
    $tags = array();
    if (array_key_exists('tag', $dataset)) {
        $tags = str_getcsv($dataset['tag']);
        unset($dataset['tag']);
    }

    /* * * * THE ACTUAL INSERTION * * * */
    if (!$update) {
        // ***** NEW DATASET *****
        $id = $this->Dataset->insert($dataset);
        if (!$id) {
            $this->returnError(134, $this->version);
            return;
        }

        // insert tags.
        foreach ($tags as $tag) {
            $error = -1;
            tag_item('dataset', $id, $tag, $this->user_id, $error);
        }
        // TODO: handle tags for updated data sets as well.
    } else {
        // ***** UPDATED DATASET *****
        $id = '' . $xml->children('oml', true)->{'id'};

        // ignore id, description (should not be altered)
        unset($dataset['id']);
        unset($dataset['description']);

        // remove ignore attributes if none specified
        if (!array_key_exists('ignore_attribute', $dataset)) {
            $dataset['ignore_attribute'] = NULL;
        }

        // reset data features so that they are recalculated
        $dataset['processed'] = NULL;
        $dataset['error'] = 'false';

        // The id originates from the uploaded XML; force it to an integer before
        // it is concatenated into the SQL conditions.
        $sqlId = (int) $id;
        $this->Data_feature->deleteWhere('`did` = "' . $sqlId . '"');
        $this->Data_quality->deleteWhere('`data` = "' . $sqlId . '"');

        // the actual update
        // NOTE(review): the result of update() is not checked - confirm whether a
        // failed update should be reported to the client.
        $this->Dataset->update($id, $dataset);
    }

    // update elastic search index.
    $this->elasticsearch->index('data', $id);

    // create initial wiki page
    if (!$update) {
        $this->wiki->export_to_wiki($id);
    }

    // create the response
    $this->xmlContents('data-upload', $this->version, array('id' => $id));
}
/**
 * Handles the upload of a run: validates the description XML, resolves the
 * implementation, parameter settings, setup and task, stores the run with its
 * uploaded files, input data and tags, and renders the 'run-upload' response.
 *
 * Every failure is reported through _returnError() and ends the method early.
 * When the description carries an error_message, the run is stored with status
 * 'error' and the uploaded-files check (including 'predictions') is skipped.
 *
 * @return void
 */
private function _openml_run_upload() {
    /*
     * Everything that needs to be done for EVERY task,
     * including the unsupported tasks.
     */

    // check uploaded file
    $description = isset($_FILES['description']) ? $_FILES['description'] : false;
    if (!check_uploaded_file($description)) {
        $this->_returnError(202);
        return;
    }

    // validate xml
    if (validateXml($description['tmp_name'], xsd('openml.run.upload'), $xmlErrors) == false) {
        $this->_returnError(203, $this->openmlGeneralErrorCode, $xmlErrors);
        return;
    }

    // fetch xml
    $xml = simplexml_load_file($description['tmp_name']);
    if ($xml === false) {
        $this->_returnError(219);
        return;
    }

    $run_xml = all_tags_from_xml($xml->children('oml', true), $this->xml_fields_run);

    $task_id = $run_xml['task_id'];
    $implementation_id = $run_xml['implementation_id'];
    $setup_string = array_key_exists('setup_string', $run_xml) ? $run_xml['setup_string'] : null;
    $error_message = array_key_exists('error_message', $run_xml) ? $run_xml['error_message'] : false;
    $parameter_objects = array_key_exists('parameter_setting', $run_xml) ? $run_xml['parameter_setting'] : array();
    $output_data = array_key_exists('output_data', $run_xml) ? $run_xml['output_data'] : array();
    $tags = array_key_exists('tag', $run_xml) ? str_getcsv($run_xml['tag']) : array();

    // the user can specify his own metrics. here we check whether these exist in the database.
    if ($output_data != false && array_key_exists('evaluation', $output_data)) {
        foreach ($output_data->children('oml', true)->{'evaluation'} as $eval) {
            // NOTE(review): both values come from the uploaded XML and are placed into the
            // condition unescaped - escape them with the database layer's own facility.
            $measure_id = $this->Implementation->getWhere('`fullName` = "' . $eval->implementation . '" AND `implements` = "' . $eval->name . '"');
            if ($measure_id == false) {
                $this->_returnError(217);
                return;
            }
        }
    }

    // fetch implementation
    $implementation = $this->Implementation->getById($implementation_id);
    if ($implementation === false) {
        $this->_returnError(205);
        return;
    }
    // an implementation of a supported metric is rejected here
    if (in_array($implementation->{'implements'}, $this->supportedMetrics)) {
        $this->_returnError(218);
        return;
    }

    // check whether uploaded files are present.
    if ($error_message == false) {
        if (count($_FILES) < 2) {
            $this->_returnError(206);
            return;
        }

        // Two uploaded files do not guarantee that one of them is 'predictions';
        // resolve it the same way as 'description' above.
        $predictions = isset($_FILES['predictions']) ? $_FILES['predictions'] : false;
        $message = '';
        if (!check_uploaded_file($predictions, false, $message)) {
            $this->_returnError(207, $this->openmlGeneralErrorCode, 'File predictions: ' . $message);
            return;
        }
    }

    $parameters = array();
    foreach ($parameter_objects as $p) {
        // since 'component' is an optional XML field, we add a default option
        $component = property_exists($p, 'component') ? (string) $p->component : $implementation->id;

        // now find the input id
        // The component id is user supplied and unquoted in the condition, so it is
        // forced to an integer.
        // NOTE(review): the parameter name is still interpolated unescaped.
        $input_id = $this->Input->getWhereSingle('`implementation_id` = ' . (int) $component . ' AND `name` = "' . $p->name . '"');
        if ($input_id === false) {
            $this->_returnError(213);
            return;
        }

        $parameters[$input_id->id] = $p->value . '';
    }

    // search setup ...
    // TODO: do something about the new parameters. Are still retrieved by ID, does not work with Weka plugin.
    $setupId = $this->Algorithm_setup->getSetupId($implementation, $parameters, true, $setup_string);
    if ($setupId === false) {
        $this->_returnError(214);
        return;
    }

    // fetch task
    $taskRecord = $this->Task->getById($task_id);
    if ($taskRecord === false) {
        $this->_returnError(204);
        return;
    }
    // end() takes its argument by reference, so the result is stored in a variable first.
    $crosstabulated = $this->Task->tasks_crosstabulated($taskRecord->ttid, true, array(), false, $task_id);
    $task = end($crosstabulated);

    // now create a run
    $runData = array(
        'uploader' => $this->user_id,
        'setup' => $setupId,
        'task_id' => $task->task_id,
        'start_time' => now(),
        'status' => $error_message == false ? 'OK' : 'error',
        'error' => $error_message == false ? null : $error_message,
        'experiment' => '-1'
    );
    $runId = $this->Run->insert($runData);
    if ($runId === false) {
        $this->_returnError(210);
        return;
    }

    // and fetch the run record
    $run = $this->Run->getById($runId);

    $result = new stdClass();
    $result->run_id = $runId; // for output

    // attach uploaded files as output to run
    foreach ($_FILES as $key => $value) {
        $file_type = $key == 'predictions' ? 'predictions' : 'run_uploaded_file';
        $file_id = $this->File->register_uploaded_file($value, $this->data_folders['run'], $this->user_id, $file_type);
        if (!$file_id) {
            $this->_returnError(212);
            return;
        }
        $file_record = $this->File->getById($file_id);

        // NOTE(review): $filename is not used below; the call is kept in case
        // getAvailableName() has side effects - confirm and remove if it has none.
        $filename = getAvailableName(DATA_PATH . $this->data_folders['run'], $value['name']);

        $record = array(
            'source' => $run->rid,
            'field' => $key,
            'name' => $value['name'],
            'format' => $file_record->extension,
            'file_id' => $file_id
        );
        $did = $this->Runfile->insert($record);
        if ($did == false) {
            $this->_returnError(212);
            return;
        }

        $this->Run->outputData($run->rid, $did, 'runfile', $key);
    }

    // attach input data
    $inputData = $this->Run->inputData($runId, $task->source_data, 'dataset'); // Based on the query, it has been guaranteed that the dataset id exists.
    if ($inputData === false) {
        // Report the failure like every other error path; the code was previously
        // stored in an unused local and never sent to the client.
        $this->_returnError(211);
        return;
    }

    // tag it, if necessary
    foreach ($tags as $tag) {
        $error = -1;
        tag_item('run', $runId, $tag, $this->user_id, $error);
    }

    // add to elastic search index.
    $this->elasticsearch->index('run', $run->rid);

    // remove scheduled task
    $this->Schedule->deleteWhere('task_id = "' . $task->task_id . '" AND sid = "' . $setupId . '"');

    /*
     * Now the stuff that needs to be done for the special
     * supported tasks, like classification, regression.
     */

    // and present result, in effect only a run_id.
    $this->_xmlContents('run-upload', $result);
}