Exemplo n.º 1
0
 /**
  * Move an uploaded file into a data folder and record it through $this->insert().
  *
  * @param array  $file          Upload descriptor; the 'tmp_name', 'name' and 'type' keys are read.
  * @param string $to_folder     Folder relative to DATA_PATH. It is concatenated directly with the
  *                              file name, so it has to end with a directory separator.
  * @param mixed  $creator_id    Stored in the 'creator' field.
  * @param string $type          Stored in the 'type' field.
  * @param string $access_policy Stored in the 'access_policy' field; defaults to 'public'.
  *
  * @return mixed Whatever $this->insert() returns for the new record, or false when the file
  *               could not be hashed, the target directory could not be created, or the
  *               upload could not be moved.
  */
 function register_uploaded_file($file, $to_folder, $creator_id, $type, $access_policy = 'public')
 {
     // Hash the temporary file first; if it cannot be read there is nothing to register.
     $md5_hash = md5_file($file['tmp_name']);
     if ($md5_hash === false) {
         return false;
     }
     // Stop here when the target directory is unavailable instead of letting
     // move_uploaded_file() fail on a missing folder.
     $target_dir = DATA_PATH . $to_folder;
     if (create_dir($target_dir) == false) {
         return false;
     }
     // getAvailableName() picks the name the file is stored under inside the target folder.
     $newName = getAvailableName($target_dir, $file['name']);
     $target_path = $target_dir . $newName;
     if (move_uploaded_file($file['tmp_name'], $target_path) === false) {
         return false;
     }
     $file_record = array(
         'creator' => $creator_id,
         'creation_date' => now(),
         // Stored relative to DATA_PATH.
         'filepath' => $to_folder . $newName,
         'filesize' => filesize($target_path),
         'filename_original' => $file['name'],
         'extension' => pathinfo($file['name'], PATHINFO_EXTENSION),
         // NOTE(review): taken verbatim from the upload descriptor, not detected from the content.
         'mime_type' => $file['type'],
         'md5_hash' => $md5_hash,
         'type' => $type,
         'access_policy' => $access_policy,
     );
     return $this->insert($file_record);
 }
Exemplo n.º 2
0
 /**
  * Build the CSV export for one meta-dataset request and notify its owner by email.
  *
  * The request is marked as processed, a SELECT ... INTO OUTFILE query writes the CSV to a
  * temporary path, the file is moved into the data directory, registered through
  * $this->File and linked to the request. Failures are reported through
  * $this->_error_meta_dataset(); progress is echoed to the output.
  *
  * @param mixed $id Id of the request to process. When omitted (or loosely equal to false)
  *                  the first request whose 'processed' column is NULL is used.
  *
  * @return void
  */
 function create_meta_dataset($id = false)
 {
     if ($id == false) {
         $meta_dataset = $this->Meta_dataset->getWhere('processed IS NULL');
         echo 'No id specified, requesting first dataset in queue.' . "\n";
     } else {
         // NOTE(review): $id is concatenated into the WHERE clause unescaped - confirm that
         // callers only pass trusted values, or escape it with the DB layer's own API.
         $meta_dataset = $this->Meta_dataset->getWhere('id = "' . $id . '"');
         echo 'Requesting dataset with id ' . $id . ".\n";
     }
     if (!$meta_dataset) {
         echo 'No meta-dataset to process. ' . "\n";
         return;
     }

     $meta_dataset = $meta_dataset[0];
     echo 'Processing meta-dataset with id ' . $meta_dataset->id . ".\n";
     // Mark the request as processed up front, before any of the work below can fail.
     $this->Meta_dataset->update($meta_dataset->id, array('processed' => now()));

     // Optional filters. NOTE(review): the stored id lists are pasted into the SQL as-is -
     // confirm they are validated when the request is created.
     $dataset_constr = $meta_dataset->datasets ? 'AND d.did IN (' . $meta_dataset->datasets . ') ' : '';
     $task_constr = $meta_dataset->tasks ? 'AND t.task_id IN (' . $meta_dataset->tasks . ') ' : '';
     $flow_constr = $meta_dataset->flows ? 'AND i.id IN (' . $meta_dataset->flows . ') ' : '';
     $setup_constr = $meta_dataset->setups ? 'AND s.sid IN (' . $meta_dataset->setups . ') ' : '';
     $function_constr = $meta_dataset->functions ? 'AND e.function IN (' . $meta_dataset->functions . ') ' : '';

     // Defaults; task types 3 and 4 read from different tables and export extra key columns.
     $quality_column = 'data_quality';
     $evaluation_column = 'evaluation';
     $evaluation_keys = array('function' => 'e.function');
     $quality_keys = array();
     if ($meta_dataset->task_type == 3) {
         $evaluation_keys = array('repeat' => 'e.repeat', 'fold' => 'e.fold', 'sample' => 'e.sample', 'sample_size' => 'e.sample_size', 'function' => 'e.function');
         $evaluation_column = 'evaluation_sample';
     }
     if ($meta_dataset->task_type == 4) {
         $evaluation_keys = array('interval_start' => 'e.interval_start', 'interval_end' => 'e.interval_end', 'function' => 'e.function');
         $quality_keys = array('interval_start' => 'q.interval_start', 'interval_end' => 'q.interval_end');
         $quality_column = 'data_quality_interval';
         $evaluation_column = 'evaluation_interval';
     }

     if (create_dir(DATA_PATH . $this->dir_suffix) == false) {
         $this->_error_meta_dataset($meta_dataset->id, 'Failed to create data directory. ', $meta_dataset->user_id);
         return;
     }

     // The query writes its result to this path; it is moved into the data directory afterwards.
     $tmp_path = '/tmp/' . rand_string(20) . '.csv';
     $outfile_clause = 'INTO OUTFILE "' . $tmp_path . '" ' . 'FIELDS TERMINATED BY "," ' . 'ENCLOSED BY "\\"" ' . 'LINES TERMINATED BY "\\n" ' . ';';

     // In both queries the first SELECT emits the CSV header row, the second the data rows.
     if ($meta_dataset->type == 'qualities') {
         // Both fragments must default to '' so the query is well-formed when there are no
         // extra quality key columns.
         $quality_keys_string = '';
         $quality_keys_key_string = '';
         if ($quality_keys) {
             $quality_keys_string = implode(', ', $quality_keys) . ',';
             $quality_keys_key_string = '"' . implode('", "', array_keys($quality_keys)) . '",';
         }
         $sql = 'SELECT "data_id", "task_id", "name", "quality", ' . $quality_keys_key_string . ' "value" '
             . 'UNION ALL '
             . 'SELECT d.did, t.task_id, d.name, q.quality, ' . $quality_keys_string . 'q.value '
             . 'FROM dataset d, ' . $quality_column . ' q, task t, task_inputs i '
             . 'WHERE t.task_id = i.task_id '
             . 'AND i.input = "source_data" '
             . 'AND i.value = q.data '
             . 'AND d.did = q.data '
             . 'AND t.ttid = "' . $meta_dataset->task_type . '" '
             . $dataset_constr . $task_constr
             . $outfile_clause;
     } else {
         // Every fragment ends with a space so adjacent tokens are never glued together.
         $sql = 'SELECT "run_id", "setup_id", "task_id", "' . implode('", "', array_keys($evaluation_keys)) . '" ' . ', "value" '
             . 'UNION ALL '
             . 'SELECT r.rid AS run_id, s.sid AS setup_id, t.task_id AS task_id, ' . implode(', ', $evaluation_keys) . ', e.value '
             . 'FROM run r, task t, task_inputs v, dataset d, algorithm_setup s, implementation i, ' . $evaluation_column . ' e '
             . 'WHERE r.task_id = t.task_id AND v.task_id = t.task_id  '
             . 'AND v.input = "source_data" AND v.value = d.did '
             . 'AND r.setup = s.sid AND s.implementation_id = i.id '
             . 'AND e.source = r.rid '
             . 'AND t.ttid = "' . $meta_dataset->task_type . '" '
             . $dataset_constr . $task_constr . $flow_constr . $setup_constr . $function_constr
             . 'GROUP BY r.setup, r.task_id, ' . implode(',', $evaluation_keys) . ' '
             . $outfile_clause;
     }

     $this->Dataset->query($sql);
     // The presence of the output file is used as the success signal for the query.
     if (file_exists($tmp_path) == false) {
         $error = 'MySQL Error #' . $this->Dataset->mysqlErrorNo() . ': ' . $this->Dataset->mysqlErrorMessage();
         $this->_error_meta_dataset($meta_dataset->id, $error, $meta_dataset->user_id);
         return;
     }

     $filename = getAvailableName(DATA_PATH . $this->dir_suffix, 'meta_dataset.csv');
     $filepath = DATA_PATH . $this->dir_suffix . $filename;
     if (rename($tmp_path, $filepath) == false) {
         $this->_error_meta_dataset($meta_dataset->id, 'Failed to move csv to data directory. Filename: ' . $filename, $meta_dataset->user_id);
         return;
     }

     $file_id = $this->File->register_created_file($this->dir_suffix, $filename, $meta_dataset->user_id, 'dataset', 'text/csv', 'private');
     // Do not link a missing file or announce success when the registration failed.
     if ($file_id == false) {
         $this->_error_meta_dataset($meta_dataset->id, 'Failed to register csv file. Filename: ' . $filename, $meta_dataset->user_id);
         return;
     }
     $this->Meta_dataset->update($meta_dataset->id, array('file_id' => $file_id));

     // Tell the requesting user that the export is ready.
     $user = $this->ion_auth->user($meta_dataset->user_id)->row();
     $this->email->to($user->email);
     $this->email->subject('OpenML Meta Dataset');
     $this->email->message("This is an automatically generated email. Your requested meta-dataset was created successfully and can be downloaded from " . BASE_URL);
     $this->email->send();
 }
Exemplo n.º 3
0
 /**
  * Handle a run upload.
  *
  * Validates the uploaded description XML, resolves the implementation, setup and task it
  * refers to, stores the run together with every uploaded file, tags and indexes it, and
  * responds with the new run id. Every failure path reports through $this->_returnError()
  * and returns early.
  *
  * @return void
  */
 private function _openml_run_upload()
 {
     /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
      * Everything that needs to be done for EVERY task,        *
      * Including the unsupported tasks                         *
      * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
     // check uploaded file
     $description = isset($_FILES['description']) ? $_FILES['description'] : false;
     if (!check_uploaded_file($description)) {
         $this->_returnError(202);
         return;
     }
     // validate xml
     if (validateXml($description['tmp_name'], xsd('openml.run.upload'), $xmlErrors) == false) {
         $this->_returnError(203, $this->openmlGeneralErrorCode, $xmlErrors);
         return;
     }
     // fetch xml
     $xml = simplexml_load_file($description['tmp_name']);
     if ($xml === false) {
         $this->_returnError(219);
         return;
     }
     $run_xml = all_tags_from_xml($xml->children('oml', true), $this->xml_fields_run);
     $task_id = $run_xml['task_id'];
     $implementation_id = $run_xml['implementation_id'];
     $setup_string = array_key_exists('setup_string', $run_xml) ? $run_xml['setup_string'] : null;
     $error_message = array_key_exists('error_message', $run_xml) ? $run_xml['error_message'] : false;
     $parameter_objects = array_key_exists('parameter_setting', $run_xml) ? $run_xml['parameter_setting'] : array();
     $output_data = array_key_exists('output_data', $run_xml) ? $run_xml['output_data'] : array();
     $tags = array_key_exists('tag', $run_xml) ? str_getcsv($run_xml['tag']) : array();
     // the user can specify his own metrics. here we check whether these exist in the database.
     // $output_data is the array() default when the tag is absent and is treated as an XML
     // element otherwise; array_key_exists() must not be used on objects, so test the child
     // through the same lookup the loop uses.
     if (is_object($output_data) && isset($output_data->children('oml', true)->evaluation)) {
         foreach ($output_data->children('oml', true)->{'evaluation'} as $eval) {
             // NOTE(review): values taken from the uploaded XML are concatenated into the
             // WHERE clause unescaped - escape them with the DB layer's own API.
             $measure_id = $this->Implementation->getWhere('`fullName` = "' . $eval->implementation . '" AND `implements` = "' . $eval->name . '"');
             if ($measure_id == false) {
                 $this->_returnError(217);
                 return;
             }
         }
     }
     // fetch implementation
     $implementation = $this->Implementation->getById($implementation_id);
     if ($implementation === false) {
         $this->_returnError(205);
         return;
     }
     // an implementation listed in $this->supportedMetrics is rejected
     if (in_array($implementation->{'implements'}, $this->supportedMetrics)) {
         $this->_returnError(218);
         return;
     }
     // check whether uploaded files are present (not required when the run reports an error).
     if ($error_message == false) {
         if (count($_FILES) < 2) {
             $this->_returnError(206);
             return;
         }
         $message = '';
         // same guard as for the description: a missing field is passed on as false
         $predictions = isset($_FILES['predictions']) ? $_FILES['predictions'] : false;
         if (!check_uploaded_file($predictions, false, $message)) {
             $this->_returnError(207, $this->openmlGeneralErrorCode, 'File predictions: ' . $message);
             return;
         }
     }
     $parameters = array();
     foreach ($parameter_objects as $p) {
         // since 'component' is an optional XML field, we add a default option
         $component = property_exists($p, 'component') ? $p->component : $implementation->id;
         // now find the input id. The component is compared unquoted, so force it to an integer.
         // NOTE(review): $p->name is still concatenated unescaped - escape it with the DB layer's own API.
         $input_id = $this->Input->getWhereSingle('`implementation_id` = ' . (int) (string) $component . ' AND `name` = "' . $p->name . '"');
         if ($input_id === false) {
             $this->_returnError(213);
             return;
         }
         // concatenating '' stores the value as a plain string
         $parameters[$input_id->id] = $p->value . '';
     }
     // search setup ... // TODO: do something about the new parameters. Are still retrieved by ID, does not work with Weka plugin.
     $setupId = $this->Algorithm_setup->getSetupId($implementation, $parameters, true, $setup_string);
     if ($setupId === false) {
         $this->_returnError(214);
         return;
     }
     // fetch task
     $taskRecord = $this->Task->getById($task_id);
     if ($taskRecord === false) {
         $this->_returnError(204);
         return;
     }
     // end() takes its argument by reference, so the result has to live in a variable first.
     $tasks = $this->Task->tasks_crosstabulated($taskRecord->ttid, true, array(), false, $task_id);
     if ($tasks == false) {
         // nothing to take the task from; reported with the same code as a missing task record
         $this->_returnError(204);
         return;
     }
     $task = end($tasks);
     // now create a run
     $runData = array(
         'uploader' => $this->user_id,
         'setup' => $setupId,
         'task_id' => $task->task_id,
         'start_time' => now(),
         'status' => $error_message == false ? 'OK' : 'error',
         'error' => $error_message == false ? null : $error_message,
         'experiment' => '-1',
     );
     $runId = $this->Run->insert($runData);
     if ($runId === false) {
         $this->_returnError(210);
         return;
     }
     // and fetch the run record
     $run = $this->Run->getById($runId);
     $result = new stdClass();
     $result->run_id = $runId;
     // for output
     // attach uploaded files as output to run
     foreach ($_FILES as $key => $value) {
         $file_type = $key == 'predictions' ? 'predictions' : 'run_uploaded_file';
         $file_id = $this->File->register_uploaded_file($value, $this->data_folders['run'], $this->user_id, $file_type);
         if (!$file_id) {
             $this->_returnError(212);
             return;
         }
         $file_record = $this->File->getById($file_id);
         $record = array('source' => $run->rid, 'field' => $key, 'name' => $value['name'], 'format' => $file_record->extension, 'file_id' => $file_id);
         $did = $this->Runfile->insert($record);
         if ($did == false) {
             $this->_returnError(212);
             return;
         }
         $this->Run->outputData($run->rid, $did, 'runfile', $key);
     }
     // attach input data
     $inputData = $this->Run->inputData($runId, $task->source_data, 'dataset');
     // Based on the query, it has been guaranteed that the dataset id exists.
     if ($inputData === false) {
         // report the failure like every other error path instead of returning silently
         $this->_returnError(211);
         return;
     }
     // tag it, if necessary
     foreach ($tags as $tag) {
         $error = -1;
         tag_item('run', $runId, $tag, $this->user_id, $error);
     }
     // add to elastic search index.
     $this->elasticsearch->index('run', $run->rid);
     // remove scheduled task
     $this->Schedule->deleteWhere('task_id = "' . $task->task_id . '" AND sid = "' . $setupId . '"');
     /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
      * Now the stuff that needs to be done for the special     *
      * supported tasks, like classification, regression        *
      * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
     // and present result, in effect only a run_id.
     $this->_xmlContents('run-upload', $result);
 }