/**
 * Moves an uploaded file below DATA_PATH and stores a record describing it.
 *
 * The MD5 hash is taken from the temporary upload before it is moved. The
 * name the file is stored under is whatever getAvailableName() returns for
 * the target directory and the client-supplied name.
 *
 * @param array  $file          Upload descriptor; the keys 'tmp_name', 'name' and 'type' are read.
 * @param string $to_folder     Folder appended to DATA_PATH; also the prefix of the stored 'filepath'.
 * @param mixed  $creator_id    Value stored in the 'creator' field.
 * @param string $type          Value stored in the 'type' field.
 * @param string $access_policy Value stored in the 'access_policy' field.
 *
 * @return mixed The result of $this->insert(), or false when hashing or moving the upload fails.
 */
function register_uploaded_file($file, $to_folder, $creator_id, $type, $access_policy = 'public') {
    $upload_tmp = $file['tmp_name'];
    $client_name = $file['name'];

    // Hash first: a failure here leaves the filesystem untouched.
    $checksum = md5_file($upload_tmp);
    if ($checksum === false) {
        return false;
    }

    $target_dir = DATA_PATH . $to_folder;
    create_dir($target_dir);

    $stored_name = getAvailableName($target_dir, $client_name);
    $stored_path = $target_dir . $stored_name;

    if (move_uploaded_file($upload_tmp, $stored_path) === false) {
        return false;
    }

    // Build the record field by field, in the same key order as before.
    $record = array();
    $record['creator'] = $creator_id;
    $record['creation_date'] = now();
    $record['filepath'] = $to_folder . $stored_name;
    $record['filesize'] = filesize($stored_path);
    $record['filename_original'] = $client_name;
    $record['extension'] = pathinfo($client_name, PATHINFO_EXTENSION);
    $record['mime_type'] = $file['type'];
    $record['md5_hash'] = $checksum;
    $record['type'] = $type;
    $record['access_policy'] = $access_policy;

    return $this->insert($record);
}
/**
 * Processes one meta-dataset request: exports the requested rows to a CSV
 * file through a MySQL "SELECT ... INTO OUTFILE" statement, moves that file
 * into the data directory, registers it and e-mails the requesting user.
 *
 * Progress messages are echoed. Failures are reported through
 * $this->_error_meta_dataset() and end processing.
 *
 * @param mixed $id Id of the meta-dataset to process. When loosely false, the
 *                  first record with "processed IS NULL" is taken instead.
 *
 * @return void
 */
function create_meta_dataset($id = false) {
    if ($id == false) {
        $meta_dataset = $this->Meta_dataset->getWhere('processed IS NULL');
        echo 'No id specified, requesting first dataset in queue.' . "\n";
    } else {
        // NOTE(review): $id is concatenated into the WHERE clause unescaped;
        // confirm that every caller passes a trusted value.
        $meta_dataset = $this->Meta_dataset->getWhere('id = "' . $id . '"');
        echo 'Requesting dataset with id ' . $id . ".\n";
    }

    if (!$meta_dataset) {
        echo 'No meta-dataset to process. ' . "\n";
        return;
    }

    $meta_dataset = $meta_dataset[0];
    echo 'Processing meta-dataset with id ' . $meta_dataset->id . ".\n";

    // Mark as processed before doing the work, so the record is not picked up again.
    $this->Meta_dataset->update($meta_dataset->id, array('processed' => now()));

    // Optional filters; each one is only added when the request specifies it.
    $dataset_constr = $meta_dataset->datasets ? 'AND d.did IN (' . $meta_dataset->datasets . ') ' : '';
    $task_constr = $meta_dataset->tasks ? 'AND t.task_id IN (' . $meta_dataset->tasks . ') ' : '';
    $flow_constr = $meta_dataset->flows ? 'AND i.id IN (' . $meta_dataset->flows . ') ' : '';
    $setup_constr = $meta_dataset->setups ? 'AND s.sid IN (' . $meta_dataset->setups . ') ' : '';
    $function_constr = $meta_dataset->functions ? 'AND e.function IN (' . $meta_dataset->functions . ') ' : '';

    // Defaults; task types 3 and 4 read from different tables with extra key columns.
    $quality_colum = 'data_quality';
    $evaluation_column = 'evaluation';
    $evaluation_keys = array('function' => 'e.function');
    $quality_keys = array();

    if ($meta_dataset->task_type == 3) {
        $evaluation_keys = array(
            'repeat' => 'e.repeat',
            'fold' => 'e.fold',
            'sample' => 'e.sample',
            'sample_size' => 'e.sample_size',
            'function' => 'e.function',
        );
        $evaluation_column = 'evaluation_sample';
    }

    if ($meta_dataset->task_type == 4) {
        $evaluation_keys = array(
            'interval_start' => 'e.interval_start',
            'interval_end' => 'e.interval_end',
            'function' => 'e.function',
        );
        $quality_keys = array(
            'interval_start' => 'q.interval_start',
            'interval_end' => 'q.interval_end',
        );
        $quality_colum = 'data_quality_interval';
        $evaluation_column = 'evaluation_interval';
    }

    if (create_dir(DATA_PATH . $this->dir_suffix) == false) {
        $this->_error_meta_dataset($meta_dataset->id, 'Failed to create data directory. ', $meta_dataset->user_id);
        return;
    }

    // The database server writes the export here; it is moved into place afterwards.
    $tmp_path = '/tmp/' . rand_string(20) . '.csv';

    // Shared tail of both statements (CSV output options).
    $outfile_clause = 'INTO OUTFILE "' . $tmp_path . '" '
        . 'FIELDS TERMINATED BY "," '
        . 'ENCLOSED BY "\\"" '
        . 'LINES TERMINATED BY "\\n" '
        . ';';

    if ($meta_dataset->type == 'qualities') {
        // Both fragments must be defined even when there are no extra key
        // columns; previously the header fragment was left undefined then.
        $quality_keys_string = '';
        $quality_keys_key_string = '';
        if ($quality_keys) {
            $quality_keys_string = implode(', ', $quality_keys) . ',';
            $quality_keys_key_string = '"' . implode('", "', array_keys($quality_keys)) . '",';
        }

        // First SELECT emits the CSV header row, second SELECT the data rows.
        $sql = 'SELECT "data_id", "task_id", "name", "quality", ' . $quality_keys_key_string . ' "value" '
            . 'UNION ALL '
            . 'SELECT d.did, t.task_id, d.name, q.quality, ' . $quality_keys_string . 'q.value '
            . 'FROM dataset d, ' . $quality_colum . ' q, task t, task_inputs i '
            . 'WHERE t.task_id = i.task_id '
            . 'AND i.input = "source_data" '
            . 'AND i.value = q.data '
            . 'AND d.did = q.data '
            . 'AND t.ttid = "' . $meta_dataset->task_type . '" '
            . $dataset_constr
            . $task_constr
            . $outfile_clause;
    } else {
        // First SELECT emits the CSV header row, second SELECT the data rows.
        // A separating space is emitted after "value" and after the ttid
        // literal so that the following keyword never abuts a quoted string.
        $sql = 'SELECT "run_id", "setup_id", "task_id", "' . implode('", "', array_keys($evaluation_keys)) . '" '
            . ', "value" '
            . 'UNION ALL '
            . 'SELECT r.rid AS run_id, s.sid AS setup_id, t.task_id AS task_id, ' . implode(', ', $evaluation_keys) . ', e.value '
            . 'FROM run r, task t, task_inputs v, dataset d, algorithm_setup s, implementation i, ' . $evaluation_column . ' e '
            . 'WHERE r.task_id = t.task_id AND v.task_id = t.task_id '
            . 'AND v.input = "source_data" AND v.value = d.did '
            . 'AND r.setup = s.sid AND s.implementation_id = i.id '
            . 'AND e.source = r.rid '
            . 'AND t.ttid = "' . $meta_dataset->task_type . '" '
            . $dataset_constr
            . $task_constr
            . $flow_constr
            . $setup_constr
            . $function_constr
            . 'GROUP BY r.setup, r.task_id, ' . implode(',', $evaluation_keys) . ' '
            . $outfile_clause;
    }

    $this->Dataset->query($sql);

    // Success is judged by the presence of the output file, not by the query result.
    $success = file_exists($tmp_path);
    if ($success == false) {
        $error = 'MySQL Error #' . $this->Dataset->mysqlErrorNo() . ': ' . $this->Dataset->mysqlErrorMessage();
        $this->_error_meta_dataset($meta_dataset->id, $error, $meta_dataset->user_id);
        return;
    }

    $filename = getAvailableName(DATA_PATH . $this->dir_suffix, 'meta_dataset.csv');
    $filepath = DATA_PATH . $this->dir_suffix . $filename;

    $success = rename($tmp_path, $filepath);
    if ($success == false) {
        $this->_error_meta_dataset($meta_dataset->id, 'Failed to move csv to data directory. Filename: ' . $filename, $meta_dataset->user_id);
        return;
    }

    $file_id = $this->File->register_created_file($this->dir_suffix, $filename, $meta_dataset->user_id, 'dataset', 'text/csv', 'private');
    $this->Meta_dataset->update($meta_dataset->id, array('file_id' => $file_id));

    // Notify the requesting user.
    $user = $this->ion_auth->user($meta_dataset->user_id)->row();
    $this->email->to($user->email);
    $this->email->subject('OpenML Meta Dataset');
    $this->email->message("This is an automatically generated email. The your requested meta-dataset was created successfully and can be downloaded from " . BASE_URL);
    $this->email->send();
}
/**
 * Handles the upload of a run.
 *
 * Validates the uploaded XML description, resolves the implementation, the
 * parameter settings, the setup and the task, inserts a run record, attaches
 * every uploaded file and the task's source data to it, applies tags, indexes
 * the run, removes the matching schedule entry and finally emits the
 * 'run-upload' XML response containing the new run id.
 *
 * Every failure is reported through $this->_returnError() with one of the
 * codes 202-207, 210-214, 217-219, after which the method returns.
 *
 * @return void
 */
private function _openml_run_upload() {
    /* ---------------------------------------------------------------
     * Everything that needs to be done for EVERY task,
     * including the unsupported tasks.
     * --------------------------------------------------------------- */

    // check uploaded file
    $description = isset($_FILES['description']) ? $_FILES['description'] : false;
    if (!check_uploaded_file($description)) {
        $this->_returnError(202);
        return;
    }

    // validate xml
    if (validateXml($description['tmp_name'], xsd('openml.run.upload'), $xmlErrors) == false) {
        $this->_returnError(203, $this->openmlGeneralErrorCode, $xmlErrors);
        return;
    }

    // fetch xml
    $xml = simplexml_load_file($description['tmp_name']);
    if ($xml === false) {
        $this->_returnError(219);
        return;
    }

    $run_xml = all_tags_from_xml($xml->children('oml', true), $this->xml_fields_run);

    $task_id = $run_xml['task_id'];
    $implementation_id = $run_xml['implementation_id'];
    $setup_string = array_key_exists('setup_string', $run_xml) ? $run_xml['setup_string'] : null;
    $error_message = array_key_exists('error_message', $run_xml) ? $run_xml['error_message'] : false;
    $parameter_objects = array_key_exists('parameter_setting', $run_xml) ? $run_xml['parameter_setting'] : array();
    $output_data = array_key_exists('output_data', $run_xml) ? $run_xml['output_data'] : array();
    $tags = array_key_exists('tag', $run_xml) ? str_getcsv($run_xml['tag']) : array();

    // The user can specify custom metrics; check that each one exists in the database.
    // NOTE(review): $output_data is used as an object below, yet tested with
    // array_key_exists(); confirm which type all_tags_from_xml() yields here.
    // NOTE(review): values taken from the uploaded XML are concatenated into
    // the WHERE clauses in this method unescaped; escape or parameterize them
    // with the project's database layer.
    if ($output_data != false && array_key_exists('evaluation', $output_data)) {
        foreach ($output_data->children('oml', true)->{'evaluation'} as $eval) {
            $measure_id = $this->Implementation->getWhere('`fullName` = "' . $eval->implementation . '" AND `implements` = "' . $eval->name . '"');
            if ($measure_id == false) {
                $this->_returnError(217);
                return;
            }
        }
    }

    // fetch implementation
    $implementation = $this->Implementation->getById($implementation_id);
    if ($implementation === false) {
        $this->_returnError(205);
        return;
    }

    // An implementation that itself implements a supported metric is rejected.
    if (in_array($implementation->{'implements'}, $this->supportedMetrics)) {
        $this->_returnError(218);
        return;
    }

    // Check whether uploaded files are present (only required when the run did not fail).
    if ($error_message == false) {
        if (count($_FILES) < 2) {
            $this->_returnError(206);
            return;
        }

        $message = '';
        // Guard the lookup: two files may be uploaded without one being named 'predictions'.
        $predictions = isset($_FILES['predictions']) ? $_FILES['predictions'] : false;
        if (!check_uploaded_file($predictions, false, $message)) {
            $this->_returnError(207, $this->openmlGeneralErrorCode, 'File predictions: ' . $message);
            return;
        }
    }

    $parameters = array();
    foreach ($parameter_objects as $p) {
        // since 'component' is an optional XML field, we add a default option
        $component = property_exists($p, 'component') ? $p->component : $implementation->id;

        // now find the input id
        $input_id = $this->Input->getWhereSingle('`implementation_id` = ' . $component . ' AND `name` = "' . $p->name . '"');
        if ($input_id === false) {
            $this->_returnError(213);
            return;
        }

        // Concatenating '' forces the value to a string.
        $parameters[$input_id->id] = $p->value . '';
    }

    // search setup ...
    // TODO: do something about the new parameters. Are still retrieved by ID, does not work with Weka plugin.
    $setupId = $this->Algorithm_setup->getSetupId($implementation, $parameters, true, $setup_string);
    if ($setupId === false) {
        $this->_returnError(214);
        return;
    }

    // fetch task
    $taskRecord = $this->Task->getById($task_id);
    if ($taskRecord === false) {
        $this->_returnError(204);
        return;
    }

    // end() takes its argument by reference, so the result must live in a variable first.
    $tasks = $this->Task->tasks_crosstabulated($taskRecord->ttid, true, array(), false, $task_id);
    if (empty($tasks)) {
        $this->_returnError(204);
        return;
    }
    $task = end($tasks);

    // now create a run
    $runData = array(
        'uploader' => $this->user_id,
        'setup' => $setupId,
        'task_id' => $task->task_id,
        'start_time' => now(),
        'status' => $error_message == false ? 'OK' : 'error',
        'error' => $error_message == false ? null : $error_message,
        'experiment' => '-1',
    );
    $runId = $this->Run->insert($runData);
    if ($runId === false) {
        $this->_returnError(210);
        return;
    }

    // and fetch the run record
    $run = $this->Run->getById($runId);

    $result = new stdClass();
    $result->run_id = $runId; // for output

    // attach uploaded files as output to run
    foreach ($_FILES as $key => $value) {
        $file_type = $key == 'predictions' ? 'predictions' : 'run_uploaded_file';

        $file_id = $this->File->register_uploaded_file($value, $this->data_folders['run'], $this->user_id, $file_type);
        if (!$file_id) {
            $this->_returnError(212);
            return;
        }

        $file_record = $this->File->getById($file_id);

        $record = array(
            'source' => $run->rid,
            'field' => $key,
            'name' => $value['name'],
            'format' => $file_record->extension,
            'file_id' => $file_id,
        );
        $did = $this->Runfile->insert($record);
        if ($did == false) {
            $this->_returnError(212);
            return;
        }

        $this->Run->outputData($run->rid, $did, 'runfile', $key);
    }

    // attach input data
    $inputData = $this->Run->inputData($runId, $task->source_data, 'dataset');
    if ($inputData === false) {
        // Report the failure like every other error path instead of returning silently.
        $this->_returnError(211);
        return;
    }

    // tag it, if necessary
    foreach ($tags as $tag) {
        $error = -1;
        tag_item('run', $runId, $tag, $this->user_id, $error);
    }

    // add to elastic search index.
    $this->elasticsearch->index('run', $run->rid);

    // remove scheduled task
    $this->Schedule->deleteWhere('task_id = "' . $task->task_id . '" AND sid = "' . $setupId . '"');

    /* ---------------------------------------------------------------
     * Now the stuff that needs to be done for the special
     * supported tasks, like classification, regression.
     * --------------------------------------------------------------- */

    // and present result, in effect only a run_id.
    $this->_xmlContents('run-upload', $result);
}