/**
 * Create and persist a Batch from the given settings, then queue a cache
 * refresh for the units it contains.
 *
 * Callers may pass additional arguments (e.g. an activity); they are not
 * declared here and are ignored.
 *
 * @param array $settings Reads the keys 'batch_title', 'batch_description',
 *                        'project' and 'units' (list of unit ids).
 * @return Batch|false The saved batch, or false when an Exception was caught.
 */
public static function store($settings)
{
    // Declared before the try block so the rollback below never touches an
    // undefined variable when construction itself fails.
    $batch = null;

    try {
        // Create the SoftwareAgent if it doesnt exist (currently disabled)
        //SoftwareAgent::store('batchcreator', 'Batch creation');

        $batch = new Batch();
        // NOTE(review): self-assignment kept from the original; it looks like
        // a no-op unless the model's magic accessors have side effects —
        // confirm and remove.
        $batch->_id = $batch->_id;
        $batch->title = $settings['batch_title'];
        $batch->content = $settings['batch_description'];
        $batch->project = $settings['project'];
        $batch->parents = $settings['units'];
        $batch->size = count($settings['units']);

        // The hash identifies a batch by its project and its list of units.
        $batch->hash = md5(serialize(array(
            "project" => $settings['project'],
            "content" => $settings['units'],
        )));

        $batch->save();

        Queue::push('Queues\\UpdateUnits', $settings['units']);

        return $batch;
    } catch (Exception $e) {
        // Something went wrong with creating the Batch: roll back whatever
        // was created. No activity exists in this scope, so only the batch
        // is removed.
        if ($batch !== null) {
            $batch->forceDelete();
        }

        return false;
    }
}
/**
 * Queue handler: recompute the cached statistics and the average clarity of
 * every unit listed in $data, then delete the queue job.
 *
 * Unit ids that cannot be resolved are logged and skipped so that one stale
 * id does not abort the whole job.
 *
 * @param mixed $job  The queue job; delete() is called on it once all units
 *                    have been processed.
 * @param array $data List of unit ids, i.e. array(unitids).
 */
public function fire($job, $data)
{
    foreach ($data as $id) {
        // Restart the execution timer for every unit.
        set_time_limit(600);

        $unit = Entity::id($id)->first();
        if (!$unit) {
            \Log::debug("Skipped unit {$id}: not found.");
            continue;
        }

        $unitId = $unit->_id;

        $batches = count(Batch::whereIn('parents', [$unitId])->get()->toArray());

        // Count worker units and collect the workers behind them, split by
        // the spam flag of each worker unit.
        $workerunit = array('count' => 0, 'spam' => 0, 'nonSpam' => 0);
        $workerlist = $workersspam = $workersnonspam = array();

        foreach (Workerunit::where('unit_id', $unitId)->get() as $a) {
            $workerlist[] = $a->crowdAgent_id;

            if ($a->spam) {
                $workerunit['spam']++;
                $workersspam[] = $a->crowdAgent_id;
            } else {
                $workerunit['nonSpam']++;
                $workersnonspam[] = $a->crowdAgent_id;
            }
        }

        $workerunit['count'] = $workerunit['spam'] + $workerunit['nonSpam'];

        // Built from scratch for each unit so no value leaks between units.
        $workers = array(
            'count' => count(array_unique($workerlist)),
            'spam' => count(array_unique($workersspam)),
            'nonSpam' => count(array_unique($workersnonspam)),
            // Workers that have both spam and non-spam worker units.
            'potentialSpam' => count(array_intersect($workersspam, $workersnonspam)),
        );

        // Number of entities attached to this unit per platform.
        $platformField = array(
            'cf' => count(Entity::where('unit_id', $unitId)->where('softwareAgent_id', 'cf')->get()->toArray()),
            'amt' => count(Entity::where('unit_id', $unitId)->where('softwareAgent_id', 'amt')->get()->toArray()),
        );

        // Jobs whose metrics list this unit as filtered.
        $filteredField = array();
        $filteredField['job_ids'] = array_flatten(
            Job::where('metrics.filteredUnits.list', 'all', array($unitId))->get(['_id'])->toArray()
        );
        $filteredField['count'] = count($filteredField['job_ids']);

        // Entities that have this unit among their parents.
        $derivatives = Entity::whereIn('parents', array($unitId))->lists('_id');
        $children = array(
            "count" => count($derivatives),
            "list" => $derivatives,
        );

        // NOTE(review): "jobs" is hard-coded to 1 as in the original; the job
        // ids of the worker units are not aggregated here — confirm intent.
        $unit->cache = [
            "jobs" => 1,
            "workers" => $workers,
            "softwareAgent" => $platformField,
            "workerunits" => $workerunit,
            "filtered" => $filteredField,
            "batches" => $batches,
            "children" => $children,
        ];

        // Average over all entities that carry metrics for this unit. The
        // same '_id' is used for the filter and the averaged field.
        $metricsPath = 'metrics.units.withoutSpam.' . $unitId;
        $avg_clarity = Entity::where($metricsPath, 'exists', 'true')
            ->avg($metricsPath . '.avg.max_relation_Cos');

        if ($avg_clarity === null) {
            $avg_clarity = 0;
        }

        $unit->avg_clarity = $avg_clarity;
        $unit->update();

        \Log::debug("Updated unit {$unit->_id}.");
    }

    $job->delete(); // the Queue job...
}
/**
 * Import units sent by the DIVE dashboard and group them into a batch.
 *
 * Units whose hash (project + document type + content) is already stored are
 * reused; all others are created. On an Exception the activity and the units
 * tracked by this call are rolled back and the Exception is returned, not
 * thrown.
 *
 * @param string $signal   Request type; only "new_units" is handled.
 * @param string $payload  JSON-encoded list of unit contents.
 * @param array  $settings Reads 'project', and optionally 'documentType'
 *                         (default "diveunit") and 'batch_description'.
 * @return array|Exception The status array with the keys 'notice', 'success'
 *                         and 'error', or the caught Exception.
 */
public function process($signal, $payload, $settings)
{
    // increase memory usage to support larger files with up to 10k judgments
    ini_set('memory_limit', '256M');
    set_time_limit(0);

    $this->status = ['notice' => [], 'success' => [], 'error' => []];

    // Rollback state, declared before the try block so the catch block never
    // dereferences an undefined variable.
    $activity = null;
    $units = [];

    try {
        if (empty($payload)) {
            array_push($this->status['error'], "The content of the units is empty -- " . $payload . "");
            return $this->status;
        }

        if ($signal != "new_units") {
            array_push($this->status['error'], "Unknown request from DIVE dashboard -- " . $signal . "");
            return $this->status;
        }

        $retUnits = json_decode($payload, true);
        if (!is_array($retUnits)) {
            // json_decode() yields null for malformed input and a scalar for
            // a JSON scalar; neither can be iterated as a list of units.
            array_push($this->status['error'], "The content of the units is not a valid JSON list");
            return $this->status;
        }

        $settings['units'] = [];

        // Create activity
        $activity = $this->createActivity();

        if (!isset($settings['documentType']) || $settings['documentType'] == "") {
            $settings['documentType'] = "diveunit";
        }
        if (!isset($settings['batch_description']) || $settings['batch_description'] == "") {
            $settings['batch_description'] = "Batch imported from DIVE dashboard";
        }

        foreach ($retUnits as $unitContent) {
            $hash = md5(serialize(array(
                "project" => $settings["project"],
                "documentType" => $settings["documentType"],
                "content" => $unitContent,
            )));

            $searchForUnit = \Entity::where("hash", $hash)->first();
            if ($searchForUnit != null) {
                // Reuse the stored unit instead of creating a duplicate.
                array_push($settings['units'], $searchForUnit["_id"]);
                continue;
            }

            $unit = new Unit();
            $unit->project = $settings['project'];
            $unit->activity_id = $activity->_id;
            $unit->documentType = $settings['documentType'];
            $unit->type = "unit";
            $unit->parents = [];
            $unit->content = $unitContent;
            $unit->hash = $hash;
            $unit->source = "divedashboard";
            $unit->save();

            // Track units saved by this call; the rollback iterates these.
            $units[$unit->_id] = $unit;
            array_push($settings['units'], $unit->_id);
        }

        // Create Batch
        $hashBatch = array(
            "project" => $settings["project"],
            "batch_description" => $settings["batch_description"],
            "content" => $settings["units"],
        );
        $settings['batch_title'] = "Imported batch from Dive dashboard";

        $searchForBatch = \Entity::where("hash", md5(serialize($hashBatch)))->first();
        if ($searchForBatch != null) {
            array_push($this->status['notice'], "Batch already exists " . $searchForBatch['_id'] . "");
        } else {
            Batch::store($settings, $activity);
        }

        array_push($this->status['success'], "Successfully imported " . $settings['documentType'] . "");
        array_push($this->status['success'], "Logged activities as " . $activity->_id . "");

        return $this->status;
    } catch (Exception $e) {
        if ($activity !== null) {
            $activity->forceDelete();
        }

        // NOTE(review): exists() is defined outside this view; the guard is
        // kept from the original rollback — confirm it is false for units
        // created by this call.
        foreach ($units as $unit) {
            if (!$unit->exists()) {
                $unit->forceDelete();
            }
        }

        return $e;
    }
}
/**
 * Every time you click a tab or the 'next' button, this function fires.
 *
 * It remembers the selected batch in the Session: a 'batch' input is looked
 * up and stored, otherwise the batch already kept in the Session is used.
 * Without a usable batch the user is sent back to the batch selection.
 *
 * @param mixed $next Not used by this method.
 * @return mixed Redirect to "jobs2/submit", or to "jobs2/batch" when no
 *               batch is available.
 */
public function postFormPart($next)
{
    if (Input::has('batch')) {
        // TODO: Validate for CSRF
        $batch = Batch::find(Input::get('batch'));

        // Only remember a batch that was actually found; an unknown id must
        // not put an empty value in the Session and continue to submit.
        if (!empty($batch)) {
            // TODO -- is saving batch in the session a good idea ?
            Session::put('batch', serialize($batch));
        }
    } else {
        $batch = unserialize(Session::get('batch'));
    }

    if (empty($batch)) {
        Session::flash('flashNotice', 'Please select a batch first.');
        return Redirect::to("jobs2/batch");
    }

    return Redirect::to("jobs2/submit");
}
/**
 * Show the "create batch" form for the selected units, or store the batch
 * when the form has been submitted.
 *
 * A request carrying 'batch_description' is treated as the submitted form:
 * the batch is stored from all input and the user is redirected to
 * 'media/search'. Otherwise the 'selection' input is naturally sorted and
 * passed to the view together with the "/"-separated parts of its first
 * entry.
 *
 * @return mixed A redirect after storing, or the 'media.pages.createbatch'
 *               view with 'units' and 'fields'.
 */
public function anyBatch()
{
    if (Input::has('batch_description')) {
        // store() is static; no instance is needed to call it.
        Batch::store(Input::all());

        return Redirect::to('media/search');
    }

    // The selection may be missing or a single value; normalise it to an
    // array so natsort() and the index access below are always valid.
    $units = Input::get('selection');
    if (!is_array($units)) {
        $units = empty($units) ? [] : [$units];
    }

    natsort($units);
    $units = array_values($units);

    $fields = empty($units) ? [] : explode("/", $units[0]);

    return View::make('media.pages.createbatch', compact('units', 'fields'));
}
/**
 * Import a crowdsourcing results file and create the matching records: the
 * input file, one unit per distinct value in the first column, a batch, a
 * job configuration, a job, and a crowd agent plus worker unit per judgment.
 *
 * The file is treated as an AMT export when its first header cell is "HITId"
 * and as a CrowdFlower export otherwise. On an Exception every record
 * tracked by this call is rolled back and the Exception is returned, not
 * thrown.
 *
 * @param mixed $document The uploaded document; passed to readCSV() and to
 *                        File::store().
 * @param array $settings Reads 'project', 'documentType' and 'filename';
 *                        'units', 'platform', 'batch_title',
 *                        'batch_description', 'judgmentsPerUnit' and
 *                        'contradiction' are filled in here.
 * @return array|Exception The status array with the keys 'notice', 'success'
 *                         and 'error', or the caught Exception.
 */
public function process($document, $settings)
{
    // increase memory usage to support larger files with up to 10k judgments
    ini_set('memory_limit', '256M');
    set_time_limit(0);

    $this->status = ['notice' => [], 'success' => [], 'error' => []];

    // Rollback state. Everything the catch block touches is initialised
    // before the try block so a failure at any point can be rolled back
    // without dereferencing an undefined variable.
    $activity = null;
    $file = null;
    $jobconfig = null;
    $job = null;
    // keep a list of all unique units, crowdAgents and workerUnits so that
    // we can rollback only the unique ones on error
    $units = [];
    $this->crowdAgents = [];
    $this->workerUnits = [];

    try {
        $settings['units'] = [];

        $this->duplicateUnits = 0;
        $this->duplicateCrowdAgents = 0;
        $this->duplicateWorkerUnits = 0;

        // read document content and put it into an array
        $data = $this->readCSV($document);
        $header = $data[0];

        // Looks up a cell by its header name. A name that is not in the
        // header makes array_search() return false, which indexes column 0
        // (same as the inline lookups this replaces).
        $cell = function (array $row, $name) use ($header) {
            return $row[array_search($name, $header)];
        };

        // Create activity
        $activity = $this->createActivity();

        // Create input file
        $file = new File();
        $file->project = $settings['project'];
        $file->store($document, $settings, $activity);
        $file->save();

        // log status
        if ($file->exists()) {
            array_push($this->status['notice'], "Existing file found (" . $file->_id . ")");
        } else {
            array_push($this->status['success'], "File created (" . $file->_id . ")");
        }

        // temporary mapping of unit ids to CrowdTruth unit ids
        $unitMap = [];

        // Detect if this is an AMT or CF file
        if ($header[0] == "HITId") {
            // AMT
            $settings['platform'] = 'AMT';
            $startColumn = 27;
            $column = [
                'submit_time' => 18,
                'id' => 14,
                'start_time' => 17,
                'channel' => 22, // empty
                'trust' => 22, // empty
                'worker' => 15,
                'country' => 22, // empty
                'region' => 22, // empty
                'city' => 22, // empty
            ];
        } else {
            // CrowdFlower
            $settings['platform'] = 'CF';
            $startColumn = 12;
            $column = [
                'submit_time' => 1,
                'id' => 2,
                'start_time' => 3,
                'channel' => 5,
                'trust' => 6,
                'worker' => 7,
                'country' => 8,
                'region' => 9,
                'city' => 10,
            ];
        }
        $endColumn = count($header);

        // Create Units: row indices of the first occurrence of every distinct
        // value in column 0. Index 0 is the header row, hence the loop
        // starts at 1.
        $unitIds = array_keys(array_unique(array_column($data, 0)));

        for ($i = 1; $i < count($unitIds); $i++) {
            $row = $data[$unitIds[$i]];

            // Temp mapping of files to document type structures. This should
            // be done using the preprocessing functions

            // Sounds
            if ($settings['project'] == 'Sounds' && $settings['documentType'] == 'sound') {
                $content = [
                    'id' => $cell($row, 'id'),
                    'preview-hq-mp3' => $cell($row, 'preview-hq-mp3'),
                ];
            }

            $platform_id = $row[0];

            // Passage Alignment
            if ($settings['documentType'] == 'passage_alignment') {
                $content = [
                    'question' => [
                        'id' => $cell($row, 'id'),
                        'passage' => $cell($row, 'question'),
                    ],
                    'answer' => [
                        'id' => $cell($row, 'id'),
                        'passage' => $cell($row, 'passage'),
                    ],
                ];
            }

            // Passage Justification
            if ($settings['documentType'] == 'passage_justification') {
                $content = [
                    'question' => [
                        'id' => $cell($row, 'Input.ID'),
                        'passage' => $cell($row, 'Input.Question'),
                    ],
                    'answers' => [],
                ];

                for ($k = 1; $k <= 6; $k++) {
                    if ($cell($row, 'Input.id' . $k) != "") {
                        $content['answers'][$k] = [
                            'id' => $cell($row, 'Input.id' . $k),
                            'passage' => $cell($row, 'Input.Passage' . $k),
                        ];
                    }
                }
            }

            // Quantum sound pairs
            if ($settings['project'] == 'Quantum' && $settings['documentType'] == 'sound') {
                // Key order is kept stable because the unit hash below is
                // computed over the serialized content.
                $content = ['id' => $cell($row, 'id')];
                foreach (['sound1' => 's1', 'sound2' => 's2'] as $slot => $columnPrefix) {
                    $content[$slot] = [
                        'id' => $cell($row, $columnPrefix . '_id'),
                        'name' => $cell($row, $columnPrefix . '_name'),
                        'description' => $cell($row, $columnPrefix . '_description'),
                        'duration' => $cell($row, $columnPrefix . '_duration'),
                        'url' => $cell($row, $columnPrefix . '_url'),
                    ];
                }
            }

            $unit = new Unit();
            $unit->project = $settings['project'];
            $unit->activity_id = $activity->_id;
            $unit->documentType = $settings['documentType'];
            $unit->type = "unit";
            $unit->parents = [$file->_id];
            $unit->content = $content;
            $unit->hash = md5(serialize($content));
            $unit->platformId = $platform_id;
            $unit->save();

            $units[$unit->_id] = $unit;
            $unitMap[$row[0]] = $unit->_id;
        }

        // Create Batch
        $settings['units'] = array_keys($units);
        $settings['batch_title'] = "Imported batch";
        $settings['batch_description'] = "Batch added via result importer";
        $batch = Batch::store($settings, $activity);

        // Create job configuration
        $unitCount = count($unitIds) - 1;
        // Get number of judgments per unit
        $settings['judgmentsPerUnit'] = round((count($data) - 1) / $unitCount);
        $jobconfig = $this->createJobconf($activity->_id, $settings);

        // Create job
        $job = $this->createJob($jobconfig->_id, $activity->_id, $batch, $settings);

        // temp for sounds, create annotation vector for each unit
        // NOTE(review): the vectors built below are not passed to any of the
        // calls in this method; kept for the pending aggregation step —
        // confirm whether they are still needed.
        $annVector = [];

        // passage alignment
        if ($settings['documentType'] == 'passage_alignment') {
            for ($i = 1; $i < count($data); $i++) {
                for ($j = 0; $j < 30; $j++) {
                    // for each passage get the tags
                    if ($cell($data[$i], 'rel' . $j) != "") {
                        $term1 = $cell($data[$i], 'rel' . $j . 'a');
                        $term2 = $cell($data[$i], 'rel' . $j . 'b');
                        $key = $term1 . ',' . $term2;

                        // add keyword to list of keywords for this unit
                        if (!isset($annVector[$data[$i][0]][$key])) {
                            $annVector[$data[$i][0]][$key] = 0;
                        }
                    }
                }
            }
        }

        // Passage Justification
        if ($settings['documentType'] == 'passage_justification') {
            $questionTypes = ['Subjective' => 0, 'YesNo' => 0, 'NotYesNo' => 0, 'Unanswerable' => 0];
            $answers = ['Noanswer' => 0, 'Yes' => 0, 'No' => 0, 'Other' => 0, 'Unanswerable' => 0];

            for ($i = 1; $i < count($data); $i++) {
                // add answer possibilities to hit
                $annVector[$data[$i][0]]['question'] = $questionTypes;
                $annVector[$data[$i][0]]['answer'] = $answers;

                // add existing passages to vector
                for ($k = 1; $k <= 6; $k++) {
                    $passageId = $cell($data[$i], 'Input.id' . $k);
                    if ($passageId != "") {
                        $annVector[$data[$i][0]]['justification']['p' . $passageId] = 0;
                    }
                }
            }
        }

        // Sounds
        if ($settings['documentType'] == 'sound') {
            for ($i = 1; $i < count($data); $i++) {
                // for each keywords
                $keywords = explode(',', $cell($data[$i], 'keywords'));

                foreach ($keywords as $keyword) {
                    $keyword = trim(strtolower(str_replace('.', '', $keyword)));

                    // add keyword to list of keywords for this unit
                    if ($keyword != "" && !isset($annVector[$data[$i][0]][$keyword])) {
                        $annVector[$data[$i][0]][$keyword] = 0;
                    }
                }
            }
        }

        // Same value for every judgment in the file.
        $trust = 1;
        $settings['contradiction'] = 0;

        // loop through all the judgments and add workerUnits, media units and
        // CrowdAgents.
        for ($i = 1; $i < count($data); $i++) {
            $judgment = $data[$i];

            // loop through all values in the file, and add them as content;
            // dots in column names are replaced by underscores for the keys
            $content = [];
            for ($c = $startColumn; $c < $endColumn; $c++) {
                $key = str_replace('.', '_', $header[$c]);
                $content[$key] = $judgment[$c];
            }

            // Create CrowdAgent
            $crowdAgent = $this->createCrowdAgent(
                $judgment[$column['worker']],
                $judgment[$column['country']],
                $judgment[$column['region']],
                $judgment[$column['city']],
                $trust,
                $settings
            );

            // Create WorkerUnit
            $this->createWorkerUnit(
                $activity->_id,
                $unitMap[$judgment[0]],
                $judgment[$column['start_time']],
                $judgment[$column['channel']],
                $trust,
                $content,
                $crowdAgent->_id,
                $job->_id,
                $judgment[$column['id']],
                $judgment[$column['submit_time']],
                $settings
            );
        }

        // Result aggregation and metrics generation are not performed here;
        // only the job cache update is queued.
        \Queue::push('Queues\\UpdateJob', array('job' => serialize($job)));

        // Notice that units already existed in the database
        if ($this->duplicateUnits > 0) {
            array_push($this->status['notice'], "Existing units found (" . $this->duplicateUnits . ")");
        }
        if ($this->duplicateCrowdAgents > 0) {
            array_push($this->status['notice'], "Existing crowd agents found (" . $this->duplicateCrowdAgents . ")");
        }
        if ($this->duplicateWorkerUnits > 0) {
            array_push($this->status['notice'], "Existing judgements found (" . $this->duplicateWorkerUnits . ")");
        }

        // Job's done!
        array_push($this->status['success'], "Successfully imported " . $settings['filename'] . "");
        array_push($this->status['success'], "Logged activities as " . $activity->_id . "");

        return $this->status;
    } catch (Exception $e) {
        // Roll back whatever was created before the failure. Each record is
        // checked first because the failure may have happened before it
        // existed.
        if ($activity !== null) {
            $activity->forceDelete();
        }
        if ($file !== null && !$file->exists()) {
            $file->forceDelete();
        }
        if ($jobconfig !== null && !$jobconfig->_existing) {
            $jobconfig->forceDelete();
        }
        if ($job !== null) {
            $job->forceDelete();
        }

        // NOTE(review): exists() is defined outside this view; the guard is
        // kept from the original rollback — confirm it is false for units
        // created by this call.
        foreach ($units as $unit) {
            if (!$unit->exists()) {
                $unit->forceDelete();
            }
        }
        foreach ($this->crowdAgents as $crowdAgent) {
            if (!$crowdAgent->_existing) {
                $crowdAgent->forceDelete();
            }
        }
        foreach ($this->workerUnits as $workerUnit) {
            $workerUnit->forceDelete();
        }

        return $e;
    }
}