示例#1
0
 /**
  * Create and save a Batch from the given settings, then queue a cache update for its units.
  *
  * @param array $settings Reads the keys 'batch_title', 'batch_description', 'project' and 'units'.
  * @return Batch|false The saved batch, or false when an Exception was thrown along the way.
  */
 public static function store($settings)
 {
     // Declared before the try block so the catch block can tell whether a batch object exists.
     $batch = null;
     try {
         $batch = new Batch();
         // NOTE(review): self-assignment kept unchanged; presumably a no-op unless Batch
         // defines property accessors -- confirm before removing.
         $batch->_id = $batch->_id;
         $batch->title = $settings['batch_title'];
         $batch->content = $settings['batch_description'];
         $batch->project = $settings['project'];
         $batch->parents = $settings['units'];
         $batch->size = count($settings['units']);
         // The hash is derived from the project and the list of units.
         $batch->hash = md5(serialize([
             'project' => $settings['project'],
             'content' => $settings['units'],
         ]));
         $batch->save();
         Queue::push('Queues\\UpdateUnits', $settings['units']);
         return $batch;
     } catch (Exception $e) {
         // Something went wrong with creating the Batch: remove it again. This method creates
         // no activity, so the batch is the only thing to roll back.
         if ($batch !== null) {
             $batch->forceDelete();
         }
         return false;
     }
 }
示例#2
0
 /**
  * Recompute the cached statistics and the average clarity of each unit, then delete the queue job.
  *
  * For every unit id the method counts batches, worker units, workers, per-platform entities,
  * filtering jobs and child entities, stores them in the unit's `cache` attribute and updates
  * the unit. Ids for which no unit is found are skipped.
  *
  * @param mixed $job  Queue job; delete() is called on it after all ids were handled.
  * @param array $data = array(unitids)
  */
 public function fire($job, $data)
 {
     foreach ($data as $id) {
         set_time_limit(600);
         $unit = Entity::id($id)->first();
         if ($unit === null) {
             // Nothing to update for an unknown id; carry on with the remaining units.
             \Log::debug("Skipped unit {$id}: not found.");
             continue;
         }
         $unitId = $unit->_id;

         $batches = Batch::whereIn('parents', [$unitId])->count();

         // Tally worker units and collect the workers behind them, split by spam flag.
         $workerunit = ['count' => 0, 'spam' => 0, 'nonSpam' => 0];
         $workerlist = $workersspam = $workersnonspam = $joblist = [];
         foreach (Workerunit::where('unit_id', $unitId)->get() as $a) {
             $joblist[] = $a->job_id;
             $workerlist[] = $a->crowdAgent_id;
             if ($a->spam) {
                 $workerunit['spam']++;
                 $workersspam[] = $a->crowdAgent_id;
             } else {
                 $workerunit['nonSpam']++;
                 $workersnonspam[] = $a->crowdAgent_id;
             }
         }
         $workerunit['count'] = $workerunit['spam'] + $workerunit['nonSpam'];

         $uniqueSpammers = array_unique($workersspam);
         $workers = [];
         $workers['count'] = count(array_unique($workerlist));
         $workers['spam'] = count($uniqueSpammers);
         $workers['nonSpam'] = count(array_unique($workersnonspam));
         // Workers that appear in both lists. The spam list is de-duplicated first because
         // array_intersect() keeps repeated values of its first argument, which would count
         // one worker several times.
         $workers['potentialSpam'] = count(array_intersect($uniqueSpammers, $workersnonspam));

         // Number of entities for this unit per software agent.
         $platformField = [];
         foreach (['cf', 'amt'] as $platform) {
             $platformField[$platform] = Entity::where('unit_id', $unitId)
                 ->where('softwareAgent_id', $platform)
                 ->count();
         }

         //filtered
         $filteredField = [];
         $filteredField['job_ids'] = array_flatten(Job::where('metrics.filteredUnits.list', 'all', array($unit['_id']))->get(['_id'])->toArray());
         $filteredField['count'] = count($filteredField['job_ids']);

         // Entities that list this unit among their parents.
         $derivatives = Entity::whereIn('parents', [$unitId])->lists('_id');
         $children = [];
         $children["count"] = count($derivatives);
         $children["list"] = $derivatives;

         $unit->cache = [
             "jobs" => 1,
             "workers" => $workers,
             "softwareAgent" => $platformField,
             "workerunits" => $workerunit,
             "filtered" => $filteredField,
             "batches" => $batches,
             "children" => $children,
         ];

         // The same key is used for the existence filter and the averaged field.
         $clarityKey = 'metrics.units.withoutSpam.' . $unitId;
         $avg_clarity = Entity::where($clarityKey, 'exists', 'true')->avg($clarityKey . '.avg.max_relation_Cos');
         if ($avg_clarity === null) {
             $avg_clarity = 0;
         }
         $unit->avg_clarity = $avg_clarity;
         $unit->update();
         \Log::debug("Updated unit {$unitId}.");
     }
     $job->delete();
     // the Queue job...
 }
 /**
  * Import units sent by the DIVE dashboard and store a batch for them.
  *
  * Only the "new_units" signal is handled. $payload is decoded as JSON; for every decoded
  * entry an entity with the same hash is reused when one exists, otherwise a new Unit is
  * saved. A batch is stored for the collected unit ids unless an entity with the same batch
  * hash already exists.
  *
  * @param string $signal   Request type; anything other than "new_units" is reported as an error.
  * @param string $payload  JSON-encoded list of unit contents.
  * @param array  $settings Reads 'project', 'documentType' and 'batch_description'.
  * @return array|Exception $this->status (messages under 'notice', 'success' and 'error'),
  *                         or the caught exception when the import failed.
  */
 public function process($signal, $payload, $settings)
 {
     // increase memory usage to support larger files with up to 10k judgments
     ini_set('memory_limit', '256M');
     set_time_limit(0);
     $this->status = ['notice' => [], 'success' => [], 'error' => []];
     // Declared before the try block so the rollback in the catch block never touches an
     // undefined variable, whatever step failed.
     $activity = null;
     $units = [];
     try {
         if (empty($payload)) {
             array_push($this->status['error'], "The content of the units is empty -- " . $payload . "");
             return $this->status;
         }
         if ($signal != "new_units") {
             array_push($this->status['error'], "Unknown request from DIVE dashboard -- " . $signal . "");
             return $this->status;
         }

         $settings['units'] = [];
         $retUnits = json_decode($payload, true);
         if (!is_array($retUnits)) {
             // json_decode() yields null for malformed JSON and a scalar for scalar JSON;
             // neither can be iterated as a list of units.
             array_push($this->status['error'], "The content of the units is not a valid JSON list");
             return $this->status;
         }

         // Create activity
         $activity = $this->createActivity();
         if ($settings['documentType'] == "") {
             $settings['documentType'] = "diveunit";
         }
         if ($settings["batch_description"] == "") {
             $settings["batch_description"] = "Batch imported from DIVE dashboard";
         }

         foreach ($retUnits as $unitContent) {
             // A unit is identified by project, document type and content.
             $hash = md5(serialize([
                 'project' => $settings['project'],
                 'documentType' => $settings['documentType'],
                 'content' => $unitContent,
             ]));
             $searchForUnit = \Entity::where("hash", $hash)->first();
             if ($searchForUnit != NULL) {
                 // Reuse the entity that is already stored.
                 array_push($settings['units'], $searchForUnit["_id"]);
                 continue;
             }
             $unit = new Unit();
             $unit->project = $settings['project'];
             $unit->activity_id = $activity->_id;
             $unit->documentType = $settings['documentType'];
             $unit->type = "unit";
             $unit->parents = [];
             $unit->content = $unitContent;
             $unit->hash = $hash;
             $unit->source = "divedashboard";
             $unit->save();
             // Remember units created by this call so they can be rolled back on failure.
             $units[$unit->_id] = $unit;
             array_push($settings['units'], $unit->_id);
         }

         // Create Batch
         $hashBatch = [
             'project' => $settings['project'],
             'batch_description' => $settings['batch_description'],
             'content' => $settings['units'],
         ];
         $settings['batch_title'] = "Imported batch from Dive dashboard";
         $searchForBatch = \Entity::where("hash", md5(serialize($hashBatch)))->first();
         if ($searchForBatch != NULL) {
             array_push($this->status['notice'], "Batch already exists " . $searchForBatch['_id'] . "");
         } else {
             Batch::store($settings, $activity);
         }
         array_push($this->status['success'], "Successfully imported " . $settings['documentType'] . "");
         array_push($this->status['success'], "Logged activities as " . $activity->_id . "");
         return $this->status;
     } catch (Exception $e) {
         // Roll back what this call created before the failure.
         if ($activity !== null) {
             $activity->forceDelete();
         }
         foreach ($units as $unit) {
             // NOTE(review): condition kept from the original rollback; exists() is defined
             // outside this block -- confirm it reports "was already stored before this import".
             if (!$unit->exists()) {
                 $unit->forceDelete();
             }
         }
         return $e;
     }
 }
示例#4
0
 /**
  * Resolve the batch for the job form and redirect to the next page.
  *
  * When the request carries a 'batch' input, that batch is looked up and stored (serialized)
  * in the session; otherwise the batch previously stored in the session is used. Without a
  * usable batch the user is sent back to the batch selection page with a notice.
  *
  * @param mixed $next Not used by this method.
  * @return mixed Redirect to "jobs2/submit", or to "jobs2/batch" when no batch is available.
  */
 public function postFormPart($next)
 {
     if (Input::has('batch')) {
         // TODO: Validate for CSRF
         $batch = Batch::find(Input::get('batch'));
         // TODO -- is saving batch in the session a good idea ?
         // Only remember batches that were actually found; an unknown id must not put an
         // empty value in the session.
         if (!empty($batch)) {
             Session::put('batch', serialize($batch));
         }
     } else {
         // Nothing stored yet means there is nothing to unserialize.
         $batch = Session::has('batch') ? unserialize(Session::get('batch')) : null;
     }
     if (empty($batch)) {
         Session::flash('flashNotice', 'Please select a batch first.');
         return Redirect::to("jobs2/batch");
     }
     return Redirect::to("jobs2/submit");
 }
示例#5
0
 /**
  * Store a batch when the form was submitted, otherwise show the batch creation page.
  *
  * A request with a 'batch_description' input is treated as the submitted form: all input is
  * handed to Batch::store() and the user is redirected to the media search. Any other request
  * renders the creation view for the units passed in the 'selection' input.
  *
  * @return mixed Redirect to 'media/search', or the 'media.pages.createbatch' view.
  */
 public function anyBatch()
 {
     if (Input::has('batch_description')) {
         $batch = new Batch();
         $batch->store(Input::all());
         return Redirect::to('media/search');
     }
     $units = Input::get('selection');
     if (!is_array($units) || empty($units)) {
         // Without selected units there is nothing to sort and no first unit to derive the
         // fields from, so send the user back to the search page.
         return Redirect::to('media/search');
     }
     natsort($units);
     // natsort() keeps the original keys; re-index so the first unit is at offset 0.
     $units = array_values($units);
     $fields = explode("/", $units[0]);
     return View::make('media.pages.createbatch', compact('units', 'fields'));
 }
示例#6
0
 /**
  * Import a crowdsourcing results file: units, batch, job and one worker unit per judgment.
  *
  * Steps, in order: read the document with readCSV(), create an activity and a File, detect
  * the platform from the first header cell ("HITId" means AMT, anything else CrowdFlower),
  * create one Unit per distinct value of the first column, store a batch, create the job
  * configuration and the job, build the annotation vectors for the supported document types,
  * create a crowd agent and a worker unit for every data row and queue a job cache update.
  * When an Exception is caught, the entities created so far are deleted again.
  *
  * @param mixed $document Passed to readCSV() and to File::store().
  * @param array $settings Reads 'project', 'documentType' and 'filename'; 'units', 'platform',
  *                        'batch_title', 'batch_description', 'judgmentsPerUnit' and
  *                        'contradiction' are set by this method.
  * @return array|Exception $this->status (messages under 'notice', 'success' and 'error'),
  *                         or the caught exception when the import failed.
  */
 public function process($document, $settings)
 {
     // increase memory usage to support larger files with up to 10k judgments
     ini_set('memory_limit', '256M');
     set_time_limit(0);
     $this->status = ['notice' => [], 'success' => [], 'error' => []];

     // Everything the catch block rolls back is declared up front, so the rollback never
     // touches an undefined variable regardless of which step failed.
     $activity = null;
     $file = null;
     $jobconfig = null;
     $job = null;
     // keep a list of all unique units, crowdAgents and workerUnits so that we can rollback only the unique ones on error
     $units = [];
     $this->crowdAgents = [];
     $this->workerUnits = [];
     $this->duplicateUnits = 0;
     $this->duplicateCrowdAgents = 0;
     $this->duplicateWorkerUnits = 0;

     try {
         $settings['units'] = [];
         // read document content and put it into an array
         $data = $this->readCSV($document);
         // Create activity
         $activity = $this->createActivity();
         // Create input file
         $file = new File();
         $file->project = $settings['project'];
         $file->store($document, $settings, $activity);
         $file->save();
         // log status
         if ($file->exists()) {
             array_push($this->status['notice'], "Existing file found (" . $file->_id . ")");
         } else {
             array_push($this->status['success'], "File created (" . $file->_id . ")");
         }

         // First row holds the column names; all other rows are judgments.
         $header = $data[0];
         $rowCount = count($data);

         // temporary mapping of unit ids to CrowdTruth unit ids
         $unitMap = [];

         // Detect if this is an AMT or CF file
         if ($header[0] == "HITId") {
             // AMT
             $settings['platform'] = 'AMT';
             $startColumn = 27;
             // Column 22 is empty in AMT files; it stands in for every field AMT does not
             // provide (channel, trust, country, region, city).
             $column = [
                 'submit_time' => 18,
                 'id' => 14,
                 'start_time' => 17,
                 'channel' => 22,
                 'trust' => 22,
                 'worker' => 15,
                 'country' => 22,
                 'region' => 22,
                 'city' => 22,
             ];
         } else {
             // CrowdFlower
             $settings['platform'] = 'CF';
             $startColumn = 12;
             $column = [
                 'submit_time' => 1,
                 'id' => 2,
                 'start_time' => 3,
                 'channel' => 5,
                 'trust' => 6,
                 'worker' => 7,
                 'country' => 8,
                 'region' => 9,
                 'city' => 10,
             ];
         }
         $endColumn = count($header);

         // Create Units: one per distinct value in the first column. array_unique() keeps the
         // key of the first occurrence, so $unitIds holds the row index of one representative
         // row per unit; index 0 is the header row and is skipped.
         $unitIds = array_keys(array_unique(array_column($data, 0)));
         $unitTotal = count($unitIds);
         for ($i = 1; $i < $unitTotal; $i++) {
             $row = $data[$unitIds[$i]];
             $platform_id = $row[0];

             // Temp mapping of files to document type structures. This should be done using the preprocessing functions
             // Sounds
             if ($settings['project'] == 'Sounds' && $settings['documentType'] == 'sound') {
                 $content = [
                     'id' => $row[array_search('id', $header)],
                     'preview-hq-mp3' => $row[array_search('preview-hq-mp3', $header)],
                 ];
             }
             // Passage Alignment
             if ($settings['documentType'] == 'passage_alignment') {
                 $content = [
                     'question' => [
                         'id' => $row[array_search('id', $header)],
                         'passage' => $row[array_search('question', $header)],
                     ],
                     'answer' => [
                         'id' => $row[array_search('id', $header)],
                         'passage' => $row[array_search('passage', $header)],
                     ],
                 ];
             }
             // Passage Justification
             if ($settings['documentType'] == 'passage_justification') {
                 $content = [
                     'question' => [
                         'id' => $row[array_search('Input.ID', $header)],
                         'passage' => $row[array_search('Input.Question', $header)],
                     ],
                     'answers' => [],
                 ];
                 // Up to six candidate passages; empty slots are left out.
                 for ($k = 1; $k <= 6; $k++) {
                     $answerId = $row[array_search('Input.id' . $k, $header)];
                     if ($answerId != "") {
                         $content['answers'][$k] = [
                             'id' => $answerId,
                             'passage' => $row[array_search('Input.Passage' . $k, $header)],
                         ];
                     }
                 }
             }
             // Quantum sound pairs
             if ($settings['project'] == 'Quantum' && $settings['documentType'] == 'sound') {
                 $content = [
                     'id' => $row[array_search('id', $header)],
                     'sound1' => [
                         'id' => $row[array_search('s1_id', $header)],
                         'name' => $row[array_search('s1_name', $header)],
                         'description' => $row[array_search('s1_description', $header)],
                         'duration' => $row[array_search('s1_duration', $header)],
                         'url' => $row[array_search('s1_url', $header)],
                     ],
                     'sound2' => [
                         'id' => $row[array_search('s2_id', $header)],
                         'name' => $row[array_search('s2_name', $header)],
                         'description' => $row[array_search('s2_description', $header)],
                         'duration' => $row[array_search('s2_duration', $header)],
                         'url' => $row[array_search('s2_url', $header)],
                     ],
                 ];
             }

             $unit = new Unit();
             $unit->project = $settings['project'];
             $unit->activity_id = $activity->_id;
             $unit->documentType = $settings['documentType'];
             $unit->type = "unit";
             $unit->parents = [$file->_id];
             $unit->content = $content;
             $unit->hash = md5(serialize($content));
             $unit->platformId = $platform_id;
             $unit->save();
             $units[$unit->_id] = $unit;
             $unitMap[$platform_id] = $unit->_id;
         }

         // Create Batch
         $settings['units'] = array_keys($units);
         $settings['batch_title'] = "Imported batch";
         $settings['batch_description'] = "Batch added via result importer";
         $batch = Batch::store($settings, $activity);

         // Create job configuration
         $unitCount = $unitTotal - 1;
         // Get number of judgments per unit
         // NOTE(review): $unitCount is 0 for a file that holds only a header row -- confirm
         // such files are rejected before this point.
         $settings['judgmentsPerUnit'] = round(($rowCount - 1) / $unitCount);
         $jobconfig = $this->createJobconf($activity->_id, $settings);
         // Create job
         $job = $this->createJob($jobconfig->_id, $activity->_id, $batch, $settings);

         // temp for sounds, create annotation vector for each unit
         $annVector = [];
         $result = [];

         // passage alignment
         if ($settings['documentType'] == 'passage_alignment') {
             for ($i = 1; $i < $rowCount; $i++) {
                 for ($j = 0; $j < 30; $j++) {
                     // for each passage get the tags
                     if ($data[$i][array_search('rel' . $j, $header)] != "") {
                         $term1 = $data[$i][array_search('rel' . $j . 'a', $header)];
                         $term2 = $data[$i][array_search('rel' . $j . 'b', $header)];
                         $key = $term1 . ',' . $term2;
                         // add keyword to list of keywords for this unit
                         if (!isset($annVector[$data[$i][0]][$key])) {
                             $annVector[$data[$i][0]][$key] = 0;
                         }
                     }
                 }
             }
         }

         // Passage Justification
         if ($settings['documentType'] == 'passage_justification') {
             $questionTypes = ['Subjective' => 0, 'YesNo' => 0, 'NotYesNo' => 0, 'Unanswerable' => 0];
             $answers = ['Noanswer' => 0, 'Yes' => 0, 'No' => 0, 'Other' => 0, 'Unanswerable' => 0];
             for ($i = 1; $i < $rowCount; $i++) {
                 // add answer possibilities to hit
                 $annVector[$data[$i][0]]['question'] = $questionTypes;
                 $annVector[$data[$i][0]]['answer'] = $answers;
                 // add existing passages to vector
                 for ($k = 1; $k <= 6; $k++) {
                     $passageId = $data[$i][array_search('Input.id' . $k, $header)];
                     if ($passageId != "") {
                         $annVector[$data[$i][0]]['justification']['p' . $passageId] = 0;
                     }
                 }
             }
         }

         // Sounds
         if ($settings['documentType'] == 'sound') {
             for ($i = 1; $i < $rowCount; $i++) {
                 // for each keywords
                 $keywords = explode(',', $data[$i][array_search('keywords', $header)]);
                 foreach ($keywords as $keyword) {
                     $keyword = trim(strtolower(str_replace('.', '', $keyword)));
                     if ($keyword != "") {
                         // add keyword to list of keywords for this unit
                         if (!isset($annVector[$data[$i][0]][$keyword])) {
                             $annVector[$data[$i][0]][$keyword] = 0;
                         }
                     }
                 }
                 $result[$unitMap[$data[$i][0]]] = ['keywords' => $annVector[$data[$i][0]]];
             }
         }

         // loop through all the judgments and add workerUnits, media units and CrowdAgents.
         for ($i = 1; $i < $rowCount; $i++) {
             // loop through all values in the file, and add them as content
             $content = [];
             for ($c = $startColumn; $c < $endColumn; $c++) {
                 // Dots in column names are replaced so they can be used as keys.
                 $key = str_replace('.', '_', $header[$c]);
                 $content[$key] = $data[$i][$c];
             }
             $trust = 1;
             // Not used further in this method.
             $vector = $annVector[$data[$i][0]];
             $settings['contradiction'] = 0;
             // Create CrowdAgent
             $crowdAgent = $this->createCrowdAgent($data[$i][$column['worker']], $data[$i][$column['country']], $data[$i][$column['region']], $data[$i][$column['city']], $trust, $settings);
             // Create WorkerUnit
             $this->createWorkerUnit($activity->_id, $unitMap[$data[$i][0]], $data[$i][$column['start_time']], $data[$i][$column['channel']], $trust, $content, $crowdAgent->_id, $job->_id, $data[$i][$column['id']], $data[$i][$column['submit_time']], $settings);
         }

         // update job cache
         \Queue::push('Queues\\UpdateJob', array('job' => serialize($job)));

         // Notice that units already existed in the database
         if ($this->duplicateUnits > 0) {
             array_push($this->status['notice'], "Existing units found (" . $this->duplicateUnits . ")");
         }
         if ($this->duplicateCrowdAgents > 0) {
             array_push($this->status['notice'], "Existing crowd agents found (" . $this->duplicateCrowdAgents . ")");
         }
         if ($this->duplicateWorkerUnits > 0) {
             array_push($this->status['notice'], "Existing judgements found (" . $this->duplicateWorkerUnits . ")");
         }

         // Job's done!
         array_push($this->status['success'], "Successfully imported " . $settings['filename'] . "");
         array_push($this->status['success'], "Logged activities as " . $activity->_id . "");
         return $this->status;
     } catch (Exception $e) {
         // Roll back whatever was created before the failure. Each step is guarded because
         // the exception may have been thrown before the corresponding entity existed.
         if ($activity !== null) {
             $activity->forceDelete();
         }
         if ($file !== null && !$file->exists()) {
             $file->forceDelete();
         }
         if ($jobconfig !== null && !$jobconfig->_existing) {
             $jobconfig->forceDelete();
         }
         if ($job !== null) {
             $job->forceDelete();
         }
         foreach ($units as $unit) {
             // NOTE(review): condition kept from the original rollback; exists() is defined
             // outside this block -- confirm it reports "was already stored before this import".
             if (!$unit->exists()) {
                 $unit->forceDelete();
             }
         }
         foreach ($this->crowdAgents as $crowdAgent) {
             if (!$crowdAgent->_existing) {
                 $crowdAgent->forceDelete();
             }
         }
         foreach ($this->workerUnits as $workerUnit) {
             $workerUnit->forceDelete();
         }
         return $e;
     }
 }