/**
  * Render the configuration page for preprocessing a single document.
  *
  * The document id is read from the 'URI' request input. When a File with
  * that _id is found, the view receives: the URI, the document title, a
  * preview made of the first $this->nLines lines of the content, the list
  * returned by getAvailableFunctions(), the document's project, a default
  * preview table and the distinct document types of units in that project.
  * When no File matches, the user is sent back with a flash error.
  *
  * @return mixed The configure view, or a redirect back when no File matches.
  */
 public function getConfigure()
 {
     $URI = Input::get('URI');
     $document = File::where('_id', $URI)->first();

     // Guard clause: without a matching document there is nothing to configure.
     if (!$document) {
         return Redirect::back()->with('flashError', 'No valid URI given: ' . $URI);
     }

     // Load which functions are available for display
     $availableFunctions = $this->getAvailableFunctions();

     // Keep only the leading lines of the content for the preview.
     $lineBreak = "\n";
     $rawContent = $document['content'];
     $projectName = $document['project'];
     $leadingLines = array_slice(explode($lineBreak, $rawContent), 0, $this->nLines);
     $previewText = implode($lineBreak, $leadingLines);

     // Distinct document types already present in the document's project.
     $documentTypes = Unit::select('documentType')
         ->where('project', $document->project)
         ->distinct()
         ->get()
         ->toArray();

     // Default preview of the file; '"' and ',' are presumably the enclosure
     // and delimiter used by doPreviewTable() -- TODO confirm against that method.
     $defaultTable = $this->doPreviewTable($document, '"', ',', false);

     return View::make('media.preprocess.text.pages.configure')
         ->with('URI', $URI)
         ->with('docTitle', $document['title'])
         ->with('docPreview', $previewText)
         ->with('functions', $availableFunctions)
         ->with('project', $projectName)
         ->with('previewTable', $defaultTable)
         ->with('docTypes', $documentTypes);
 }
 /**
  * Show the metadata-description actions page for "tv-news-broadcasts" units.
  *
  * All units whose documentType is 'tv-news-broadcasts' are passed to the
  * view as 'entities'. When there are none, the user is redirected to
  * media/upload with a flash notice.
  *
  * A second, Cart-based variant used to follow the redirect below; it could
  * never execute because both paths above it return, so it has been removed.
  *
  * @return mixed The actions view, or a redirect to media/upload.
  */
 public function getActions()
 {
     $entities = Unit::where('documentType', '=', 'tv-news-broadcasts')->get();
     if (count($entities) > 0) {
         return View::make('media.preprocess.metadatadescription.pages.actions', compact('entities'));
     }

     return Redirect::to('media/upload')->with('flashNotice', 'You have not uploaded any "tv-news-broadcasts" documents yet');
 }
 /**
  * Import units sent by the DIVE dashboard and group them into a batch.
  *
  * Only the "new_units" signal is handled. The payload is JSON-decoded into
  * a list of unit contents; for each one a hash over project, document type
  * and content is looked up, and a new Unit is stored only when no entity
  * with that hash exists. The ids of all units (existing and new) are then
  * stored as a batch unless a batch with the same hash already exists.
  *
  * @param string $signal   Request type; anything other than "new_units" is reported as an error.
  * @param string $payload  JSON-encoded list of unit contents.
  * @param array  $settings Reads 'project', 'documentType' and 'batch_description'
  *                         (the latter two get defaults when equal to ""); 'units'
  *                         and 'batch_title' are overwritten here.
  *
  * @return mixed The status array with 'notice', 'success' and 'error' lists,
  *               or the caught Exception object when the import failed.
  */
 public function process($signal, $payload, $settings)
 {
     // increase memory usage to support larger files with up to 10k judgments
     ini_set('memory_limit', '256M');
     set_time_limit(0);
     $this->status = ['notice' => [], 'success' => [], 'error' => []];

     // Declared before the try block so the rollback in the catch block never
     // touches an undefined variable (e.g. when createActivity() itself throws).
     $activity = null;
     $units = [];

     // NOTE(review): if this class lives in a namespace without `use Exception;`
     // the catch below will not match anything -- confirm against the file header.
     try {
         if (empty($payload)) {
             array_push($this->status['error'], "The content of the units is empty -- " . $payload);
             return $this->status;
         }

         if ($signal != "new_units") {
             array_push($this->status['error'], "Unknown request from DIVE dashboard -- " . $signal);
             return $this->status;
         }

         $settings['units'] = [];
         $retUnits = json_decode($payload, true);

         // Create activity
         $activity = $this->createActivity();

         if ($settings['documentType'] == "") {
             $settings['documentType'] = "diveunit";
         }
         if ($settings["batch_description"] == "") {
             $settings["batch_description"] = "Batch imported from DIVE dashboard";
         }

         foreach ($retUnits as $unitContent) {
             // The hash identifies a unit by project, document type and content.
             $hashing = array();
             $hashing["project"] = $settings["project"];
             $hashing["documentType"] = $settings["documentType"];
             $hashing["content"] = $unitContent;
             $hash = md5(serialize($hashing));

             $searchForUnit = \Entity::where("hash", $hash)->first();
             if ($searchForUnit != NULL) {
                 // Reuse the existing unit; it is not tracked for rollback.
                 array_push($settings['units'], $searchForUnit["_id"]);
             } else {
                 $unit = new Unit();
                 $unit->project = $settings['project'];
                 $unit->activity_id = $activity->_id;
                 $unit->documentType = $settings['documentType'];
                 $unit->type = "unit";
                 $unit->parents = [];
                 $unit->content = $unitContent;
                 $unit->hash = $hash;
                 $unit->source = "divedashboard";
                 $unit->save();

                 // Remember units created by this run so they can be rolled back.
                 $units[$unit->_id] = $unit;
                 array_push($settings['units'], $unit->_id);
             }
         }

         // Create Batch
         $hashBatch = array();
         $hashBatch["project"] = $settings["project"];
         $hashBatch["batch_description"] = $settings["batch_description"];
         $hashBatch["content"] = $settings["units"];
         $settings['batch_title'] = "Imported batch from Dive dashboard";

         $searchForBatch = \Entity::where("hash", md5(serialize($hashBatch)))->first();
         if ($searchForBatch != NULL) {
             array_push($this->status['notice'], "Batch already exists " . $searchForBatch['_id']);
         } else {
             Batch::store($settings, $activity);
         }

         array_push($this->status['success'], "Successfully imported " . $settings['documentType']);
         array_push($this->status['success'], "Logged activities as " . $activity->_id);
         return $this->status;
     } catch (Exception $e) {
         // Roll back what this run created. The activity may not exist yet if
         // the failure happened before or inside createActivity().
         if ($activity !== null) {
             $activity->forceDelete();
         }

         // The units created above are kept in the local $units list; the
         // previous code iterated $this->units, which this method never fills.
         // NOTE(review): exists() is kept as the original rollback condition --
         // confirm its semantics on Unit.
         foreach ($units as $unit) {
             if (!$unit->exists()) {
                 $unit->forceDelete();
             }
         }
         return $e;
     }
 }
 // Example #4
 // 0
 /**
  * Store the video records selected by listRecords() as Units.
  *
  * For every video identifier a Unit is created in the "soundandvision"
  * project with source "openimages", its content taken from getRecord().
  * The project group is created when it is not in the list of existing
  * projects, and the unit's user is granted project membership when it has
  * no access yet.
  *
  * @param string $documentType Document type assigned to every stored unit.
  * @param array  $parameters   Passed to listRecords(); 'metadataPrefix' is also passed to getRecord().
  * @param mixed  $noOfVideos   Passed to listRecords(); presumably the number of videos to fetch -- TODO confirm.
  *
  * @return array Status with 'success' and/or 'error' messages keyed by video
  *               title (or 'OnlineData' for setup failures) and, when the loop
  *               was reached, 'recno' holding the number of identifiers.
  */
 public function store($documentType, $parameters, $noOfVideos)
 {
     // listRecords() presumably fills this list by reference -- verify its signature.
     $listOfVideoIdentifiers = array();
     $this->listRecords($parameters, $noOfVideos, $listOfVideoIdentifiers);

     // get list of existing projects
     $projects = ProjectHandler::listProjects();

     $status = array();
     try {
         $this->createOpenimagesVideoGetterSoftwareAgent();
     } catch (Exception $e) {
         $status['error']['OnlineData'] = $e->getMessage();
         return $status;
     }

     // Initialised up front so the catch block can tell whether the Activity
     // object was constructed before the failure.
     $activity = null;
     try {
         $activity = new Activity();
         $activity->softwareAgent_id = "openimagesgetter";
         $activity->save();
     } catch (Exception $e) {
         // Something went wrong with creating the Activity
         $status['error']['OnlineData'] = $e->getMessage();
         if ($activity !== null) {
             $activity->forceDelete();
         }
         return $status;
     }

     $count = array("count" => 0);
     foreach ($listOfVideoIdentifiers as $video) {
         $title = $video;

         // Reset per iteration: otherwise a failure in `new Unit()` would make
         // the catch block delete the entity stored by the previous iteration.
         $entity = null;
         try {
             $entity = new Unit();
             // NOTE(review): self-assignment kept from the original; it looks
             // like a no-op unless Unit defines an _id accessor/mutator -- confirm.
             $entity->_id = $entity->_id;
             $entity->title = strtolower($title);
             $entity->documentType = $documentType;
             $entity->source = "openimages";
             $entity->project = "soundandvision";
             $entity->type = "unit";

             $videoMetadata = $this->getRecord($video, $parameters["metadataPrefix"]);
             $entity->content = $videoMetadata["content"];
             $entity->parents = array();
             $entity->tags = array("unit");
             $entity->segments = $count;
             $entity->keyframes = $count;

             // The hash covers content and project only.
             $hashing = array();
             $hashing["content"] = $entity->content;
             $hashing["project"] = $entity->project;
             $entity->hash = md5(serialize($hashing));
             $entity->activity_id = $activity->_id;
             $entity->save();

             $status['success'][$title] = $title . " was successfully uploaded. (URI: {$entity->_id})";

             // add the project if it doesnt exist yet
             if (!in_array($entity->project, $projects)) {
                 ProjectHandler::createGroup($entity->project);
                 // add the project to the temporary list
                 array_push($projects, $entity->project);
             }

             // add the user to the project if it has no access yet
             // (user_id is not set in this method; presumably filled in by the model on save)
             if (!ProjectHandler::inGroup($entity->user_id, $entity->project)) {
                 $user = UserAgent::find($entity->user_id);
                 ProjectHandler::grantUser($user, $entity->project, Roles::PROJECT_MEMBER);
             }
         } catch (Exception $e) {
             // Something went wrong with creating the Entity
             // NOTE(review): the shared activity is deleted on the first failure
             // while later iterations keep referencing its id -- confirm this is intended.
             $activity->forceDelete();
             if ($entity !== null) {
                 $entity->forceDelete();
             }
             $status['error'][$title] = $e->getMessage();
         }
     }

     $status["recno"] = count($listOfVideoIdentifiers);
     return $status;
 }
 /**
  * Count units per document type for every project the current user can access.
  *
  * @return array Two-element list: [0] maps project name => (document type =>
  *               number of units), containing only projects that have data;
  *               [1] is the sum of all those counts.
  */
 private function userDocTypes()
 {
     // Names of all projects the authenticated user has access to.
     $projectNames = array_column(ProjectHandler::getUserProjects(Auth::user()), 'name');

     $countsByProject = [];
     $totalUnits = 0;
     // Instantiated but not used further in this method.
     $searchComponent = new MediaSearchComponent();

     foreach ($projectNames as $projectName) {
         // Distinct document types present in this project.
         $typeRows = Unit::distinct('documentType')->where('project', $projectName)->get()->toArray();

         // Skip projects without any data.
         if (empty($typeRows[0])) {
             continue;
         }

         $countsByProject[$projectName] = [];
         foreach ($typeRows as $typeRow) {
             // Each row carries the document type name at index 0.
             $unitCount = Unit::where('project', $projectName)->where('documentType', $typeRow[0])->count();
             $totalUnits += $unitCount;
             $countsByProject[$projectName][$typeRow[0]] = $unitCount;
         }
     }

     return [$countsByProject, $totalUnits];
 }
 /**
  * Import a crowdsourcing results file (AMT or CrowdFlower CSV).
  *
  * The document is parsed with readCSV(); the first row is treated as the
  * header. The method then stores, in order: an activity, the input File,
  * one Unit per distinct value of the first column, a batch of those units,
  * a job configuration, a job, and for every data row a crowd agent and a
  * worker unit. Finally the job is pushed to the UpdateJob queue.
  *
  * @param mixed $document The uploaded file, passed to readCSV() and File::store().
  * @param array $settings Reads 'project', 'documentType' and 'filename';
  *                        'units', 'platform', 'batch_title', 'batch_description',
  *                        'judgmentsPerUnit' and 'contradiction' are set here.
  *
  * @return mixed The status array with 'notice', 'success' and 'error' lists,
  *               or the caught Exception object when the import failed.
  */
 public function process($document, $settings)
 {
     // increase memory usage to support larger files with up to 10k judgments
     ini_set('memory_limit', '256M');
     set_time_limit(0);
     $this->status = ['notice' => [], 'success' => [], 'error' => []];

     // Everything the rollback in the catch block touches is initialised
     // before the try block, so a failure at any point leaves it well-defined.
     $activity = null;
     $file = null;
     $jobconfig = null;
     $job = null;

     $settings['units'] = [];
     // keep a list of all unique units, crowdAgents and workerUnits so that we can rollback only the unique ones on error
     $units = [];
     $this->crowdAgents = [];
     $this->workerUnits = [];
     $this->duplicateUnits = 0;
     $this->duplicateCrowdAgents = 0;
     $this->duplicateWorkerUnits = 0;

     // NOTE(review): if this class lives in a namespace without `use Exception;`
     // the catch below will not match anything -- confirm against the file header.
     try {
         // read document content and put it into an array
         $data = $this->readCSV($document);
         // First row holds the column names.
         $header = $data[0];

         // Create activity
         $activity = $this->createActivity();

         // Create input file
         $file = new File();
         $file->project = $settings['project'];
         $file->store($document, $settings, $activity);
         $file->save();

         // log status
         if ($file->exists()) {
             array_push($this->status['notice'], "Existing file found (" . $file->_id . ")");
         } else {
             array_push($this->status['success'], "File created (" . $file->_id . ")");
         }

         // temporary mapping of unit ids to CrowdTruth unit ids
         $unitMap = [];

         // Detect if this is an AMT or CF file
         $column = [];
         if ($header[0] == "HITId") {
             // AMT
             $settings['platform'] = 'AMT';
             // Prefix for answer columns (not used further in this method)
             $prefix = "Answer.";
             $startColumn = 27;
             $endColumn = count($header);
             $column['submit_time'] = 18;
             $column['id'] = 14;
             $column['start_time'] = 17;
             $column['channel'] = 22;
             // empty
             $column['trust'] = 22;
             // empty
             $column['worker'] = 15;
             $column['country'] = 22;
             // empty
             $column['region'] = 22;
             // empty
             $column['city'] = 22;
             // empty
         } else {
             // CrowdFlower
             $settings['platform'] = 'CF';
             $prefix = "";
             $startColumn = 12;
             $endColumn = count($header);
             $column['submit_time'] = 1;
             $column['id'] = 2;
             $column['start_time'] = 3;
             $column['channel'] = 5;
             $column['trust'] = 6;
             $column['worker'] = 7;
             $column['country'] = 8;
             $column['region'] = 9;
             $column['city'] = 10;
         }

         // Create Units
         // array_unique() keeps the first row index of every distinct unit id;
         // entry 0 is skipped below, presumably because it is the header row.
         $unitIds = array_keys(array_unique(array_column($data, 0)));
         for ($i = 1; $i < count($unitIds); $i++) {
             $row = $data[$unitIds[$i]];

             // Temp mapping of files to document type structures. This should be done using the preprocessing functions
             // NOTE(review): $content is only assigned when one of the mappings
             // below matches; other project/documentType combinations leave it
             // undefined (or stale from the previous row).

             // Sounds
             if ($settings['project'] == 'Sounds' && $settings['documentType'] == 'sound') {
                 $content = [
                     'id' => $row[array_search('id', $header)],
                     'preview-hq-mp3' => $row[array_search('preview-hq-mp3', $header)],
                 ];
             }

             $platform_id = $row[0];

             // Passage Alignment
             if ($settings['documentType'] == 'passage_alignment') {
                 $content = [
                     'question' => [
                         'id' => $row[array_search('id', $header)],
                         'passage' => $row[array_search('question', $header)],
                     ],
                     'answer' => [
                         'id' => $row[array_search('id', $header)],
                         'passage' => $row[array_search('passage', $header)],
                     ],
                 ];
             }

             // Passage Justification
             if ($settings['documentType'] == 'passage_justification') {
                 $content = [
                     'question' => [
                         'id' => $row[array_search('Input.ID', $header)],
                         'passage' => $row[array_search('Input.Question', $header)],
                     ],
                     'answers' => [],
                 ];
                 // Up to six candidate passages per question; empty ids are skipped.
                 for ($k = 1; $k <= 6; $k++) {
                     if ($row[array_search('Input.id' . $k, $header)] != "") {
                         $content['answers'][$k] = [
                             'id' => $row[array_search('Input.id' . $k, $header)],
                             'passage' => $row[array_search('Input.Passage' . $k, $header)],
                         ];
                     }
                 }
             }

             // Quantum sounds (pairs of sounds)
             if ($settings['project'] == 'Quantum' && $settings['documentType'] == 'sound') {
                 $content = [
                     'id' => $row[array_search('id', $header)],
                     'sound1' => [
                         'id' => $row[array_search('s1_id', $header)],
                         'name' => $row[array_search('s1_name', $header)],
                         'description' => $row[array_search('s1_description', $header)],
                         'duration' => $row[array_search('s1_duration', $header)],
                         'url' => $row[array_search('s1_url', $header)],
                     ],
                     'sound2' => [
                         'id' => $row[array_search('s2_id', $header)],
                         'name' => $row[array_search('s2_name', $header)],
                         'description' => $row[array_search('s2_description', $header)],
                         'duration' => $row[array_search('s2_duration', $header)],
                         'url' => $row[array_search('s2_url', $header)],
                     ],
                 ];
             }

             $unit = new Unit();
             $unit->project = $settings['project'];
             $unit->activity_id = $activity->_id;
             $unit->documentType = $settings['documentType'];
             $unit->type = "unit";
             $unit->parents = [$file->_id];
             $unit->content = $content;
             $unit->hash = md5(serialize($content));
             $unit->platformId = $platform_id;
             $unit->save();

             // Track the unit for rollback and map the platform id to our id.
             $units[$unit->_id] = $unit;
             $unitMap[$platform_id] = $unit->_id;
         }

         // Create Batch
         $settings['units'] = array_keys($units);
         $settings['batch_title'] = "Imported batch";
         $settings['batch_description'] = "Batch added via result importer";
         $batch = Batch::store($settings, $activity);

         // Create job configuration
         $unitCount = count(array_unique(array_column($data, 0))) - 1;
         // Get number of judgments per unit
         $settings['judgmentsPerUnit'] = round((count($data) - 1) / $unitCount);
         // NOTE(review): uses $activity->id here but $activity->_id everywhere else -- confirm both resolve to the same value.
         $jobconfig = $this->createJobconf($activity->id, $settings);

         // Create job
         $job = $this->createJob($jobconfig->_id, $activity->_id, $batch, $settings);

         // temp for sounds, create annotation vector for each unit
         $annVector = [];
         $result = [];

         // passage alignment
         if ($settings['documentType'] == 'passage_alignment') {
             for ($i = 1; $i < count($data); $i++) {
                 for ($j = 0; $j < 30; $j++) {
                     // for each passage get the tags
                     if ($data[$i][array_search('rel' . $j, $header)] != "") {
                         $term1 = $data[$i][array_search('rel' . $j . 'a', $header)];
                         $term2 = $data[$i][array_search('rel' . $j . 'b', $header)];
                         $key = $term1 . ',' . $term2;
                         // add keyword to list of keywords for this unit
                         if (!isset($annVector[$data[$i][0]][$key])) {
                             $annVector[$data[$i][0]][$key] = 0;
                         }
                     }
                 }
             }
         }

         // Passage Justification
         if ($settings['documentType'] == 'passage_justification') {
             $questionTypes = ['Subjective' => 0, 'YesNo' => 0, 'NotYesNo' => 0, 'Unanswerable' => 0];
             $answers = ['Noanswer' => 0, 'Yes' => 0, 'No' => 0, 'Other' => 0, 'Unanswerable' => 0];
             for ($i = 1; $i < count($data); $i++) {
                 // add answer possibilities to hit
                 $annVector[$data[$i][0]]['question'] = $questionTypes;
                 $annVector[$data[$i][0]]['answer'] = $answers;
                 // add existing passages to vector
                 for ($k = 1; $k <= 6; $k++) {
                     if ($data[$i][array_search('Input.id' . $k, $header)] != "") {
                         $annVector[$data[$i][0]]['justification']['p' . $data[$i][array_search('Input.id' . $k, $header)]] = 0;
                     }
                 }
             }
         }

         // Sounds
         if ($settings['documentType'] == 'sound') {
             for ($i = 1; $i < count($data); $i++) {
                 // for each keywords
                 $keywords = explode(',', $data[$i][array_search('keywords', $header)]);
                 foreach ($keywords as $keyword) {
                     $keyword = trim(strtolower(str_replace('.', '', $keyword)));
                     if ($keyword != "") {
                         // add keyword to list of keywords for this unit
                         if (!isset($annVector[$data[$i][0]][$keyword])) {
                             $annVector[$data[$i][0]][$keyword] = 0;
                         }
                     }
                 }
                 $result[$unitMap[$data[$i][0]]] = ['keywords' => $annVector[$data[$i][0]]];
             }
         }

         // loop through all the judgments and add workerUnits, media units and CrowdAgents.
         for ($i = 1; $i < count($data); $i++) {
             // loop through all values in the file, and add them as content
             $content = [];
             for ($c = $startColumn; $c < $endColumn; $c++) {
                 // Dots in column names are replaced so they can be used as keys.
                 $key = str_replace('.', '_', $header[$c]);
                 $content[$key] = $data[$i][$c];
             }

             $trust = 1;
             // The per-row lookup of $annVector was removed: its result was never
             // used and it failed for units that have no vector entry.
             $settings['contradiction'] = 0;

             // Create CrowdAgent
             $crowdAgent = $this->createCrowdAgent(
                 $data[$i][$column['worker']],
                 $data[$i][$column['country']],
                 $data[$i][$column['region']],
                 $data[$i][$column['city']],
                 $trust,
                 $settings
             );

             // Create WorkerUnit
             $workerUnit = $this->createWorkerUnit(
                 $activity->_id,
                 $unitMap[$data[$i][0]],
                 $data[$i][$column['start_time']],
                 $data[$i][$column['channel']],
                 $trust,
                 $content,
                 $crowdAgent->_id,
                 $job->_id,
                 $data[$i][$column['id']],
                 $data[$i][$column['submit_time']],
                 $settings
             );
         }

         // update job cache
         \Queue::push('Queues\\UpdateJob', array('job' => serialize($job)));

         // Notice that units already existed in the database
         if ($this->duplicateUnits > 0) {
             array_push($this->status['notice'], "Existing units found (" . $this->duplicateUnits . ")");
         }
         if ($this->duplicateCrowdAgents > 0) {
             array_push($this->status['notice'], "Existing crowd agents found (" . $this->duplicateCrowdAgents . ")");
         }
         if ($this->duplicateWorkerUnits > 0) {
             array_push($this->status['notice'], "Existing judgements found (" . $this->duplicateWorkerUnits . ")");
         }

         // Job's done!
         array_push($this->status['success'], "Successfully imported " . $settings['filename']);
         array_push($this->status['success'], "Logged activities as " . $activity->_id);
         return $this->status;
     } catch (Exception $e) {
         // Roll back whatever this run managed to create. Each object is only
         // touched when it was actually created before the failure.
         if ($activity !== null) {
             $activity->forceDelete();
         }
         if ($file !== null && !$file->exists()) {
             $file->forceDelete();
         }
         if ($jobconfig !== null && !$jobconfig->_existing) {
             $jobconfig->forceDelete();
         }
         if ($job !== null) {
             $job->forceDelete();
         }

         // Units created above live in the local $units list. The previous code
         // iterated $this->units (never filled here) and deleted $jobconfig
         // instead of the unit.
         // NOTE(review): exists() is kept as the original rollback condition --
         // confirm its semantics on Unit.
         foreach ($units as $unit) {
             if (!$unit->exists()) {
                 $unit->forceDelete();
             }
         }
         foreach ($this->crowdAgents as $crowdAgent) {
             if (!$crowdAgent->_existing) {
                 $crowdAgent->forceDelete();
             }
         }
         foreach ($this->workerUnits as $workerUnit) {
             $workerUnit->forceDelete();
         }
         return $e;
     }
 }