/**
 * Workerunit-data page for uploaded "csvresult" documents.
 *
 * - No action: renders the selection view listing every csvresult entity
 *   whose title contains "workerunit", or redirects to the upload page when
 *   there are none.
 * - "preview" / "process": looks up the entity named by the `URI` request
 *   input and hands it to the csvresult mapper (with the extra `true`
 *   argument for "preview" only).
 *
 * Any other action, a missing URI, an unknown entity or an entity of another
 * document type yields no return value (null).
 *
 * @param string|null $action
 * @return mixed
 */
public function getWorkerunitdata($action = null)
{
    if (is_null($action)) {
        $csvResults = \MongoDB\Entity::where('documentType', 'csvresult')
            ->where('title', 'like', '%workerunit%')
            ->get();

        if (!(count($csvResults) > 0)) {
            return Redirect::to('files/upload')
                ->with('flashNotice', 'You have not uploaded any "csvresult" documents yet');
        }

        return View::make('preprocess.csvresult.pages.workerunitdata', array('entities' => $csvResults));
    }

    $previewOnly = ($action == "preview");
    if (!$previewOnly && !($action == "process")) {
        return null;
    }

    $URI = Input::get('URI');
    if (!$URI) {
        return null;
    }

    $entity = $this->repository->find($URI);
    if (!$entity) {
        return null;
    }

    if (!($entity->documentType == "csvresult")) {
        return null;
    }

    if ($previewOnly) {
        return $this->csvresultMapper->processWorkerunitData($entity, true);
    }

    return $this->csvresultMapper->processWorkerunitData($entity);
}
/**
 * Pre-process a "fullvideo" entity into keyframes and video segments.
 *
 * Reads the entity id from the `URI` request input, runs the full-video
 * structurer on it, stores the result and records the number of created
 * keyframe / segment entities on the parent entity.
 *
 * Always answers with a redirect back carrying a flash message.
 *
 * @return mixed Redirect response
 */
public function getProcess()
{
    $URI = Input::get('URI');
    if (!$URI) {
        return Redirect::back()->with('flashError', 'No valid URI given: ' . $URI);
    }

    $entity = $this->repository->find($URI);
    if (!$entity) {
        // Previously this case fell through without producing any response.
        return Redirect::back()->with('flashError', 'No valid URI given: ' . $URI);
    }

    if ($entity->documentType != "fullvideo") {
        // The original used `continue` here, which is a fatal error outside a loop.
        return Redirect::back()->with('flashError', 'The selected document is not a "fullvideo" document');
    }

    $videoPreprocessing = $this->fullvideoStructurer->process($entity);
    $status_processing = $this->fullvideoStructurer->store($entity, $videoPreprocessing);

    // Update the parent with the number of entities created for each part.
    // (The debug `echo "<pre>"` that used to precede the redirect was dropped.)
    foreach (array('keyframes', 'segments') as $part) {
        if (isset($status_processing[$part]) && !isset($status_processing[$part]['error'])) {
            \MongoDB\Entity::where('_id', '=', $entity->_id)->update(
                array($part . '.count' => $status_processing[$part]['success']["noEntitiesCreated"])
            );
        }
    }

    if (isset($status_processing["keyframes"]['success']) && isset($status_processing["segments"]['success'])) {
        return Redirect::back()->with('flashSuccess', 'Your video has been pre-processed in keyframes and video segments');
    }

    return Redirect::back()->with('flashError', 'An error occurred while the video was being pre-processed in keyframes and video segments');
}
/**
 * Collect the values offered by the search form for a format / domain.
 *
 * Returned keys:
 *  - formats:       the fixed list of formats, with $format moved to the front
 *  - userAgents:    User models (as returned by User::find) for the distinct
 *                   user ids of the matching entities
 *  - domains:       distinct domains, restricted by $format when given
 *  - documentTypes: distinct document types, restricted by $format and
 *                   $domain when given
 *
 * A null $format or $domain means "do not filter on this field". The original
 * code filtered on `format = null` in that case, which matched nothing useful.
 *
 * @param string|null $format
 * @param string|null $domain
 * @return array
 */
public function getSearchFieldsAndValues($format, $domain)
{
    $fields = array();
    $fields['formats'] = array("text", "image", "video");
    // Always present, even when no entity matches.
    $fields['userAgents'] = array();

    if (!is_null($format)) {
        $key = array_search($format, $fields['formats'], true);
        // Position 0 is already the front; false means "not a known format".
        if ($key !== false && $key > 0) {
            unset($fields['formats'][$key]);
            array_unshift($fields['formats'], $format);
        }
    }

    // Builds a fresh query restricted by the filters that were actually given.
    $filtered = function ($useDomain) use ($format, $domain) {
        $model = new Entity();
        $query = $model->newQuery();

        if (!is_null($format)) {
            $query = $query->where('format', $format);
        }

        if ($useDomain && !is_null($domain)) {
            $query = $query->where('domain', $domain);
        }

        return $query;
    };

    $domains = $filtered(false)->distinct('domain')->get();
    $documentTypes = $filtered(true)->distinct('documentType')->get();
    $usersInvolvedInEntities = array_flatten($filtered(true)->distinct('user_id')->get()->toArray());

    foreach ($usersInvolvedInEntities as $key => $user_id) {
        $fields['userAgents'][$key] = User::find($user_id);
    }

    $fields['domains'] = array_flatten($domains->toArray());
    $fields['documentTypes'] = array_flatten($documentTypes->toArray());

    return $fields;
}
/**
 * Show the relex pre-processing actions for uploaded documents.
 *
 * Uploaded documents are currently recognised by an activity id containing
 * "fileuploader" (TODO: select by actual type). Redirects to the upload page
 * when nothing has been uploaded yet.
 *
 * @return mixed View or redirect response
 */
public function getActions()
{
    $uploaded = \MongoDB\Entity::where('activity_id', 'LIKE', '%fileuploader%')->get();

    if (!(count($uploaded) > 0)) {
        return Redirect::to('media/upload')
            ->with('flashNotice', 'You have not uploaded any documents yet');
    }

    return View::make('media.preprocess.relex.pages.actions', array('entities' => $uploaded));
}
/**
 * Return view for selecting a document for preprocessing.
 *
 * Every uploaded entity (activity id containing "fileuploader") is annotated
 * with a `canWrite` flag holding the result of the PROJECT_WRITE permission
 * check for the current user on the entity's project. Redirects to the upload
 * page when there are no uploaded entities.
 *
 * @return mixed View or redirect response
 */
public function getIndex()
{
    $uploaded = Entity::where('activity_id', 'LIKE', '%fileuploader%')->get();
    $currentUser = \Auth::user();

    foreach ($uploaded as $document) {
        $document['canWrite'] = PermissionHandler::checkProject(
            $currentUser,
            $document['project'],
            Permissions::PROJECT_WRITE
        );
    }

    if (!(count($uploaded) > 0)) {
        return Redirect::to('media/upload')
            ->with('flashNotice', 'You have not uploaded any documents yet');
    }

    return View::make('media.preprocess.text.pages.actions', array('entities' => $uploaded));
}
/**
 * Fetch the next free numeric suffix for the given format / domain / docType
 * combination.
 *
 * The ids of all matching entities are sorted naturally; the last "/"-separated
 * segment of the highest id is incremented by one. Returns 0 when no matching
 * entity exists or when the last id cannot be read.
 *
 * @param string $format
 * @param string $domain
 * @param string $docType
 * @return int
 */
private function getLastDocumentInc($format, $domain, $docType)
{
    // Default covers both "no documents" and "last id unreadable"; the
    // original left $inc undefined in the latter case.
    $inc = 0;

    $existingIds = Entity::where('format', $format)
        ->where('domain', $domain)
        ->where('documentType', $docType)
        ->get(array("_id"));

    if (count($existingIds) > 0) {
        $sortedIds = $existingIds->sortBy(function ($entity) {
            return $entity->_id;
        }, SORT_NATURAL)->toArray();

        $last = end($sortedIds);
        if ($last) {
            $segments = explode("/", $last['_id']);
            // Explicit cast: the segment is a string.
            $inc = (int) end($segments) + 1;
        }
    }

    return $inc;
}
/**
 * One-off export: match rows of the AMT FactSpan CSV against stored
 * relex-structured-sentence units and write the corresponding result vectors
 * of job "entity/text/medical/job/0" to amt_new_output.csv in the storage
 * directory.
 *
 * Only every second data row is processed (the first data row is skipped).
 * For each processed row that matches no unit, or that cannot be mapped onto
 * the header, two blank 14-column placeholder rows are written.
 *
 * Ends by dumping the output path via dd().
 */
public function getVector()
{
    if (($handle = fopen(storage_path() . '/output_AMT_FactSpan_sentences_raw.csv', 'r')) === false) {
        die('Error opening file');
    }

    $headers = fgetcsv($handle, 1024, ',');
    if (!is_array($headers)) {
        fclose($handle);
        die('Error reading CSV header');
    }

    // Loop-invariant lookups: fetched once instead of once per CSV row.
    $units = MongoDB\Entity::where('documentType', 'relex-structured-sentence')->get();
    $job = null;
    $jobLoaded = false;

    $result = array();
    $blankRow = array_fill(0, 14, '');
    $skip = false;

    while ($row = fgetcsv($handle, 1024, ',')) {
        set_time_limit(30);

        // Process every second row only.
        $skip = !$skip;
        if ($skip) {
            continue;
        }

        $found = false;

        // A row with a different column count cannot be combined with the
        // header; it is treated as "not found" below.
        if (count($row) === count($headers)) {
            $c = array_combine($headers, $row);
            $sentence = rtrim($c['sentence'], '.');
            $term1 = $c['term1'];
            $term2 = $c['term2'];

            foreach ($units as $unit) {
                if ($unit['content']['sentence']['formatted'] != $sentence
                    || $unit['content']['terms']['first']['formatted'] != $term1
                    || $unit['content']['terms']['second']['formatted'] != $term2
                ) {
                    continue;
                }

                $found = true;

                // This can also be used to compare with CF.
                if (!$jobLoaded) {
                    $job = Job::id('entity/text/medical/job/0')->first();
                    $jobLoaded = true;
                }

                $jobResults = $job ? $job->results : null;
                if (!is_array($jobResults) || !array_key_exists($unit->_id, $jobResults)) {
                    continue;
                }

                $vector = $jobResults[$unit->_id];
                foreach (array('term1', 'term2') as $vectorKey) {
                    if (isset($vector[$vectorKey])) {
                        $temp = $vector[$vectorKey];
                        $temp['sentence'] = $sentence;
                        $temp['term1'] = $term1;
                        $temp['term2'] = $term2;
                        $result[] = $temp;
                    }
                }
            }
        }

        if (!$found) {
            $result[] = $blankRow;
            $result[] = $blankRow;
        }
    }

    fclose($handle);

    $path = storage_path() . '/amt_new_output.csv';
    if (($out = fopen($path, 'w')) === false) {
        die('Error opening file');
    }

    // The header is taken from the first row; nothing is written when there are no rows.
    if (count($result) > 0) {
        fputcsv($out, array_keys($result[0]));
        foreach ($result as $row) {
            fputcsv($out, $row);
        }
    }

    fclose($out);
    dd($path);
}
/**
 * Store posted images as entities and run the feature-extraction script.
 *
 * Request input layout: [0] list of image descriptions (url, title, height,
 * width, description, author), [1] domain, [2] document type.
 *
 * One Activity is created for the batch. Each image becomes an Entity unless
 * an entity with the same content hash already exists, in which case the
 * existing id is reused. The python script getMany.py is then invoked with
 * domain, type, the current user's e-mail and the url/id pairs.
 *
 * Every value placed on the command line is shell-escaped, because all of it
 * originates from the request.
 *
 * NOTE(review): the failure path inside the image loop returns the raw
 * $return array, while the normal path goes through returnJson(); kept as in
 * the original.
 *
 * @return mixed
 */
public function postFeatures()
{
    $return = array('status' => 'ok');
    $input = Input::get();
    $domain = $input[1];
    $type = $input[2];

    // CREATE ACTIVITY FOR BATCH
    $activity = new Activity();
    $activity->label = "Images posted for processing.";
    $activity->softwareAgent_id = 'imagegetter';
    $activity->save();

    // LOOP THROUGH IMAGES, CREATE ENTITIES WITH ACTIVITY-ID FOR NEW IMAGES
    $urlIdArguments = array();
    foreach ($input[0] as $img) {
        \Log::debug(json_encode($img));

        // Reset so the catch block never deletes the image of a previous iteration.
        $image = null;

        try {
            // Save images as parent
            $image = new Entity();
            $image->domain = $domain;
            $image->format = "image";

            $content = $image->content;
            $content['url'] = $img['url'];
            $content['title'] = $img['title'];
            $content['height'] = $img['height'];
            $content['width'] = $img['width'];
            $content['description'] = $img['description'];
            $content['author'] = $img['author'];
            $image->content = $content;

            $image->documentType = $type;
            $image->source = "Rijksmuseum";
            $image->tags = ['unit'];
            $image->activity_id = $activity->_id;
            $image->softwareAgent_id = "imagegetter";

            // CHECK WHETHER URL EXISTS ALREADY
            $hash = md5(serialize($image->content));
            $existingid = Entity::where('hash', $hash)->pluck('_id');
            if (!$existingid) {
                $image->hash = $hash;
                \Log::debug(json_encode($image->toArray()));
                $image->save();
                $existingid = $image->_id;
            }

            $urlIdArguments[] = (string) $img['url'];
            $urlIdArguments[] = (string) $existingid;
        } catch (Exception $e) {
            // delete image
            if (isset($image)) {
                $image->forceDelete();
            }

            // delete activity
            if (isset($activity)) {
                $activity->forceDelete();
            }

            $return['error'] = $e->getMessage();
            $return['status'] = 'bad';
            \Log::debug($e->getMessage());

            return $return;
        }
    }

    // RUN PYTHON SCRIPT THAT CALLS APIs TO ADD FEATURES TO IMAGE
    try {
        $arguments = array_merge(
            array(
                base_path() . "/app/lib/getAPIS/getMany.py",
                (string) $domain,
                (string) $type,
                (string) Auth::user()->email,
            ),
            $urlIdArguments
        );

        // Escape each argument individually so request data cannot inject shell syntax.
        $command = "/usr/bin/python2.7 " . implode(' ', array_map('escapeshellarg', $arguments));

        \Log::debug("Running {$command}");
        exec($command, $output, $error);

        $return['oo'] = $output; // lines printed by the script
        $return['ee'] = $error;  // exit status of the script
    } catch (Exception $e) {
        \Log::debug("ERROR: " . $e->getMessage());
        $return['error'] = $e->getMessage();
        $return['status'] = 'bad';
    }

    return $this->returnJson($return);
}
/**
 * Pre-process a "metadatadescription" entity into named entities and
 * putative events.
 *
 * Reads the entity id from the `URI` request input, runs the metadata
 * annotation structurer, stores its result and flags on the parent entity
 * which extraction steps completed without an error. When the automated event
 * extraction succeeded, the statistics cache for the entity is rebuilt.
 *
 * Always answers with a redirect back carrying a flash message.
 *
 * @return mixed Redirect response
 */
public function getProcess()
{
    $URI = Input::get('URI');
    if (!$URI) {
        return Redirect::back()->with('flashError', 'No valid URI given: ' . $URI);
    }

    $entity = $this->repository->find($URI);
    if (!$entity) {
        // Previously this case fell through without producing any response.
        return Redirect::back()->with('flashError', 'No valid URI given: ' . $URI);
    }

    if ($entity->documentType != "metadatadescription") {
        // The original used `continue` here, which is a fatal error outside a loop.
        return Redirect::back()->with('flashError', 'The selected document is not a "metadatadescription" document');
    }

    $metadataProcessing = $this->metadataAnnotationStructurer->process($entity);
    $status_processing = $this->metadataAnnotationStructurer->store($entity, $metadataProcessing);

    // Processing step => flag set on the parent when the step reported no error.
    // (The debug `echo "<pre>"` after each step was dropped.)
    $flagPerStep = array(
        "processAutomatedEventExtraction" => 'preprocessed.automatedEvents',
        "thdapi"                          => 'preprocessed.automatedEntities',
        "textrazorapi"                    => 'preprocessed.automatedEntities',
        "semitagsapi"                     => 'preprocessed.automatedEntities',
        "nerdapi"                         => 'preprocessed.automatedEntities',
        "lupediaapi"                      => 'preprocessed.automatedEntities',
        "dbpediaspotlightapi"             => 'preprocessed.automatedEntities',
    );

    foreach ($flagPerStep as $step => $flag) {
        if (isset($status_processing[$step]) && !isset($status_processing[$step]['error'])) {
            \MongoDB\Entity::where('_id', '=', $entity->_id)->update(array($flag => true));
        }
    }

    if (isset($status_processing["processAutomatedEventExtraction"]['success'])) {
        $this->createStatisticsForMetadatadescriptionCache($entity->_id);

        return Redirect::back()->with('flashSuccess', 'Your video description has been pre-processed in named entities and putative events');
    }

    return Redirect::back()->with('flashError', 'An error occurred while the video description was being pre-processed in named entities and putative events');
}
/**
 * Build one "annotationVector" entity per DrDetectiveGamingPlatform job.
 *
 * For every job, the distinct `content.task_data` values (images) of its
 * child entities are collected; for each image all submitted coordinates are
 * flattened into one list and aggregated with static::aggregateCoordinates().
 * The per-image summaries are stored on a new Entity linked to the job and to
 * a single Activity created for this run.
 *
 * @return string Fixed status text
 */
public function getProcesscrowdgames()
{
    $platformJobs = Job::where('softwareAgent_id', 'DrDetectiveGamingPlatform')->get();

    $activity = new Activity();
    $activity->softwareAgent_id = 'DrDetectiveGamingPlatform';
    $activity->save();

    foreach ($platformJobs as $job) {
        // Create one annotation vector for each image on the game
        $distinctImages = Entity::where('jobParents', $job['_id'])
            ->distinct('content.task_data')
            ->get();

        $summaryPerImage = [];
        foreach ($distinctImages as $imageRow) {
            $imageName = $imageRow[0]; // unpack data

            $annotationRows = Entity::where('jobParents', $job['_id'])
                ->where('content.task_data', $imageName)
                ->get()
                ->toArray();

            // Create an array with all coordinates given for target image.
            $contents = array_column($annotationRows, 'content');
            $responses = array_column($contents, 'response');
            $coordinateSets = array_column($responses, 'Coordinates');

            // Flatten to array of coords.
            $flatCoordinates = [];
            foreach ($coordinateSets as $set) {
                foreach ($set as $point) {
                    $flatCoordinates[] = $point;
                }
            }

            $summaryPerImage[] = [
                'image' => $imageName,
                'aggregateCoordinates' => static::aggregateCoordinates($flatCoordinates),
            ];
        }

        // process annotations for this job into an annotation vector...
        $vectorEntity = new Entity();
        $vectorEntity->jobParents = [$job['_id']];
        $vectorEntity->annotationVector = $summaryPerImage;
        $vectorEntity->documentType = 'annotationVector';
        $vectorEntity->activity_id = $activity->_id;
        $vectorEntity->softwareAgent_id = $job->softwareAgent_id;
        $vectorEntity->project = $job->project;
        $vectorEntity->user_id = $job->user_id;
        $vectorEntity->save();
    }

    return 'OK -- may need adjustments...';
}
/**
 * Fetch the next free numeric suffix for text / medical /
 * relex-structured-sentence entities.
 *
 * The ids of all matching entities are sorted naturally; the last "/"-separated
 * segment of the highest id is incremented by one. Returns 0 when no matching
 * entity exists or when the last id cannot be read.
 *
 * @return int
 */
public function getLastDocumentInc()
{
    // Default covers both "no documents" and "last id unreadable"; the
    // original left $inc undefined in the latter case.
    $inc = 0;

    $existingIds = Entity::where('format', 'text')
        ->where('domain', 'medical')
        ->where("documentType", 'relex-structured-sentence')
        ->get(array("_id"));

    if (count($existingIds) > 0) {
        $sortedIds = $existingIds->sortBy(function ($entity) {
            return $entity->_id;
        }, SORT_NATURAL)->toArray();

        $last = end($sortedIds);
        if ($last) {
            $segments = explode("/", $last['_id']);
            // Explicit cast: the segment is a string.
            $inc = (int) end($segments) + 1;
        }
    }

    return $inc;
}
/**
 * Collect identifiers of "Moving Image" records via the ListRecords verb.
 *
 * Pages are requested one after another, following the resumption token of
 * each response, until $noEntries new identifiers have been appended to
 * $listOfRecords or the repository reports no further page. Identifiers that
 * equal the title of a stored "fullvideo" entity, or that were already
 * collected during this call, are skipped.
 *
 * The original implementation recursed for each page while passing the
 * remaining count by value, so the outer loop never saw the progress and
 * kept re-requesting the same page. This version iterates instead.
 *
 * @param array $parameters     Request parameters; must contain
 *                              "metadataPrefix" or "resumptionToken"
 * @param int   $noEntries      Number of new identifiers wanted
 * @param array &$listOfRecords Receives the collected identifiers
 * @throws Exception When parameters are missing or the response is not XML
 */
public function listRecords($parameters, $noEntries, &$listOfRecords)
{
    $curlRequest = new SVRequest();

    if (!isset($parameters)) {
        throw new Exception('Request parameters missing!');
    }

    if (!array_key_exists("metadataPrefix", $parameters) && !array_key_exists("resumptionToken", $parameters)) {
        throw new Exception("Request must contain -metadataPrefix- parameter!");
    }

    $entities = \MongoDB\Entity::where('documentType', 'fullvideo')->lists("title");

    while ($noEntries > 0) {
        // Rebuild the URL for every page: the parameters change once a
        // resumption token has been received.
        $url = $this->url . "verb=ListRecords";
        foreach ($parameters as $param => $value) {
            $url .= "&" . $param . "=" . $value;
        }

        $result = $curlRequest->curlRequest($url, "POST", null);
        $xml = simplexml_load_string($result["result"]);
        if ($xml === false) {
            throw new Exception('Error parsing XML');
        }

        foreach ($xml->ListRecords->record as $rNode) {
            if ($noEntries <= 0) {
                break;
            }

            $recordType = (string) $rNode->metadata->children('oai_oi', 1)->oi->children('oi', 1)->type;
            if (strpos($recordType, "Moving Image") === false) {
                continue;
            }

            $identifier = (string) $rNode->header->identifier;
            if (in_array($identifier, $entities)) {
                continue;
            }

            $listOfRecords[] = $identifier;
            // Remember it so a later page cannot add the same identifier again.
            $entities[] = $identifier;
            $noEntries--;
        }

        // A missing or empty resumption token marks the last page.
        $token = isset($xml->ListRecords->resumptionToken)
            ? trim((string) $xml->ListRecords->resumptionToken)
            : '';
        if ($token === '') {
            break;
        }

        // Follow-up requests are addressed by the token alone.
        $parameters["resumptionToken"] = $token;
        unset($parameters["metadataPrefix"]);
    }
}
/**
 * Accessor: number of distinct job ids among entities referencing this unit.
 *
 * Only computed for "relex-structured-sentence" documents; every other
 * document type yields null.
 *
 * @return int|null
 */
public function getJobCountAttribute()
{
    if (!($this->documentType == "relex-structured-sentence")) {
        return null;
    }

    $distinctJobRows = Entity::where('unit_id', $this->_id)
        ->distinct('job_id')
        ->get()
        ->toArray();

    return count(array_flatten($distinctJobRows));
}
/**
 * Rebuild the cached main search filters and store them in the Temp
 * collection under the id "mainSearchFilters".
 *
 * The cache holds the media categories (sorted by key), the number of
 * entities tagged "unit", the number of job entities and the number of crowd
 * agents.
 *
 * @return array The stored Temp document as an array
 */
public static function createMainSearchFiltersCache()
{
    $categories = static::getCategories();
    ksort($categories);

    $mainSearchFilters = array();
    $mainSearchFilters['media']['categories'] = $categories;
    $mainSearchFilters['media']['all'] = [
        "count" => \MongoDB\Entity::whereIn('tags', ['unit'])->count(),
        "label" => "All Media",
    ];
    $mainSearchFilters['job']['count'] = Entity::where('documentType', 'job')->count();
    // Count in the database instead of loading every crowd agent to count them.
    $mainSearchFilters['workers']['count'] = \MongoDB\CrowdAgent::count();

    $entity = new \MongoDB\Temp();
    $entity->_id = "mainSearchFilters";
    $entity->filters = $mainSearchFilters;
    $entity->save();

    return $entity->toArray();
}
/**
 * Load the newest template of the posted type into the job configuration
 * kept in the session, then push the change to the platform.
 *
 * Session keys read: jobconf_id_t, job_id_t, format_t. Request input read:
 * templateType.
 *
 * The template's cml (required) and css / instructions / js (optional) are
 * copied into the job configuration, and the type between "[[" and
 * "(entity/" in the title is replaced by the new type. Titles lacking either
 * marker are left untouched; the original code mangled them.
 *
 * @return mixed Redirect response
 */
public function postLoadt()
{
    $jc_id = Session::get('jobconf_id_t');
    $j_id = Session::get('job_id_t');

    $jc = \MongoDB\Entity::where("_id", $jc_id)->first();
    $j = \MongoDB\Entity::where("_id", $j_id)->first();
    if (!$jc || !$j) {
        // Stale or missing session ids would otherwise be dereferenced as null.
        return Redirect::back()->with('flashError', "job or job configuration not found.");
    }

    $jcco = $jc['content'];
    $jcco['type'] = Input::get('templateType');
    if ($jcco['type'] == null) {
        return Redirect::back()->with('flashError', "form not filled in (type).");
    }

    // get a selected, newest jcbase
    $maxi = \MongoDB\Template::where("type", $jcco['type'])
        ->where("format", Session::get('format_t'))
        ->max('version');
    $jcbase = \MongoDB\Template::where("type", $jcco['type'])
        ->where("format", Session::get('format_t'))
        ->where('version', $maxi)
        ->first();

    if (!isset($jcbase)) {
        Session::flash('flashError', "template not found");
        return Redirect::to("jobs2/submit");
    }

    if (!isset($jcbase['cml'])) {
        Session::flash('flashError', "No template details in this template");
        return Redirect::to("jobs2/submit");
    }

    $jcco['cml'] = $jcbase['cml'];
    foreach (array('css', 'instructions', 'js') as $optionalPart) {
        if (isset($jcbase[$optionalPart])) {
            $jcco[$optionalPart] = $jcbase[$optionalPart];
        }
    }
    $jcco['template_id'] = $jcbase['_id'];

    // Swap the type inside the title: "<title>[[<type>(entity/...".
    if (isset($jcco['title'])) {
        $currentTitle = $jcco['title'];
        $typeStart = strpos($currentTitle, '[[');
        $entityStart = strpos($currentTitle, '(entity/');
        if ($typeStart !== false && $entityStart !== false) {
            $jcco['title'] = substr($currentTitle, 0, $typeStart)
                . "[[" . $jcco['type']
                . substr($currentTitle, $entityStart);
        }
    }

    $jc['content'] = $jcco;
    $j['type'] = $jcco['type'];
    $jc->save();
    $j->save();

    // update the job on the platform
    $platform = App::make('cf2');
    $platform->cfUpdate($j['platformJobId'], $jc);

    Session::flash('flashSuccess', "Job loaded.");

    return Redirect::to("jobs");
}
/**
 * Copy all job entities (with their configuration relation) into the `temp`
 * collection.
 *
 * The session flag "rawArray" is set while the jobs are loaded and cleared
 * afterwards. When at least one job exists, the previously cached job
 * documents are force-deleted and the fresh ones are batch-inserted.
 */
public static function createJobCache()
{
    \Session::flash('rawArray', 1);

    $tempCollection = \DB::getMongoDB()->temp;
    $jobDocuments = \MongoDB\Entity::where('documentType', 'job')
        ->with('hasConfiguration')
        ->get()
        ->toArray();

    $hasJobs = count($jobDocuments) > 0;
    if ($hasJobs) {
        try {
            \MongoDB\Temp::where('documentType', 'job')->forceDelete();
            $tempCollection->batchInsert($jobDocuments, array('continueOnError' => true));
        } catch (Exception $e) {
            // ContinueOnError will still throw an exception on duplication,
            // even though it continues, so we just move on.
        }
    }

    \Session::forget('rawArray');
}
/**
 * Test endpoint: create an image entity from a JSON payload.
 *
 * The payload is read from the `data` input or, failing that, from the raw
 * request body. A software agent and an Activity are created first; the
 * entity is saved only when no entity with the same hash exists yet.
 *
 * Returns false when there is no usable payload, a serialized one-element
 * array holding the error message when creating the software agent or the
 * activity fails, and otherwise the entity as a JSON response (also when it
 * was not saved because of a duplicate hash).
 *
 * @return mixed
 */
public function anyTest()
{
    try {
        $data = Input::get('data');
        if (!$data) {
            $data = \Request::getContent();
        }

        if (empty($data)) {
            return false;
        }

        $data = json_decode($data, true);
        // Invalid JSON decodes to null; without an agent id nothing below can work.
        if (!is_array($data) || !isset($data['softwareAgent_id'])) {
            return false;
        }

        $data['softwareAgent_id'] = strtolower($data['softwareAgent_id']);

        try {
            $this->createPostSoftwareAgent($data);
        } catch (Exception $e) {
            return serialize([$e->getMessage()]);
        }

        $activity = null;
        try {
            $activity = new Activity();
            $activity->softwareAgent_id = $data['softwareAgent_id'];
            $activity->save();
        } catch (Exception $e) {
            // Something went wrong with creating the Activity.
            // It may not exist at all if the constructor itself failed.
            if ($activity !== null) {
                $activity->forceDelete();
            }

            return serialize([$e->getMessage()]);
        }

        $entity = new Entity();
        $entity->format = 'image';
        $entity->domain = $data['domain'];
        $entity->tags = $data['tags'];
        $entity->documentType = $data['documentType'];
        $entity->softwareAgent_configuration = $data['softwareAgent_configuration'];

        if (isset($data['parents'])) {
            $entity->parents = $data['parents'];
        }

        $entity->content = $data['content'];

        if (isset($data['threshold'])) {
            $entity->threshold = $data['threshold'];
        }

        if (isset($data['relevantFeatures'])) {
            $entity->recognizedFeatures = $data['relevantFeatures'];
        }

        $entity->hash = isset($data['hash']) ? $data['hash'] : md5(serialize($data['content']));
        $entity->activity_id = $activity->_id;

        // Save only when no entity with this hash exists yet.
        if (!Entity::where('hash', $entity->hash)->first()) {
            $entity->save();
        }

        return Response::json($entity);
    } catch (Exception $e) {
        dd($e->getMessage());
    }
}
/**
 * Recompute the statistics cache of this crowd agent and save the model.
 *
 * The cache is built from the agent's workerunits and from the jobs whose
 * metrics list this agent under `metrics.workers.withFilter`:
 *  - workerunits:  total / spam / non-spam counts
 *  - mediaTypes:   document types of the parents of the jobs' batches
 *  - jobTypes:     per workerunit type, the number of jobs and their templates
 *  - mediaFormats / mediaDomains: job counts per format / domain
 *  - spammer:      jobs in which this agent is listed as a spammer
 *
 * When no job references the agent, the model is saved without touching the
 * cache.
 *
 * Fixes over the original: `$unitids` is initialised; media types are counted
 * under `types` without being reset for every new type; the per-type job
 * count is the query result rather than count() of an integer; the template
 * filter uses the unpacked template value.
 */
public function updateStats2()
{
    // take all the jobs for that worker
    $crowdAgentJobs = Job::where('metrics.workers.withFilter.' . $this->_id, 'exists', true)->get(['_id']);
    if (!$crowdAgentJobs) {
        return;
    }

    $jobRows = $crowdAgentJobs->toArray();
    if (count($jobRows) == 0) {
        $this->save();
        return;
    }

    // Flattened once; the original recomputed this for every query.
    $jobIds = array_flatten($jobRows);

    $domains = $formats = $types = $jobids = $unitids = array();
    $spam = $nonspam = $totalNoOfWorkerunits = 0;

    foreach ($this->workerunits as $a) {
        $totalNoOfWorkerunits++;

        if ($a->spam) {
            $spam++;
        } else {
            $nonspam++;
        }

        $domains[] = $a->domain;
        $formats[] = $a->format;
        $types[] = $a->type;
        $jobids[] = $a->job_id;
        $unitids[] = $a->unit_id;
    }

    $distinctWorkerunitTypes = array_unique($types); // These actually are the Workerunit types
    $distinctMediaFormats = array_unique($formats);
    $distinctMediaDomains = array_unique($domains);
    $workerParticipatedIn = count(array_unique($unitids));

    $cache = array();
    $cache["workerunits"] = ["count" => $totalNoOfWorkerunits, "spam" => $spam, "nonspam" => $nonspam];

    // take all distinct batches
    $distinctBatchIds = \MongoDB\Entity::whereIn('_id', $jobIds)->distinct('batch_id')->get(['_id']);

    $cache["mediaTypes"] = ["count" => count($distinctWorkerunitTypes), "types" => []];
    foreach ($distinctBatchIds as $distinctBatchId) {
        $batchParents = array_flatten(\MongoDB\Entity::where('_id', '=', $distinctBatchId[0])->lists('parents'));
        if (empty($batchParents)) {
            continue;
        }

        $parentTypes = array_flatten(
            \MongoDB\Entity::where('_id', '=', $batchParents[0])
                ->distinct('documentType')
                ->get(['documentType'])
                ->toArray()
        );
        if (empty($parentTypes)) {
            continue;
        }

        $mediaType = $parentTypes[0];
        if (isset($cache["mediaTypes"]["types"][$mediaType])) {
            $cache["mediaTypes"]["types"][$mediaType]++;
        } else {
            $cache["mediaTypes"]["types"][$mediaType] = 1;
        }
    }
    $cache["mediaTypes"]["distinct"] = count($cache["mediaTypes"]["types"]);

    if (count($distinctWorkerunitTypes) > 0) {
        $cache["jobTypes"] = [
            "distinct" => count($distinctWorkerunitTypes),
            "count" => count(array_unique($jobids)),
            "types" => [],
        ];

        foreach ($distinctWorkerunitTypes as $distinctJobType) {
            $jobsOfTypeCount = Job::whereIn('_id', $jobIds)->type($distinctJobType)->count();
            $distinctJobTemplateTypes = Job::whereIn('_id', $jobIds)
                ->type($distinctJobType)
                ->distinct('template')
                ->get()
                ->toArray();

            $cache["jobTypes"]["types"][$distinctJobType]['distinct'] = count($distinctJobTemplateTypes);
            $cache["jobTypes"]["types"][$distinctJobType]['count'] = $jobsOfTypeCount;
            $cache["jobTypes"]["types"][$distinctJobType]["templates"] = [];

            foreach ($distinctJobTemplateTypes as $distinctJobTemplateType) {
                // Each distinct row carries its value at index 0.
                $templateName = $distinctJobTemplateType[0];
                $cache["jobTypes"]["types"][$distinctJobType]["templates"][$templateName] =
                    Job::whereIn('_id', $jobIds)->where('template', $templateName)->count();
            }
        }
    }

    if (count($distinctMediaFormats) > 0) {
        $cache["mediaFormats"] = ["distinct" => count($distinctMediaFormats), "count" => $workerParticipatedIn, "formats" => []];
        $cache["mediaDomains"] = ["distinct" => count($distinctMediaDomains), "count" => $workerParticipatedIn, "domains" => []];

        foreach ($distinctMediaFormats as $distinctMediaFormat) {
            $cache["mediaFormats"]["formats"][$distinctMediaFormat] = \MongoDB\Entity::whereIn('_id', $jobIds)
                ->where('documentType', 'job')
                ->where('format', $distinctMediaFormat)
                ->count();
        }

        foreach ($distinctMediaDomains as $distinctMediaDomain) {
            $cache["mediaDomains"]["domains"][$distinctMediaDomain] = \MongoDB\Entity::whereIn('_id', $jobIds)
                ->where('documentType', 'job')
                ->where('domain', $distinctMediaDomain)
                ->count();
        }
    }

    $jobsAsSpammer = \MongoDB\Entity::whereIn('_id', $jobIds)
        ->whereIn('metrics.spammers.list', [$this->_id])
        ->lists('platformJobId');
    $cache["spammer"]["count"] = count($jobsAsSpammer);
    $cache["spammer"]["jobs"] = array_flatten($jobsAsSpammer);

    $this->cache = $cache;
    $this->save();
}
<?php // Get a list of titles and template types which are already in the database // and put them to dropdown $aTitles = array(null => '---'); $aTypes = array(null => '---'); $_format = unserialize(Session::get('batch'))->format; $batchUnits = unserialize(Session::get('batch'))->parents; $batchUnitContent = \MongoDB\Entity::where("_id", $batchUnits[0])->get()->first(); $unitAttributes = array(); $c = array_change_key_case(array_dot($batchUnitContent['content']), CASE_LOWER); foreach ($c as $key => $val) { $key = strtolower(str_replace('.', '_', $key)); $unitAttributes[$key] = $key; } // dd($unitAttributes); $_aTitles = \MongoDB\Entity::where("documentType", "jobconf")->where("format", $_format)->distinct("content.title")->get(); $_aTitles = array_flatten($_aTitles->toArray()); foreach ($_aTitles as $key => $value) { $pos = strpos($value, '[['); if ($pos > 0) { $t = trim(substr($value, 0, $pos)); if (!array_key_exists($t, $aTitles)) { $aTitles[$t] = $t; } } } $_aTypes = \MongoDB\Template::where("format", $_format)->distinct('type')->get(); $_aTypes = array_flatten($_aTypes->toArray()); foreach ($_aTypes as $key => $value) { if (!isset($aTypes[$value])) { $aTypes[$value] = $value;
/**
 * List the distinct `type` values of all job entities.
 *
 * @return array Flat list of job types
 */
public function getJobtypes()
{
    $distinctTypeRows = \MongoDB\Entity::where('documentType', 'job')
        ->distinct('type')
        ->get()
        ->toArray();

    return array_flatten($distinctTypeRows);
}
/**
 * Refresh the search index, one batch of units per request.
 *
 * The `next` request input is the offset of the first unit to index; offset 0
 * clears the index first. For every unit in the batch, its properties are
 * mapped to keys via getKeys(); keys seen more than once get the prioritised
 * format and the union of their document types. The merged keys are stored
 * through the search component.
 *
 * NOTE(review): $units is indexed with the absolute offset $i, which only
 * lines up if the distinct query ignores skip()/take() — confirm against the
 * database driver. The `tags` filter also differs between the count
 * (whereIn) and the distinct query (where); verify that this is intended.
 *
 * @return array 'log' (progress text), 'next' (offset of the next batch),
 *               'last' (total number of units)
 */
public function postRefreshindex()
{
    $searchComponent = new MediaSearchComponent();

    // amount of units to index per iteration
    $batchsize = 500;
    // Normalised so a missing or non-numeric value behaves like offset 0.
    $from = (int) Input::get('next');
    $unitCount = Entity::whereIn('tags', ['unit'])->count();

    // reset index on start
    if ($from == 0) {
        $searchComponent->clear();
    }

    // reduce last batch to remaining units (never below zero)
    if ($from + $batchsize > $unitCount) {
        $batchsize = max(0, $unitCount - $from);
    }

    // all units in this range
    $units = Entity::distinct('_id')->where('tags', ['unit'])->skip($from)->take($batchsize)->get();

    // get keys for each unit in this batch
    $allKeys = [];
    for ($i = $from; $i < $from + $batchsize; $i++) {
        // The distinct list can be shorter than the count above.
        if (!isset($units[$i])) {
            continue;
        }

        // get data of unit; it may have been removed since the id was listed
        $unit = Entity::where('_id', $units[$i][0])->first();
        if (!$unit) {
            continue;
        }

        // map all properties into keys with formats
        $keys = $this->getKeys($unit->attributesToArray());

        // merge keys with set of keys and get the right format (e.g. if it
        // occurs both as string and int we treat all of them as a string)
        foreach ($keys as $k => $v) {
            if (!array_key_exists($k, $allKeys)) {
                $allKeys[$k] = [
                    'key' => $v['key'],
                    'label' => $v['label'],
                    'format' => $v['format'],
                    'documents' => [$v['document']],
                ];
                continue;
            }

            $allKeys[$k]['format'] = $searchComponent->prioritizeFormat([$allKeys[$k]['format'], $v['format']]);

            // add document type if it is not in the list yet
            if (!in_array($v['document'], $allKeys[$k]['documents'])) {
                $allKeys[$k]['documents'][] = $v['document'];
            }
        }
    }

    $searchComponent->store($allKeys);

    return [
        'log' => $from . ' to ' . ($from + $batchsize) . ' of ' . $unitCount,
        'next' => $from + $batchsize,
        'last' => $unitCount,
    ];
}
/**
 * Queue handler: rebuild the statistics cache of the given units.
 *
 * For every unit id the following is computed and stored on the unit's
 * `cache` attribute: jobs (count, per-type counts, distinct types), workers
 * (total / spam / non-spam / potential spam), workerunits, per-platform
 * counts (cf, amt), jobs that filtered the unit, batches containing it and
 * its child entities. The unit's `avg_clarity` is updated afterwards.
 *
 * Fixes over the original: the per-type job counts are no longer reset on
 * every type (only the last type survived); per-unit aggregates are
 * re-initialised for each unit; unknown unit ids are skipped instead of
 * being dereferenced; the clarity field path uses `_id` consistently.
 *
 * @param mixed $job  The queue job; deleted when all units are processed
 * @param array $data Unit ids
 */
public function fire($job, $data)
{
    // Index all job ids by job type once.
    $jobIdsPerType = array();
    foreach (Job::get() as $j) {
        $jobIdsPerType[$j->type][] = $j->_id;
    }

    foreach ($data as $id) {
        set_time_limit(30);

        $unit = \MongoDB\Entity::id($id)->first();
        if (!$unit) {
            \Log::debug("Unit {$id} not found; skipped.");
            continue;
        }

        $batch = array(
            'count' => \MongoDB\Entity::where('documentType', 'batch')
                ->where('parents', 'all', array($unit->_id))
                ->count(),
        );

        // Workerunits and workers
        $workerunit = array('count' => 0, 'spam' => 0, 'nonSpam' => 0);
        $workerlist = $workersspam = $workersnonspam = $joblist = array();
        foreach (Workerunit::where('unit_id', $unit->_id)->get() as $a) {
            $joblist[] = $a->job_id;
            $workerlist[] = $a->crowdAgent_id;

            if ($a->spam) {
                $workerunit['spam']++;
                $workersspam[] = $a->crowdAgent_id;
            } else {
                $workerunit['nonSpam']++;
                $workersnonspam[] = $a->crowdAgent_id;
            }
        }
        $workerunit['count'] = $workerunit['spam'] + $workerunit['nonSpam'];

        $workers = array(
            'count' => count(array_unique($workerlist)),
            'spam' => count(array_unique($workersspam)),
            'nonSpam' => count(array_unique($workersnonspam)),
            // matches of the spam list against the non-spam list
            'potentialSpam' => count(array_intersect($workersspam, $workersnonspam)),
        );

        // Jobs
        $distinctJobIds = array_unique($joblist);
        $jobs = array('count' => count($distinctJobIds), 'types' => array());
        foreach ($jobIdsPerType as $type => $idsOfType) {
            $count = count(array_intersect($distinctJobIds, $idsOfType));
            if ($count != 0) {
                $jobs['types'][$type] = $count;
            }
        }
        $jobs['distinct'] = count($jobs['types']);

        // Platforms
        $platformField = array(
            'cf' => \MongoDB\Entity::where('unit_id', $unit->_id)->where('softwareAgent_id', 'cf')->count(),
            'amt' => \MongoDB\Entity::where('unit_id', $unit->_id)->where('softwareAgent_id', 'amt')->count(),
        );

        // filtered
        $filteredField = array();
        $filteredField['job_ids'] = array_flatten(
            Job::where('metrics.filteredUnits.list', 'all', array($unit->_id))->get(['_id'])->toArray()
        );
        $filteredField['count'] = count($filteredField['job_ids']);

        // children
        $derivatives = \MongoDB\Entity::whereIn('parents', array($unit->_id))->lists('_id');
        $children = array('count' => count($derivatives), 'list' => $derivatives);

        $unit->cache = [
            "jobs" => $jobs,
            "workers" => $workers,
            "softwareAgent" => $platformField,
            "workerunits" => $workerunit,
            "filtered" => $filteredField,
            "batches" => $batch,
            "children" => $children,
        ];
        $unit->update();

        $clarityField = 'metrics.units.withoutSpam.' . $unit->_id;
        $avg_clarity = \MongoDB\Entity::where($clarityField, 'exists', true)
            ->avg($clarityField . '.max_relation_Cos.avg');
        if (!isset($avg_clarity)) {
            $avg_clarity = 0;
        }

        $unit->avg_clarity = $avg_clarity;
        $unit->update();

        \Log::debug("Updated unit {$unit->_id}.");
    }

    $job->delete(); // the Queue job...
}
/**
 * Links RelEx workerunits that have no unit_id yet to their unit.
 *
 * For every such workerunit the sentence and the two terms are extracted from
 * the HTML embedded in its `question` XML, and the unit whose formatted
 * sentence and terms match exactly is looked up. On a match the workerunit's
 * unit_id is saved. Progress is echoed as plain text.
 *
 * Workerunits whose question cannot be parsed, or whose HTML lacks the expected
 * spans, are reported and skipped instead of aborting the whole run.
 *
 * NOTE(review): the guard checks `content` while the parsing reads `question`;
 * confirm that this is intended.
 *
 * @return void
 */
public function getUnitsrelex()
{
    foreach (\Workerunit::where('type', 'RelEx')->get() as $ann) {
        set_time_limit(30);

        if (!isset($ann->content)) {
            echo "{$ann->_id} no content\r\n";
            echo "--------------------------------\r\n";
            continue;
        }

        if (!empty($ann->unit_id)) {
            echo "{$ann->_id} has unitid\r\n";
            continue;
        }

        // Collect libxml errors internally so that malformed XML yields `false`
        // instead of PHP warnings; the previous setting is restored afterwards.
        $previousLibxmlSetting = libxml_use_internal_errors(true);
        $xml = simplexml_load_string((string) $ann->question);
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlSetting);

        if ($xml === false) {
            echo "{$ann->_id} unparsable question\r\n";
            continue;
        }

        $html = (string) $xml->HTMLContent;
        $dom = HtmlDomParser::str_get_html($html);
        if (!$dom) {
            echo "{$ann->_id} unparsable HTML content\r\n";
            continue;
        }

        // The sentence is the first `senval` span; the terms are taken from the
        // second and third highlighted spans (indices 1 and 2).
        $sentenceNode = $dom->find('span[class=senval]', 0);
        $firstTermNode = $dom->find('span[style=color:#0000CD;]', 1);
        $secondTermNode = $dom->find('span[style=color:#0000CD;]', 2);
        if (!$sentenceNode || !$firstTermNode || !$secondTermNode) {
            echo "{$ann->_id} missing sentence or term spans\r\n";
            continue;
        }

        $sentence = rtrim($sentenceNode->innertext, '.');
        $term1 = $firstTermNode->innertext;
        $term2 = $secondTermNode->innertext;

        // Exact match only. An earlier similar_text() based fallback for
        // near-matching sentences was disabled and is not part of this lookup.
        $unit = \MongoDB\Entity::where('content.terms.first.formatted', $term1)
            ->where('content.terms.second.formatted', $term2)
            ->where('content.sentence.formatted', $sentence)
            ->first();

        if (!$unit) {
            echo "\r\nNO {$ann->_id}\r\n{$term1}--{$term2}--{$sentence}\r\n";
            echo "----------------------------------------";
            continue;
        }

        echo "\r\n\\YY {$sentence}\r\n";
        echo "== " . $unit->content['sentence']['formatted'];
        echo "\r\n{$ann->_id}->{$unit->_id}\r\n";

        $ann->unit_id = $unit->_id;
        $ann->save();
    }
}
/**
 * Stores CrowdFlower ("cf") workerunits and the crowd agents that produced them.
 *
 * One Activity is created for the whole import. For each mapped row the crowd
 * agent is created when no agent with that platform id exists yet, and a
 * workerunit entity is created unless one with the same platformWorkerunitId
 * already exists.
 *
 * @param array  $mappedWorkerunitsWithUnits Rows with the CrowdFlower columns
 *                                           (_id, _worker_id, _country, _region,
 *                                           _city, _trust, _channel, _started_at,
 *                                           _created_at), a 'unit' entry holding
 *                                           the unit's '_id', and the answer
 *                                           fields of the given task type.
 * @param mixed  $job_id                     Stored as job_id on every created workerunit.
 * @param string $taskType                   "FactSpan", "RelEx" or "RelDir"; selects
 *                                           which answer fields are copied into the
 *                                           workerunit content. Any other value
 *                                           leaves the content unset.
 *
 * @return array Empty on success. Otherwise $status['error'][<row index>] holds
 *               the exception message under 'activity', 'crowdagent' or 'entity'
 *               (row index 0 is used for the activity, rows start at 1).
 */
public function OldcreateWorkerunitsAndCrowdAgents($mappedWorkerunitsWithUnits, $job_id, $taskType = "FactSpan")
{
    $status = array();
    $index = 0;

    $activity = null;
    try {
        $activity = new Activity();
        $activity->softwareAgent_id = "cf";
        $activity->save();
    } catch (Exception $e) {
        // The constructor itself may have thrown, in which case there is
        // nothing to delete.
        if ($activity) {
            $activity->forceDelete();
        }
        $status['error'][$index]['activity'] = $e->getMessage();

        // Every workerunit references the activity; without it nothing valid
        // can be stored, so stop here instead of writing dangling references.
        return $status;
    }

    // Answer fields copied verbatim into the workerunit content, per task type.
    $contentFieldsPerTaskType = array(
        "FactSpan" => array(
            "confirmfirstfactor",
            "confirmsecondfactor",
            "firstfactor",
            "secondfactor",
            "saveselectionids1",
            "saveselectionids2",
            "confirmids1",
            "confirmids2",
            "sentencefirstfactor",
            "sentencesecondfactor",
        ),
        "RelEx" => array(
            "step_1_select_the_valid_relations",
            "step_2a_copy__paste_only_the_words_from_the_sentence_that_express_the_relation_you_selected_in_step1",
            "step_2b_if_you_selected_none_in_step_1_explain_why",
        ),
        "RelDir" => array(
            "direction",
        ),
    );

    foreach ($mappedWorkerunitsWithUnits as $mappedWorkerunitsWithUnit) {
        $index++;

        $crowdagent = CrowdAgent::where('platformAgentId', $mappedWorkerunitsWithUnit['_worker_id'])
            ->where('softwareAgent_id', 'cf')
            ->first();

        if (!$crowdagent) {
            try {
                $crowdagent = new CrowdAgent();
                $crowdagent->_id = "crowdagent/cf/" . $mappedWorkerunitsWithUnit['_worker_id'];
                $crowdagent->softwareAgent_id = 'cf';
                $crowdagent->platformAgentId = (int) $mappedWorkerunitsWithUnit['_worker_id'];
                $crowdagent->country = $mappedWorkerunitsWithUnit['_country'];
                $crowdagent->region = $mappedWorkerunitsWithUnit['_region'];
                $crowdagent->city = $mappedWorkerunitsWithUnit['_city'];
                $crowdagent->cfWorkerTrust = (float) $mappedWorkerunitsWithUnit['_trust'];
                $crowdagent->save();
            } catch (Exception $e) {
                // The error is recorded but the row is not skipped: the
                // workerunit below is still created.
                $status['error'][$index]['crowdagent'] = $e->getMessage();
            }
        }

        $alreadyStored = Entity::where('softwareAgent_id', 'cf')
            ->where('platformWorkerunitId', $mappedWorkerunitsWithUnit['_id'])
            ->first();
        if ($alreadyStored) {
            continue;
        }

        $entity = new Entity();
        $entity->format = "text";
        $entity->domain = "medical";
        $entity->documentType = "workerunit";
        $entity->job_id = $job_id;
        $entity->activity_id = $activity->_id;
        $entity->crowdAgent_id = $crowdagent->_id;
        $entity->softwareAgent_id = "cf";
        $entity->unit_id = $mappedWorkerunitsWithUnit['unit']['_id'];
        $entity->platformWorkerunitId = (int) $mappedWorkerunitsWithUnit['_id'];
        $entity->cfChannel = $mappedWorkerunitsWithUnit['_channel'];
        $entity->acceptTime = new MongoDate(strtotime($mappedWorkerunitsWithUnit['_started_at']));
        $entity->submitTime = new MongoDate(strtotime($mappedWorkerunitsWithUnit['_created_at']));
        $entity->cfTrust = (float) $mappedWorkerunitsWithUnit['_trust'];

        if (isset($contentFieldsPerTaskType[$taskType])) {
            $content = array();
            foreach ($contentFieldsPerTaskType[$taskType] as $field) {
                $content[$field] = $mappedWorkerunitsWithUnit[$field];
            }
            $entity->content = $content;
        }

        try {
            $entity->save();
        } catch (Exception $e) {
            $status['error'][$index]['entity'] = $e->getMessage();
        }
    }

    return $status;
}
/**
 * Display a listing of the resource.
 *
 * With an `id` input, returns that single entity as an array: a workerunit id
 * (an id containing "workerunit") is loaded with its unit, any other id is
 * loaded with its workerunits.
 *
 * Otherwise returns a JSON page of job entities, loaded with their
 * configuration and user agent. Supported inputs:
 *  - filter[field]=value             exact match (numeric values are cast to int)
 *  - filter[field][operator]=value   comparison with one of $this->operators;
 *                                    'like' values are wrapped in '%'
 *  - filter[hasConfiguration.x][op]  matches jobs whose job configuration has
 *                                    field x 'like' the given value
 *  - sortBy / order                  sort field and direction; without sortBy
 *                                    the newest jobs come first
 *  - perPage / page                  page size (default 10) and 1-based page
 *                                    number (default 1)
 *
 * @return Response
 */
public function index()
{
    // Return one entity
    if (Input::has('id')) {
        $id = Input::get('id');

        // Check if is workerunit, when workerunit append units, if not append
        // workerunits (assumption that it is a unit so far valid)
        $relation = (strpos($id, 'workerunit') === false) ? 'hasWorkerunits' : 'hasUnit';

        return \MongoDB\Entity::with($relation)->where('_id', $id)->get()->toArray();
    }

    $documents = $this->repository->returnCollectionObjectFor("entity")
        ->where('documentType', 'job')
        ->with('hasConfiguration')
        ->with('wasAttributedToUserAgent');

    // Filter on the requested fields
    if (Input::has('filter')) {
        foreach (Input::get('filter') as $filter => $value) {
            if (is_numeric($value)) {
                $documents = $documents->where($filter, (int) $value);
                continue;
            }

            if ($filter == "userAgent") {
                $filter = "user_id";
            }

            if (!is_array($value)) {
                $documents = $documents->whereIn($filter, array($value));
                continue;
            }

            foreach ($value as $operator => $subvalue) {
                if ($filter == "username") {
                    $documents = $documents->where('user_id', 'like', '%' . $subvalue . '%');
                    continue;
                }

                if (!in_array($operator, $this->operators)) {
                    continue;
                }

                if (is_numeric($subvalue)) {
                    $subvalue = (int) $subvalue;
                }

                if ($operator == 'like') {
                    $subvalue = '%' . $subvalue . '%';
                }

                if (strpos($filter, "hasConfiguration") !== false) {
                    // Use a separate variable for the split path: overwriting
                    // $filter with an array would break the next operator of
                    // this same filter.
                    $fieldPath = explode(".", $filter);
                    $jobConf = Entity::where('documentType', '=', 'jobconf')->where(end($fieldPath), 'like', $subvalue);
                    $allJobConfIDs = array_flatten($jobConf->get(['_id'])->toArray());
                    $documents = $documents->whereIn('jobConf_id', $allJobConfIDs);
                    continue;
                }

                $documents = $documents->where($filter, $operator, $subvalue);
            }
        }
    }

    if (Input::has('sortBy')) {
        // `order` is optional; fall back to ascending instead of reading an
        // undefined variable.
        $order = Input::has('order') ? Input::get('order') : 'asc';
        $documents = $documents->orderBy(Input::get('sortBy'), $order);
    } else {
        // If no sort is selected, newest jobs come on top
        $documents = $documents->orderBy('created_at', 'desc');
    }

    // Missing, zero or negative paging values fall back to the defaults so the
    // skip/take values below can never be negative.
    $perPage = (int) Input::get('perPage');
    if ($perPage < 1) {
        $perPage = 10;
    }

    $page = (int) Input::get('page');
    if ($page < 1) {
        $page = 1;
    }

    $total = $documents->count();
    $skip = ($page - 1) * $perPage;
    $jobs = $documents->skip($skip)->take($perPage)->get();

    // Paginate results, total amount of records, records per page and currentPage etc.
    // todo efficiency improvement to cache $jobs for pagination
    $paginator = array(
        "total" => $total,
        "perPage" => $perPage,
        "currentPage" => $page,
        "data" => $jobs->toArray(),
    );

    return Response::json($paginator);
}