Beispiel #1
0
 /**
  * Queue handler that rebuilds the cached statistics of a list of units.
  *
  * For every unit id in $data the unit is loaded, its counters (batches,
  * worker units, workers, jobs per type, platforms, filtering jobs and
  * derived children) are recomputed and stored in the unit's `cache`
  * attribute together with `avg_clarity`. Ids that cannot be resolved to a
  * unit are logged and skipped instead of aborting the whole run.
  *
  * @param mixed $job  Queue job; delete() is called on it once all units were processed.
  * @param array $data List of unit ids.
  */
 public function fire($job, $data)
 {
     // Group all job ids by job type once; the per-unit loop only intersects against it.
     $jobIdsPerType = array();
     foreach (Job::get() as $j) {
         $jobIdsPerType[$j->type][] = $j->_id;
     }

     foreach ($data as $id) {
         // Restart the execution timer for every unit.
         set_time_limit(30);

         $unit = \MongoDB\Entity::id($id)->first();
         if ($unit === null) {
             // Without this guard a single stale id is fatal on the first property access.
             \Log::warning("Unit {$id} not found; skipping cache update.");
             continue;
         }

         // Batches that list this unit among their parents.
         $batch = array(
             'count' => \MongoDB\Entity::where('documentType', 'batch')->where('parents', 'all', array($unit->_id))->count(),
         );

         // Worker units of this unit, split into spam / non-spam.
         $workerunit = array('count' => 0, 'spam' => 0, 'nonSpam' => 0);
         $workerlist = $workersspam = $workersnonspam = $joblist = array();
         foreach (Workerunit::where('unit_id', $unit->_id)->get() as $a) {
             $joblist[] = $a->job_id;
             $workerlist[] = $a->crowdAgent_id;
             if ($a->spam) {
                 $workerunit['spam']++;
                 $workersspam[] = $a->crowdAgent_id;
             } else {
                 $workerunit['nonSpam']++;
                 $workersnonspam[] = $a->crowdAgent_id;
             }
         }
         $workerunit['count'] = $workerunit['spam'] + $workerunit['nonSpam'];

         // Distinct workers. potentialSpam intersects the de-duplicated lists so
         // that it counts workers (like its sibling counters), not worker units.
         $distinctSpamWorkers = array_unique($workersspam);
         $distinctNonSpamWorkers = array_unique($workersnonspam);
         $workers = array(
             'count' => count(array_unique($workerlist)),
             'spam' => count($distinctSpamWorkers),
             'nonSpam' => count($distinctNonSpamWorkers),
             'potentialSpam' => count(array_intersect($distinctSpamWorkers, $distinctNonSpamWorkers)),
         );

         // Jobs: 'types' is initialised once per unit, before the loop, so the
         // counts of all job types are kept (not just the last one visited).
         $distinctJobIds = array_unique($joblist);
         $jobs = array('count' => count($distinctJobIds), 'types' => array());
         foreach ($jobIdsPerType as $type => $typeJobIds) {
             $count = count(array_intersect($distinctJobIds, $typeJobIds));
             if ($count != 0) {
                 $jobs['types'][$type] = $count;
             }
         }
         $jobs['distinct'] = count($jobs['types']);

         // Number of documents per platform (softwareAgent) for this unit.
         $platformField = array(
             'cf' => \MongoDB\Entity::where('unit_id', $unit->_id)->where('softwareAgent_id', 'cf')->count(),
             'amt' => \MongoDB\Entity::where('unit_id', $unit->_id)->where('softwareAgent_id', 'amt')->count(),
         );

         // Jobs whose metrics list this unit as filtered.
         $filteredField = array();
         $filteredField['job_ids'] = array_flatten(Job::where('metrics.filteredUnits.list', 'all', array($unit->_id))->get(['_id'])->toArray());
         $filteredField['count'] = count($filteredField['job_ids']);

         // Entities derived from this unit.
         $derivatives = \MongoDB\Entity::whereIn('parents', array($unit->_id))->lists('_id');
         $children = array('count' => count($derivatives), 'list' => $derivatives);

         // Average clarity over all documents that carry metrics for this unit.
         // The same `_id` is used for the filter and the averaged path.
         $metricsPath = 'metrics.units.withoutSpam.' . $unit->_id;
         $avg_clarity = \MongoDB\Entity::where($metricsPath, 'exists', true)->avg($metricsPath . '.max_relation_Cos.avg');
         if (!isset($avg_clarity)) {
             $avg_clarity = 0;
         }

         // Persist cache and clarity in one write.
         $unit->cache = ["jobs" => $jobs, "workers" => $workers, "softwareAgent" => $platformField, "workerunits" => $workerunit, "filtered" => $filteredField, "batches" => $batch, "children" => $children];
         $unit->avg_clarity = $avg_clarity;
         $unit->update();
         \Log::debug("Updated unit {$unit->_id}.");
     }

     // Remove the queue job now that every unit has been handled.
     $job->delete();
 }
Beispiel #2
0
 /**
  * Accessor returning the parent entities this entity was derived from.
  *
  * @return array|null The parent entities as arrays, or null when the
  *                    `parents` attribute is not set.
  */
 public function getWasDerivedFromAttribute()
 {
     // No parents recorded: nothing to resolve.
     if (!isset($this->parents)) {
         return;
     }

     $parentIds = array_values($this->parents);
     $parentQuery = Entity::whereIn('_id', $parentIds)->remember(1);

     return $parentQuery->get()->toArray();
 }
Beispiel #3
0
// Debug route: prints the content URL and id of every painting and drawing
// as plain text, followed by all of those documents inside "[" ... "]".
Route::get('/urlsurls', function () {
    echo '-------- paintings -------' . PHP_EOL;
    $paintings = \MongoDB\Entity::whereIn('documentType', ['painting'])->get(['content.url']);
    $drawings = \MongoDB\Entity::whereIn('documentType', ['drawing'])->get(['content.url']);
    foreach ($paintings as $painting) {
        echo $painting['content']['url'] . ' ';
        echo $painting['_id'] . PHP_EOL;
    }
    echo PHP_EOL . PHP_EOL;
    echo '-------- drawings-------' . PHP_EOL;
    foreach ($drawings as $drawing) {
        echo $drawing['content']['url'] . PHP_EOL;
        echo $drawing['_id'] . PHP_EOL;
    }
    echo PHP_EOL . PHP_EOL . "[";
    // Join the string form of each document with commas instead of appending a
    // comma after every item, so the list does not end in a dangling "," before "]".
    // NOTE(review): assumes casting an Entity to string yields its JSON form — confirm.
    $documents = array();
    foreach (\MongoDB\Entity::whereIn('documentType', ['painting', 'drawing'])->get() as $entity) {
        $documents[] = (string) $entity;
    }
    echo implode("," . PHP_EOL, $documents);
    echo "]";
    // Ends the request here; no statement placed after this line can run.
    exit;
});
// define routes
// Page routes map a URI to a single controller action ('Controller@method').
Route::get('home', 'PagesController@index');
// API routes: each URI prefix is handed to one controller class through
// Route::controller(); the class names are fully qualified (leading backslash).
Route::controller('api/v1', '\\Api\\v1\\apiController');
Route::controller('api/media', '\\Api\\media\\apiController');
Route::controller('api/search', '\\Api\\search\\apiController');
Route::controller('api/actions', '\\Api\\actions\\apiController');
Route::controller('api/analytics', '\\Api\\analytics\\apiController');
// Login page.
Route::get('login', 'UserController@login');
Beispiel #4
0
 /**
  * Collects the data for a single unit: the unit document itself, its
  * worker units grouped per crowd agent, the agents, and the jobs involved.
  *
  * The unit id is read from the `unit` request parameter; the remaining
  * request input is passed to processAggregateInput() to seed the
  * aggregation pipeline stages.
  *
  * @return array Keys:
  *               - infoStat: the unit document (Temp collection first, then
  *                 Entity), or null when neither collection has it;
  *               - workerunitContent: per crowd agent id, the aggregation row
  *                 plus `workerunitType` (summed annotation vector per job,
  *                 each with `job_info`) and `valuesWorker`;
  *               - jobContent: per job id, the projected job fields plus `jobConf`;
  *               - agentContent: always an empty array (never filled here).
  */
 public function getUnit()
 {
     $result = array();
     $aggregateOperators = $this->processAggregateInput(Input::all());
     $unitID = Input::get('unit');

     // Look the unit up once in Temp and fall back to Entity; a missing unit
     // yields null instead of an undefined-offset error.
     $unitDocs = \MongoDB\Temp::where('_id', $unitID)->get()->toArray();
     if (count($unitDocs) == 0) {
         $unitDocs = \MongoDB\Entity::where('_id', $unitID)->get()->toArray();
     }
     $result['infoStat'] = isset($unitDocs[0]) ? $unitDocs[0] : null;

     // One row per crowd agent with parallel arrays of job ids, types and
     // annotation vectors of that agent's worker units on this unit.
     // Missing fields are replaced by 0 through $ifNull.
     $selection = \MongoDB\Entity::raw(function ($collection) use ($aggregateOperators, $unitID) {
         $aggregateOperators['$match']['unit_id'] = $unitID;
         $aggregateOperators['$match']['documentType'] = 'workerunit';
         $aggregateOperators['$project']['job_id'] = array('$ifNull' => array('$job_id', 0));
         $aggregateOperators['$project']['crowdAgent_id'] = array('$ifNull' => array('$crowdAgent_id', 0));
         $aggregateOperators['$project']['type'] = array('$ifNull' => array('$type', 0));
         $aggregateOperators['$project']['workerunit'] = array('$ifNull' => array('$annotationVector', 0));
         $aggregateOperators['$group']['_id'] = '$crowdAgent_id';
         $aggregateOperators['$group']['count'] = array('$sum' => 1);
         $aggregateOperators['$group']['job_id'] = array('$push' => '$job_id');
         $aggregateOperators['$group']['type'] = array('$push' => '$type');
         $aggregateOperators['$group']['workerunit'] = array('$push' => '$workerunit');
         return $collection->aggregate(array(
             array('$match' => $aggregateOperators['$match']),
             array('$project' => $aggregateOperators['$project']),
             array('$group' => $aggregateOperators['$group']),
         ));
     });
     $response = $selection['result'];

     $crowdAgentIDs = array();
     $jobIDs = array();
     $result['workerunitContent'] = array();
     $result['jobContent'] = array();
     $result['agentContent'] = array();

     foreach ($response as $value) {
         $agentId = $value['_id'];
         $result['workerunitContent'][$agentId] = $value;
         $crowdAgentIDs[] = $agentId;

         // Sum the annotation vectors of this agent per job id.
         $vectorPerJob = array();
         foreach ($value['job_id'] as $index => $jobId) {
             $jobIDs[] = $jobId;
             $vector = $value['workerunit'][$index];

             // First usable vector for this job (or the stored one is only the
             // 0 placeholder): take it as the starting point.
             if (!array_key_exists($jobId, $vectorPerJob) || !is_array($vectorPerJob[$jobId])) {
                 $vectorPerJob[$jobId] = $vector;
                 continue;
             }
             // Placeholder 0 for a worker unit without annotationVector: nothing to add.
             if (!is_array($vector)) {
                 continue;
             }

             foreach ($vector as $k => $v) {
                 if (is_numeric($v)) {
                     if (!isset($vectorPerJob[$jobId][$k])) {
                         $vectorPerJob[$jobId][$k] = 0;
                     }
                     $vectorPerJob[$jobId][$k] += $v;
                 } elseif (is_array($v)) {
                     foreach ($v as $embeddedK => $embeddedV) {
                         if (!isset($vectorPerJob[$jobId][$k][$embeddedK])) {
                             $vectorPerJob[$jobId][$k][$embeddedK] = 0;
                         }
                         $vectorPerJob[$jobId][$k][$embeddedK] += $embeddedV;
                     }
                 }
             }
         }

         $result['workerunitContent'][$agentId]['workerunitType'] = array();
         foreach ($vectorPerJob as $jobId => $summedVector) {
             $result['workerunitContent'][$agentId]['workerunitType'][$jobId] = array('job_id' => $jobId, 'workerunit' => $summedVector);
         }
     }

     // Attach the stored agent data to every agent that produced worker units.
     $crowdAgentIDs = array_unique($crowdAgentIDs);
     $agents = \MongoDB\CrowdAgent::whereIn('_id', $crowdAgentIDs)->get(array('cache', 'cfWorkerTrust', 'softwareAgent_id'))->toArray();
     foreach ($agents as $agent) {
         $result['workerunitContent'][$agent['_id']]["valuesWorker"] = $agent;
     }

     // Load the jobs referenced by the worker units. The projection uses
     // 'softwareAgent_id' and 'metrics.filteredUnits', matching the spelling
     // used for these fields elsewhere in the code base.
     $jobIDs = array_unique($jobIDs);
     $jobs = \MongoDB\Entity::whereIn('_id', $jobIDs)->get(array('results.withoutSpam.' . $unitID, 'results.withSpam.' . $unitID, 'metrics.units.withoutSpam.' . $unitID, 'metrics.aggUnits', 'metrics.filteredUnits', 'metrics.workers.withFilter', 'softwareAgent_id', 'platformJobId'))->toArray();
     foreach ($jobs as $jobDoc) {
         $jobId = $jobDoc['_id'];
         $result['jobContent'][$jobId] = $jobDoc;
         // Resolve the job configuration title; null when the job has no
         // resolvable configuration.
         $jobConfID = \MongoDB\Entity::where('_id', '=', $jobId)->lists('jobConf_id');
         $jobTitle = \MongoDB\Entity::whereIn('_id', $jobConfID)->get(array('content.title'))->toArray();
         $result['jobContent'][$jobId]['jobConf'] = isset($jobTitle[0]) ? $jobTitle[0] : null;
     }

     // Copy the job details next to every per-job annotation summary; jobs
     // that could not be loaded get null.
     foreach ($result['workerunitContent'] as $agentId => $agentContent) {
         foreach ($agentContent['workerunitType'] as $index => $summary) {
             $job_id = $summary['job_id'];
             $result['workerunitContent'][$agentId]['workerunitType'][$index]['job_info'] = isset($result['jobContent'][$job_id]) ? $result['jobContent'][$job_id] : null;
         }
     }

     return $result;
 }
 function createStatisticsForMetadatadescriptionCache($id)
 {
     set_time_limit(5200);
     \Session::flash('rawArray', 1);
     $db = \DB::getMongoDB();
     $db = $db->entities;
     $result = \MongoDB\Entity::where('_id', $id)->get()->toArray();
     //    dd($result);
     foreach ($result as &$parent) {
         $children = \MongoDB\Entity::whereIn('parents', [$id])->where('content.features.cleanedUpEntities', 'exists', true)->where("documentType", '!=', "annotatedmetadatadescription")->get(['content.features'])->toArray();
         // dd($children);
         $eventChildren = \MongoDB\Entity::whereIn('parents', [$id])->where('content.automatedEvents', 'exists', true)->get(['content.automatedEvents'])->toArray();
         $parent['content']['features'] = array();
         $parent['content']['features']['cleanedUpEntities'] = array();
         $parent['content']['features']['automatedEvents'] = array();
         $parent['content']['features']['topics'] = array();
         $parent['content']['features']['people'] = array();
         $parent['content']['features']['time'] = array();
         $parent['content']['features']['location'] = array();
         $parent['content']['features']['other'] = array();
         $parent['annotations'] = array();
         $parent['annotations']['statistics'] = array();
         $parent['annotations']['features'] = array();
         $parent['annotations']['statistics']['majvoting'] = array();
         $parent['annotations']['statistics']["crowdtruthmetrics"] = array();
         $parent['annotations']['features']['cleanedUpEntities'] = array();
         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"] = array();
         $parent['annotations']['statistics']["crowdtruthmetrics"]["cleanedUpEntities"] = array();
         $parent['annotations']['features']['topics'] = array();
         foreach ($children as $child) {
             if (isset($child["content"]["features"]["topics"])) {
                 $parent['annotations']['features']['topics'] = $child["content"]["features"]["topics"];
                 $parent['content']['features']['topics'] = $child["content"]["features"]["topics"];
             }
         }
         foreach ($eventChildren as $child) {
             if (isset($child["content"]["automatedEvents"])) {
                 $parent['annotations']['automatedEvents'] = $child["content"]["automatedEvents"];
                 $parent['content']['features']['automatedEvents'] = $child["content"]["automatedEvents"];
             }
         }
         foreach ($children as $child) {
             if (!empty($child['content']['features']['cleanedUpEntities'])) {
                 foreach ($child['content']['features']['cleanedUpEntities'] as $childKey => $childValue) {
                     $found = false;
                     foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"] as $parentKey => $parentValue) {
                         if (strtolower($childValue["label"]) == strtolower($parentValue["label"]) && intval($childValue["startOffset"]) == intval($parentValue["startOffset"]) && intval($childValue["endOffset"]) == intval($parentValue["endOffset"])) {
                             $found = true;
                             array_push($parent['annotations']["features"]["cleanedUpEntities"][$parentKey]["extractors"], $childValue["provenance"]);
                             array_push($parent['content']["features"]["cleanedUpEntities"][$parentKey]["extractors"], $childValue["provenance"]);
                             array_push($parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["extractors"], $childValue["provenance"]);
                             $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"] += 1;
                             $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["relevanceScore"]["value"] = $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"] / 6;
                             $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"][$childValue["provenance"]] = $childValue["confidence"];
                             $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["value"] = max($parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]);
                             $noConf = 0;
                             foreach ($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"] as $confKey => $confVal) {
                                 if ($confVal != null) {
                                     $noConf++;
                                 }
                             }
                             if ($noConf != 0) {
                                 $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mean"] = array_sum($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]) / $noConf;
                                 $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"] = $this->stats_stddev_func($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]);
                                 $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mse"] = pow($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"], 2) / $noConf;
                             } else {
                                 $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mean"] = $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["value"];
                                 $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"] = $this->stats_stddev_func($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]);
                                 $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mse"] = pow($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"], 2);
                             }
                             foreach ($childValue["types"] as $keyType => $valueType) {
                                 $foundType = false;
                                 foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"] as $parentTypeKey => $parentTypeValue) {
                                     if ($parentTypeKey == strtolower($valueType["typeURI"])) {
                                         if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["extractors"])) {
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] += 1;
                                             array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["extractors"], $childValue["provenance"]);
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] / $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                         }
                                         $foundType = true;
                                     } else {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] / $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                     if ($foundType == true) {
                                         break;
                                     }
                                 }
                                 if ($foundType == false) {
                                     if ($valueType["typeURI"] != null) {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])] = array();
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["count"] = 1;
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"] = array();
                                         array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"], $childValue["provenance"]);
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"] = array();
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                 }
                                 $foundResource = false;
                                 foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"] as $parentResourceKey => $parentResourceValue) {
                                     if (strtolower($parentResourceKey) == strtolower($valueType["entityURI"])) {
                                         if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["extractors"])) {
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] += 1;
                                             array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["extractors"], $childValue["provenance"]);
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                         }
                                         $foundResource = true;
                                     } else {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                     if ($foundResource == true) {
                                         break;
                                     }
                                 }
                                 if ($foundResource == false) {
                                     if ($valueType["entityURI"] != null) {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]] = array();
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["count"] = 1;
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"] = array();
                                         array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"], $childValue["provenance"]);
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"] = array();
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                 }
                                 $foundLabelTypePair = false;
                                 $tempTypeValue = "";
                                 if ($valueType["typeURI"] != null) {
                                     $tempTypeValue = strtolower($valueType["typeURI"]);
                                 }
                                 foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"] as $parentLabelTypeKey => $parentLabelTypeValue) {
                                     if (strtolower($parentLabelTypeKey) == strtolower($childValue["label"] . "-" . $tempTypeValue)) {
                                         if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["extractors"])) {
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] += 1;
                                             array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]);
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                         }
                                         $foundLabelTypePair = true;
                                     } else {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                     if ($foundLabelTypePair == true) {
                                         break;
                                     }
                                 }
                                 if ($foundLabelTypePair == false) {
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["count"] = 1;
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"] = array();
                                     array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"], $childValue["provenance"]);
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                 }
                                 $foundLabelResourcePair = false;
                                 $tempResourceValue = "";
                                 if ($valueType["entityURI"] != null) {
                                     $tempResourceValue = $valueType["entityURI"];
                                 }
                                 foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"] as $parentLabelResourceKey => $parentLabelResourceValue) {
                                     if (strtolower($parentLabelResourceKey) == strtolower($childValue["label"] . "-" . $tempResourceValue)) {
                                         if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"])) {
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] += 1;
                                             array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"], $childValue["provenance"]);
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                         }
                                         $foundLabelResourcePair = true;
                                     } else {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                     if ($foundLabelResourcePair == true) {
                                         break;
                                     }
                                 }
                                 if ($foundLabelResourcePair == false) {
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] = 1;
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"] = array();
                                     array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"], $childValue["provenance"]);
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                 }
                                 $foundTypeResourcePair = false;
                                 $tempTypeValue = "";
                                 $tempResourceValue = "";
                                 if ($valueType["entityURI"] != null) {
                                     $tempResourceValue = $valueType["entityURI"];
                                 }
                                 if ($valueType["typeURI"] != null) {
                                     $tempTypeValue = $valueType["typeURI"];
                                 }
                                 foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"] as $parentLabelTypeKey => $parentLabelTypeValue) {
                                     if (strtolower($parentLabelTypeKey) == strtolower($tempTypeValue . "-" . $tempResourceValue)) {
                                         if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"])) {
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] += 1;
                                             array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]);
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                         }
                                         $foundTypeResourcePair = true;
                                     } else {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                     if ($foundTypeResourcePair == true) {
                                         break;
                                     }
                                 }
                                 if ($foundTypeResourcePair == false) {
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] = 1;
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"] = array();
                                     array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]);
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                 }
                                 $foundLabelTypeResourcePair = false;
                                 foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"] as $parentLabelTypeResourceKey => $parentLabelTypeResourceValue) {
                                     if (strtolower($parentLabelTypeResourceKey) == strtolower($childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue)) {
                                         if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"])) {
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] += 1;
                                             array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"], $childValue["provenance"]);
                                             $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                         }
                                         $foundLabelTypeResourcePair = true;
                                     } else {
                                         $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                     }
                                     if ($foundLabelTypeResourcePair == true) {
                                         break;
                                     }
                                 }
                                 if ($foundLabelTypeResourcePair == false) {
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] = 1;
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"] = array();
                                     array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"], $childValue["provenance"]);
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"] = array();
                                     $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"];
                                 }
                             }
                         }
                         if ($found == true) {
                             break;
                         }
                         //    break;
                     }
                     if ($found == false) {
                         $extractedEntity = array();
                         $extractedEntity["label"] = $childValue["label"];
                         $extractedEntity["startOffset"] = intval($childValue["startOffset"]);
                         $extractedEntity["endOffset"] = intval($childValue["endOffset"]);
                         $extractedEntity["extractors"] = array();
                         array_push($extractedEntity["extractors"], $childValue["provenance"]);
                         $newEntity = array();
                         $newEntity["label"] = $childValue["label"];
                         $newEntity["startOffset"] = $childValue["startOffset"];
                         $newEntity["endOffset"] = $childValue["endOffset"];
                         $newEntity["noExtractorsPerLabel"] = array();
                         $newEntity["noExtractorsPerLabel"]["extractors"] = array();
                         array_push($newEntity["noExtractorsPerLabel"]["extractors"], $childValue["provenance"]);
                         $newEntity["noExtractorsPerLabel"]["count"] = 1;
                         $newEntity["noExtractorsPerLabel"]["relevanceScore"] = array();
                         $newEntity["noExtractorsPerLabel"]["relevanceScore"]["value"] = 1 / 6;
                         $newEntity["confidence"] = array();
                         $newEntity["confidence"]["extractors"] = array();
                         $newEntity["confidence"]["extractors"][$childValue["provenance"]] = $childValue["confidence"];
                         $newEntity["confidence"]["value"] = $childValue["confidence"];
                         $newEntity["clarity"] = array();
                         $newEntity["clarity"]["mean"] = $childValue["confidence"];
                         $newEntity["clarity"]["stddev"] = $this->stats_stddev_func($newEntity["confidence"]["extractors"]);
                         $newEntity["clarity"]["mse"] = pow($newEntity["clarity"]["stddev"], 2) / 1;
                         $newEntity["noExtractorsPerType"] = array();
                         $newEntity["noExtractorsPerResource"] = array();
                         $newEntity["noExtractorsLabelTypePair"] = array();
                         $newEntity["noExtractorsLabelResourcePair"] = array();
                         $newEntity["noExtractorsTypeResourcePair"] = array();
                         $newEntity["noExtractorsLabelTypeResourcePair"] = array();
                         foreach ($childValue["types"] as $keyType => $valueType) {
                             if ($valueType["typeURI"] != null || $valueType["typeURI"] != "") {
                                 $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])] = array();
                                 $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["count"] = 1;
                                 $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"] = array();
                                 array_push($newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"], $childValue["provenance"]);
                                 $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"] = array();
                                 $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"]["value"] = 1;
                             }
                             if ($valueType["entityURI"] != null) {
                                 $newEntity["noExtractorsPerResource"][$valueType["entityURI"]] = array();
                                 $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["count"] = 1;
                                 $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"] = array();
                                 array_push($newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"], $childValue["provenance"]);
                                 $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"] = array();
                                 $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"]["value"] = 1;
                             }
                             $tempTypeValue = "";
                             if ($valueType["typeURI"] != null || $valueType["typeURI"] != "") {
                                 $tempTypeValue = strtolower($valueType["typeURI"]);
                             }
                             if (!array_key_exists($childValue["label"] . "-" . strtolower($tempTypeValue), $newEntity["noExtractorsLabelTypePair"])) {
                                 $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)] = array();
                                 $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["count"] = 1;
                                 $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["extractors"] = array();
                                 array_push($newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["extractors"], $childValue["provenance"]);
                                 $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["relevanceScore"] = array();
                                 $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["relevanceScore"]["value"] = 1;
                             }
                             $tempResourceValue = "";
                             if ($valueType["entityURI"] != null || $valueType["typeURI"] != "") {
                                 $tempResourceValue = $valueType["entityURI"];
                             }
                             if (!array_key_exists($childValue["label"] . "-" . $tempResourceValue, $newEntity["noExtractorsLabelResourcePair"])) {
                                 $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue] = array();
                                 $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["count"] = 1;
                                 $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"] = array();
                                 array_push($newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]);
                                 $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"] = array();
                                 $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1;
                             }
                             if (!array_key_exists($tempTypeValue . "-" . $tempResourceValue, $newEntity["noExtractorsTypeResourcePair"])) {
                                 $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue] = array();
                                 $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["count"] = 1;
                                 $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"] = array();
                                 array_push($newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]);
                                 $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array();
                                 $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1;
                             }
                             if (!array_key_exists($childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue, $newEntity["noExtractorsLabelTypeResourcePair"])) {
                                 $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue] = array();
                                 $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["count"] = 1;
                                 $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"] = array();
                                 array_push($newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]);
                                 $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array();
                                 $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1;
                             }
                         }
                         array_push($parent['annotations']['statistics']['majvoting']['cleanedUpEntities'], $newEntity);
                         array_push($parent['annotations']['features']['cleanedUpEntities'], $extractedEntity);
                         array_push($parent['content']['features']['cleanedUpEntities'], $extractedEntity);
                         //            dd($parent['annotations']['features']['cleanedUpEntities']);
                     }
                 }
             }
         }
         foreach ($parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"] as $ent) {
             $foundP = false;
             $foundL = false;
             $foundT = false;
             $foundO = false;
             $types = array_keys($ent["noExtractorsPerType"]);
             if (count($types) == 0) {
                 $newOther = array();
                 $newOther["label"] = $ent["label"];
                 $newOther["startOffset"] = $ent["startOffset"];
                 $newOther["endOffset"] = $ent["endOffset"];
                 array_push($parent['content']['features']['other'], $newOther);
                 continue;
             }
             foreach ($types as $type) {
                 if (stripos($type, "person") || stripos($type, "agent") || stripos($type, "organization")) {
                     if ($foundP == true) {
                         continue;
                     }
                     $foundP = true;
                     $newPeople = array();
                     $newPeople["label"] = $ent["label"];
                     $newPeople["startOffset"] = $ent["startOffset"];
                     $newPeople["endOffset"] = $ent["endOffset"];
                     array_push($parent['content']['features']['people'], $newPeople);
                 } else {
                     if (stripos($type, "place") || stripos($type, "settlement") || stripos($type, "country") || stripos($type, "city") || stripos($type, "location") || stripos($type, "land")) {
                         if ($foundL == true) {
                             continue;
                         }
                         $foundL = true;
                         $newLocation = array();
                         $newLocation["label"] = $ent["label"];
                         $newLocation["startOffset"] = $ent["startOffset"];
                         $newLocation["endOffset"] = $ent["endOffset"];
                         array_push($parent['content']['features']['location'], $newLocation);
                     } else {
                         if (stripos($type, "time") || stripos($type, "period") || stripos($type, "year") || stripos($type, "date")) {
                             if ($foundT == true) {
                                 continue;
                             }
                             $foundT = true;
                             $newTime = array();
                             $newTime["label"] = $ent["label"];
                             $newTime["startOffset"] = $ent["startOffset"];
                             $newTime["endOffset"] = $ent["endOffset"];
                             array_push($parent['content']['features']['time'], $newTime);
                         } else {
                             if ($foundO == true) {
                                 continue;
                             }
                             $foundO = true;
                             $newOther = array();
                             $newOther["label"] = $ent["label"];
                             $newOther["startOffset"] = $ent["startOffset"];
                             $newOther["endOffset"] = $ent["endOffset"];
                             array_push($parent['content']['features']['other'], $newOther);
                         }
                     }
                 }
             }
         }
         $parent['content']['otherCount'] = count($parent['content']['features']['other']);
         $parent['content']['peopleCount'] = count($parent['content']['features']['people']);
         $parent['content']['timeCount'] = count($parent['content']['features']['time']);
         $parent['content']['locationCount'] = count($parent['content']['features']['location']);
         $parent['content']['automatedEventsCount'] = count($parent['content']['features']['automatedEvents']);
         $words = explode(" ", $parent["content"]["description"]);
         $parent["wordCount"] = count($words);
         $parent["totalNoOfFeatures"] = count($parent["content"]["features"]["cleanedUpEntities"]) + count($parent["content"]["features"]["automatedEvents"]);
         //    $content = $result["content"];
         //    $content["features"] = $parent['annotations']['features'];
         //    $result["content"] = $content;
         //  $result["annotations"] = $parent['annotations'];
         //    dd($result);
         try {
             \MongoDB\Entity::where('_id', '=', $id)->forceDelete();
             //    \MongoDB\Entity::insert($result);
             //    $result->save();
             $db->batchInsert($result, array('continueOnError' => true));
         } catch (Exception $e) {
             // ContinueOnError will still throw an exception on duplication, even though it continues, so we just move on.
         }
     }
     \Session::forget('rawArray');
     //       dd("done");
 }
Beispiel #6
0
 /**
  * Index one batch of units for the media search and report paging progress.
  *
  * The start offset comes from the 'next' request input. An offset of 0
  * clears the index first. For every unit of the batch the keys produced
  * by getKeys() are merged into one key set, which is then passed to the
  * search component's store().
  *
  * NOTE(review): $units is queried with skip($from)/take($batchsize) but
  * is read with the absolute offset $i below; whether these line up
  * depends on how the query builder handles skip/take on a distinct
  * query - confirm before changing either side.
  *
  * @return array 'log' (progress text), 'next' (offset of the following batch), 'last' (total number of units)
  */
 public function postRefreshindex()
 {
     $searchComponent = new MediaSearchComponent();
     $from = Input::get('next');
     $unitCount = Entity::whereIn('tags', ['unit'])->count();

     // a run that starts at offset 0 begins with an empty index
     if ($from == 0) {
         $searchComponent->clear();
     }

     // index at most 500 units per call; the last batch shrinks to what is left
     $batchsize = min(500, $unitCount - $from);
     $end = $from + $batchsize;

     // ids of the units in this range
     $units = Entity::distinct('_id')->where('tags', ['unit'])->skip($from)->take($batchsize)->get();

     // key set accumulated over the whole batch
     $allKeys = [];
     for ($i = $from; $i < $end; $i++) {
         // load the unit and map all of its properties into keys with formats
         $unit = Entity::where('_id', $units[$i][0])->first();
         $keys = $this->getKeys($unit->attributesToArray());

         foreach ($keys as $k => $info) {
             if (array_key_exists($k, $allKeys)) {
                 // key seen before: settle on one format (e.g. string wins when a key
                 // occurs both as string and int) and remember the document type
                 $allKeys[$k]['format'] = $searchComponent->prioritizeFormat([$allKeys[$k]['format'], $info['format']]);
                 if (!in_array($info['document'], $allKeys[$k]['documents'])) {
                     $allKeys[$k]['documents'][] = $info['document'];
                 }
                 continue;
             }
             // first occurrence of this key
             $allKeys[$k] = [
                 'key' => $info['key'],
                 'label' => $info['label'],
                 'format' => $info['format'],
                 'documents' => [$info['document']],
             ];
         }
     }

     $searchComponent->store($allKeys);

     return [
         'log' => $from . ' to ' . $end . ' of ' . $unitCount,
         'next' => $end,
         'last' => $unitCount,
     ];
 }
Beispiel #7
0
 /**
  * Recompute this crowd agent's cached statistics and save the model.
  *
  * Looks up the jobs whose metrics.workers.withFilter contains this
  * agent's _id. When there is at least one, the following sections are
  * written to $this->cache before saving:
  *  - workerunits:  total / spam / non-spam counts over $this->workerunits
  *  - mediaTypes:   per documentType of each batch's first parent, the number of batches
  *  - jobTypes:     per workerunit type, the number of jobs and the jobs per template
  *  - mediaFormats / mediaDomains: per format / domain, the number of matching jobs
  *  - spammer:      the jobs whose metrics.spammers.list contains this agent
  * When there is no such job the model is saved with its cache untouched.
  *
  * @return void
  */
 public function updateStats2()
 {
     // take all the jobs for that worker
     $crowdAgentJobs = Job::where('metrics.workers.withFilter.' . $this->_id, 'exists', true)->get(['_id']);
     if (!$crowdAgentJobs) {
         return;
     }

     $jobRows = $crowdAgentJobs->toArray();
     if (count($jobRows) == 0) {
         // no job with that worker: nothing to recompute
         $this->save();
         return;
     }

     // flattened once; every query below filters on this same id list
     $jobIds = array_flatten($jobRows);

     // $unitids is initialised together with the other lists so that a worker
     // without workerunits does not hand an undefined variable to array_unique()
     $domains = $formats = $types = $jobids = $unitids = array();
     $spam = $nonspam = $totalNoOfWorkerunits = 0;
     foreach ($this->workerunits as $a) {
         $totalNoOfWorkerunits++;
         if ($a->spam) {
             $spam++;
         } else {
             $nonspam++;
         }
         $domains[] = $a->domain;
         $formats[] = $a->format;
         $types[] = $a->type;
         $jobids[] = $a->job_id;
         $unitids[] = $a->unit_id;
     }

     // These actually are the Workerunit types
     $distinctWorkerunitTypes = array_unique($types);
     $distinctMediaFormats = array_unique($formats);
     $distinctMediaDomains = array_unique($domains);
     $workerParticipatedIn = count(array_unique($unitids));

     $cache = [];
     $cache["workerunits"] = ["count" => $totalNoOfWorkerunits, "spam" => $spam, "nonspam" => $nonspam];

     // take all distinct batches
     $distinctBatchIds = \MongoDB\Entity::whereIn('_id', $jobIds)->distinct('batch_id')->get(['_id']);
     $cache["mediaTypes"] = ["count" => count($distinctWorkerunitTypes), "types" => []];
     foreach ($distinctBatchIds as $distinctBatchId) {
         $batchParents = array_flatten(\MongoDB\Entity::where('_id', '=', $distinctBatchId[0])->lists('parents'));
         if (!isset($batchParents[0])) {
             // batch without parents: no media type to attribute it to
             continue;
         }
         $batchParentsType = \MongoDB\Entity::where('_id', '=', $batchParents[0])->distinct('documentType')->get(['documentType']);
         $parentTypes = array_flatten($batchParentsType->toArray());
         if (!isset($parentTypes[0])) {
             // parent not found or without documentType
             continue;
         }
         $mediaType = $parentTypes[0];
         // The counter lives under ["types"]; it is read from there and the list
         // is no longer reset when a new type shows up, so counts accumulate.
         if (isset($cache["mediaTypes"]["types"][$mediaType])) {
             $cache["mediaTypes"]["types"][$mediaType] = $cache["mediaTypes"]["types"][$mediaType] + 1;
         } else {
             $cache["mediaTypes"]["types"][$mediaType] = 1;
         }
     }
     $cache["mediaTypes"]["distinct"] = sizeof(array_keys($cache["mediaTypes"]["types"]));

     if (count($distinctWorkerunitTypes) > 0) {
         $cache["jobTypes"] = ["distinct" => count($distinctWorkerunitTypes), "count" => count(array_unique($jobids)), "types" => []];
         foreach ($distinctWorkerunitTypes as $distinctJobType) {
             // number of this worker's jobs of that type; count() already returns
             // the number, so it is stored as is instead of being count()ed again
             $jobsOfTypeCount = Job::whereIn('_id', $jobIds)->type($distinctJobType)->count();
             $distinctJobTemplateTypes = Job::whereIn('_id', $jobIds)->type($distinctJobType)->distinct('template')->get()->toArray();
             $cache["jobTypes"]["types"][$distinctJobType]['distinct'] = count($distinctJobTemplateTypes);
             $cache["jobTypes"]["types"][$distinctJobType]['count'] = $jobsOfTypeCount;
             $cache["jobTypes"]["types"][$distinctJobType]["templates"] = [];
             foreach ($distinctJobTemplateTypes as $distinctJobTemplateType) {
                 // each distinct row carries the template name at index 0; the same
                 // value is used for the query and for the cache key
                 $templateName = $distinctJobTemplateType[0];
                 $cache["jobTypes"]["types"][$distinctJobType]["templates"][$templateName] = Job::whereIn('_id', $jobIds)->where('template', $templateName)->count();
             }
         }
     }

     if (count($distinctMediaFormats) > 0) {
         $cache["mediaFormats"] = ["distinct" => count($distinctMediaFormats), "count" => $workerParticipatedIn, "formats" => []];
         $cache["mediaDomains"] = ["distinct" => count($distinctMediaDomains), "count" => $workerParticipatedIn, "domains" => []];
         foreach ($distinctMediaFormats as $distinctMediaFormat) {
             $cache["mediaFormats"]["formats"][$distinctMediaFormat] = \MongoDB\Entity::whereIn('_id', $jobIds)->where('documentType', 'job')->where('format', $distinctMediaFormat)->count();
         }
         foreach ($distinctMediaDomains as $distinctMediaDomain) {
             $cache["mediaDomains"]["domains"][$distinctMediaDomain] = \MongoDB\Entity::whereIn('_id', $jobIds)->where('documentType', 'job')->where('domain', $distinctMediaDomain)->count();
         }
     }

     // jobs in which this worker is listed as a spammer
     $jobsAsSpammer = \MongoDB\Entity::whereIn('_id', $jobIds)->whereIn('metrics.spammers.list', [$this->_id])->lists('platformJobId');
     $cache["spammer"]["count"] = count($jobsAsSpammer);
     $cache["spammer"]["jobs"] = array_flatten($jobsAsSpammer);

     $this->cache = $cache;
     $this->save();
 }
Beispiel #8
0
 /**
  * Build the filter summary shown by the main search and persist it as the
  * \MongoDB\Temp document with _id "mainSearchFilters".
  *
  * The summary holds the media categories from getCategories() (sorted by
  * key), the number of entities tagged 'unit', the number of entities with
  * documentType 'job' and the number of crowd agents.
  *
  * @return array the saved Temp entity converted with toArray()
  */
 public static function createMainSearchFiltersCache()
 {
     $mainSearchFilters = [];
     // $mainSearchFilters['media']['formats'] = $this->getDistinctFieldAndCount('format', ['unit']);
     // $mainSearchFilters['media']['domains'] = $this->getDistinctFieldAndCount('domain', ['unit']);
     $mainSearchFilters['media']['categories'] = static::getCategories();
     $mainSearchFilters['media']['all'] = ["count" => \MongoDB\Entity::whereIn('tags', ['unit'])->count(), "label" => "All Media"];
     //unset($mainSearchFilters['media']['documentTypes']['twrex']);
     $mainSearchFilters['job']['count'] = Entity::where('documentType', 'job')->count();
     // let the database count instead of loading every crowd agent just to
     // count the resulting collection
     $mainSearchFilters['workers']['count'] = \MongoDB\CrowdAgent::count();
     ksort($mainSearchFilters['media']['categories']);

     $entity = new \MongoDB\Temp();
     $entity->_id = "mainSearchFilters";
     $entity->filters = $mainSearchFilters;
     $entity->save();
     return $entity->toArray();
 }
Beispiel #9
0
 /**
  * Build the media category tree: project => document type => label and count.
  *
  * Projects and their document types are discovered among entities tagged
  * 'unit'. The label is the document type with dashes replaced by spaces
  * and its first letter upper-cased.
  *
  * NOTE(review): the count query filters on project and documentType only,
  * not on the 'unit' tag used for discovery - confirm this is intended.
  *
  * @return array nested as [project][documentType] => ['label' => ..., 'count' => ...]
  */
 public static function getCategories()
 {
     $categories = [];

     // every project that has at least one unit
     $unitProjects = Entity::whereIn('tags', ['unit'])->distinct('project')->get()->toArray();

     foreach (array_flatten($unitProjects) as $project) {
         // document types that occur among this project's units
         $projectTypes = Entity::whereIn('tags', ['unit'])->where('project', $project)->distinct('documentType')->get()->toArray();

         foreach (array_flatten($projectTypes) as $documentType) {
             $label = ucfirst(str_replace('-', ' ', $documentType));
             $total = Entity::where('project', $project)->where('documentType', $documentType)->count();
             $categories[$project][$documentType] = ['label' => $label, 'count' => $total];
         }
     }

     return $categories;
 }