/**
 * Queue handler that rebuilds the cached statistics of a list of units.
 *
 * For every unit id in $data the handler counts the unit's batches,
 * workerunits (spam / non-spam), distinct workers, jobs per job type,
 * workerunits per platform, the jobs that filtered the unit out and the
 * unit's derived entities. The result is stored in $unit->cache, followed
 * by the unit's average clarity in $unit->avg_clarity.
 *
 * @param mixed $job  Queue job; delete() is called on it after all units are processed.
 * @param array $data List of unit ids.
 */
public function fire($job, $data)
{
    // Index all job ids by job type once; reused for every unit below.
    $jobIdsPerType = array();
    foreach (Job::get() as $j) {
        $jobIdsPerType[$j->type][] = $j->_id;
    }

    foreach ($data as $id) {
        set_time_limit(30);

        $unit = \MongoDB\Entity::id($id)->first();
        if (!$unit) {
            // A missing unit must not abort the cache update of the remaining units.
            \Log::warning("Unit {$id} not found; skipping cache update.");
            continue;
        }

        $batch = array(
            'count' => count(\MongoDB\Entity::where('documentType', 'batch')->where('parents', 'all', array($unit->_id))->get()->toArray()),
        );

        // Workerunits
        $workerunit = array('count' => 0, 'spam' => 0, 'nonSpam' => 0);
        $workerlist = $workersspam = $workersnonspam = $joblist = array();
        foreach (Workerunit::where('unit_id', $unit->_id)->get() as $a) {
            $joblist[] = $a->job_id;
            $workerlist[] = $a->crowdAgent_id;
            if ($a->spam) {
                $workerunit['spam']++;
                $workersspam[] = $a->crowdAgent_id;
            } else {
                $workerunit['nonSpam']++;
                $workersnonspam[] = $a->crowdAgent_id;
            }
        }
        $workerunit['count'] = $workerunit['spam'] + $workerunit['nonSpam'];

        // Workers
        $workers = array(
            'count' => count(array_unique($workerlist)),
            'spam' => count(array_unique($workersspam)),
            'nonSpam' => count(array_unique($workersnonspam)),
            // Workers that have both spam and non-spam workerunits for this unit.
            // De-duplicated first so a worker is counted once, like the counts above.
            'potentialSpam' => count(array_intersect(array_unique($workersspam), $workersnonspam)),
        );

        // Jobs
        $uniqueJobIds = array_unique($joblist);
        // 'types' is initialised once per unit: resetting it inside the loop
        // would discard the counts of all previously visited job types.
        $jobs = array('count' => count($uniqueJobIds), 'types' => array());
        foreach ($jobIdsPerType as $type => $typeJobIds) {
            $count = count(array_intersect($uniqueJobIds, $typeJobIds));
            if ($count != 0) {
                $jobs['types'][$type] = $count;
            }
        }
        $jobs['distinct'] = count($jobs['types']);

        // Platforms
        $platformField = array();
        $platformField['cf'] = count(\MongoDB\Entity::where('unit_id', $unit->_id)->where('softwareAgent_id', 'cf')->get()->toArray());
        $platformField['amt'] = count(\MongoDB\Entity::where('unit_id', $unit->_id)->where('softwareAgent_id', 'amt')->get()->toArray());

        // Jobs whose metrics list this unit as filtered
        $filteredField = array();
        $filteredField['job_ids'] = array_flatten(Job::where('metrics.filteredUnits.list', 'all', array($unit->_id))->get(['_id'])->toArray());
        $filteredField['count'] = count($filteredField['job_ids']);

        // Entities that have this unit among their parents
        $derivatives = \MongoDB\Entity::whereIn('parents', array($unit->_id))->lists('_id');
        $children = array('count' => count($derivatives), 'list' => $derivatives);

        $unit->cache = [
            "jobs" => $jobs,
            "workers" => $workers,
            "softwareAgent" => $platformField,
            "workerunits" => $workerunit,
            "filtered" => $filteredField,
            "batches" => $batch,
            "children" => $children,
        ];
        $unit->update();

        // Average clarity over all entities that hold metrics for this unit.
        // Both path segments use _id, the identifier used throughout this method.
        $metricsPath = 'metrics.units.withoutSpam.' . $unit->_id;
        $avg_clarity = \MongoDB\Entity::where($metricsPath, 'exists', true)->avg($metricsPath . '.max_relation_Cos.avg');
        if (!isset($avg_clarity)) {
            $avg_clarity = 0;
        }
        $unit->avg_clarity = $avg_clarity;
        $unit->update();

        \Log::debug("Updated unit {$unit->_id}.");
    }

    $job->delete(); // the Queue job...
}
/**
 * Accessor for the "wasDerivedFrom" attribute.
 *
 * Looks up the entities whose _id is listed in this model's parents and
 * returns them as an array (the query is remembered for 1 minute).
 * Returns null when the model has no parents set.
 */
public function getWasDerivedFromAttribute()
{
    if (!isset($this->parents)) {
        return null;
    }

    $parentIds = array_values($this->parents);
    $parentEntities = Entity::whereIn('_id', $parentIds)->remember(1)->get();

    return $parentEntities->toArray();
}
// Debug route: prints the URL and id of every painting and drawing entity,
// followed by a bracketed dump of all painting/drawing entities, then stops.
Route::get('/urlsurls', function () {
    echo '-------- paintings -------' . PHP_EOL;

    $paintings = \MongoDB\Entity::whereIn('documentType', ['painting'])->get(['content.url']);
    $drawings = \MongoDB\Entity::whereIn('documentType', ['drawing'])->get(['content.url']);

    // Paintings: URL and id on one line, separated by a space.
    foreach ($paintings as $painting) {
        echo $painting['content']['url'] . ' ', $painting['_id'] . PHP_EOL;
    }

    echo PHP_EOL . PHP_EOL, '-------- drawings-------' . PHP_EOL;

    // Drawings: URL and id on separate lines.
    foreach ($drawings as $drawing) {
        echo $drawing['content']['url'] . PHP_EOL, $drawing['_id'] . PHP_EOL;
    }

    echo PHP_EOL . PHP_EOL . "[";

    $artworks = \MongoDB\Entity::whereIn('documentType', ['painting', 'drawing'])->get();
    foreach ($artworks as $artwork) {
        echo $artwork . "," . PHP_EOL;
    }

    echo "]";

    exit;

    // Never reached: the script terminates at the exit above.
    return Redirect::to('home');
});

// define routes
Route::get('home', 'PagesController@index');

Route::controller('api/v1', '\\Api\\v1\\apiController');
Route::controller('api/media', '\\Api\\media\\apiController');
Route::controller('api/search', '\\Api\\search\\apiController');
Route::controller('api/actions', '\\Api\\actions\\apiController');
Route::controller('api/analytics', '\\Api\\analytics\\apiController');

Route::get('login', 'UserController@login');
/**
 * Collects the data shown for a single unit (request parameter "unit").
 *
 * The returned array contains:
 *  - 'infoStat':          the unit document, taken from the Temp collection when
 *                         present there, otherwise from the entities; null when
 *                         neither contains the unit.
 *  - 'workerunitContent': per crowd agent id, the aggregated workerunits of that
 *                         agent on this unit, the annotation vectors summed per
 *                         job ('workerunitType') and the agent's own fields
 *                         ('valuesWorker').
 *  - 'jobContent':        per job id, the selected job fields plus the title of
 *                         the job configuration ('jobConf').
 *  - 'agentContent':      initialised as an empty array; not filled here.
 *
 * @return array
 */
public function getUnit()
{
    $result = array();
    $aggregateOperators = $this->processAggregateInput(Input::all());
    $unitID = Input::get('unit');

    // Prefer the Temp copy of the unit; the rows fetched for the existence
    // check are reused instead of querying the collection a second time.
    $tempRows = \MongoDB\Temp::where('_id', $unitID)->get()->toArray();
    if (isset($tempRows[0])) {
        $result['infoStat'] = $tempRows[0];
    } else {
        $entityRows = \MongoDB\Entity::where('_id', $unitID)->get()->toArray();
        $result['infoStat'] = isset($entityRows[0]) ? $entityRows[0] : null;
    }

    // Group the unit's workerunits per crowd agent.
    $selection = \MongoDB\Entity::raw(function ($collection) use ($aggregateOperators, $unitID) {
        $aggregateOperators['$match']['unit_id'] = $unitID;
        $aggregateOperators['$match']['documentType'] = 'workerunit';

        // Missing fields are projected as 0 so the pushed lists stay aligned by index.
        $aggregateOperators['$project']['job_id'] = array('$ifNull' => array('$job_id', 0));
        $aggregateOperators['$project']['crowdAgent_id'] = array('$ifNull' => array('$crowdAgent_id', 0));
        $aggregateOperators['$project']['type'] = array('$ifNull' => array('$type', 0));
        $aggregateOperators['$project']['workerunit'] = array('$ifNull' => array('$annotationVector', 0));

        $aggregateOperators['$group']['_id'] = '$crowdAgent_id';
        $aggregateOperators['$group']['count'] = array('$sum' => 1);
        $aggregateOperators['$group']['job_id'] = array('$push' => '$job_id');
        $aggregateOperators['$group']['type'] = array('$push' => '$type');
        $aggregateOperators['$group']['workerunit'] = array('$push' => '$workerunit');

        return $collection->aggregate(array(
            array('$match' => $aggregateOperators['$match']),
            array('$project' => $aggregateOperators['$project']),
            array('$group' => $aggregateOperators['$group']),
        ));
    });
    $response = $selection['result'];

    $crowdAgentIDs = array();
    $jobIDs = array();
    $result['workerunitContent'] = array();
    $result['jobContent'] = array();
    $result['agentContent'] = array();

    foreach ($response as $value) {
        $agentId = $value['_id'];
        $result['workerunitContent'][$agentId] = $value;
        $crowdAgentIDs[] = $agentId;

        // Sum the agent's annotation vectors per job.
        $vectorPerJob = array();
        foreach ($value['job_id'] as $index => $jobId) {
            $jobIDs[] = $jobId;
            $annInfo = $value['workerunit'][$index];

            // First vector for this job, or the stored value is only the 0
            // placeholder of a workerunit without annotation vector.
            if (!array_key_exists($jobId, $vectorPerJob) || !is_array($vectorPerJob[$jobId])) {
                $vectorPerJob[$jobId] = $annInfo;
                continue;
            }
            if (!is_array($annInfo)) {
                // Nothing to add for a workerunit without annotation vector.
                continue;
            }

            foreach ($annInfo as $k => $v) {
                if (is_numeric($v)) {
                    if (!isset($vectorPerJob[$jobId][$k])) {
                        $vectorPerJob[$jobId][$k] = 0;
                    }
                    $vectorPerJob[$jobId][$k] += $v;
                } elseif (is_array($v)) {
                    if (!isset($vectorPerJob[$jobId][$k]) || !is_array($vectorPerJob[$jobId][$k])) {
                        $vectorPerJob[$jobId][$k] = array();
                    }
                    foreach ($v as $embeddedK => $embeddedV) {
                        if (!isset($vectorPerJob[$jobId][$k][$embeddedK])) {
                            $vectorPerJob[$jobId][$k][$embeddedK] = 0;
                        }
                        $vectorPerJob[$jobId][$k][$embeddedK] += $embeddedV;
                    }
                }
            }
        }

        $result['workerunitContent'][$agentId]['workerunitType'] = array();
        foreach ($vectorPerJob as $jobId => $vector) {
            $result['workerunitContent'][$agentId]['workerunitType'][$jobId] = array(
                'job_id' => $jobId,
                'workerunit' => $vector,
            );
        }
    }

    // Attach the crowd agents' own fields.
    $crowdAgentIDs = array_unique($crowdAgentIDs);
    $agents = \MongoDB\CrowdAgent::whereIn('_id', $crowdAgentIDs)->get(array('cache', 'cfWorkerTrust', 'softwareAgent_id'))->toArray();
    foreach ($agents as $agent) {
        $result['workerunitContent'][$agent['_id']]["valuesWorker"] = $agent;
    }

    // Load the jobs the workerunits belong to.
    // NOTE(review): 'softwareAgent_id' and 'metrics.filteredUnits' replace the
    // original 'sofwareAgent_id' / 'metrics.filteredunits', presumably typos,
    // since the other spellings are the ones used elsewhere in this file. Confirm.
    $jobIDs = array_unique($jobIDs);
    $jobs = \MongoDB\Entity::whereIn('_id', $jobIDs)->get(array(
        'results.withoutSpam.' . $unitID,
        'results.withSpam.' . $unitID,
        'metrics.units.withoutSpam.' . $unitID,
        'metrics.aggUnits',
        'metrics.filteredUnits',
        'metrics.workers.withFilter',
        'softwareAgent_id',
        'platformJobId',
    ))->toArray();

    foreach ($jobs as $jobRow) {
        $result['jobContent'][$jobRow['_id']] = $jobRow;

        $jobConfID = \MongoDB\Entity::where('_id', '=', $jobRow['_id'])->lists('jobConf_id');
        $jobTitle = \MongoDB\Entity::whereIn('_id', $jobConfID)->get(array('content.title'))->toArray();
        // A job whose configuration cannot be found gets a null jobConf.
        $result['jobContent'][$jobRow['_id']]['jobConf'] = isset($jobTitle[0]) ? $jobTitle[0] : null;
    }

    // Copy the job information next to every per-job annotation vector.
    foreach ($result['workerunitContent'] as $agentId => $agentContent) {
        foreach ($agentContent['workerunitType'] as $index => $perJob) {
            $job_id = $perJob['job_id'];
            $result['workerunitContent'][$agentId]['workerunitType'][$index]['job_info'] =
                isset($result['jobContent'][$job_id]) ? $result['jobContent'][$job_id] : null;
        }
    }

    return $result;
}
function createStatisticsForMetadatadescriptionCache($id) { set_time_limit(5200); \Session::flash('rawArray', 1); $db = \DB::getMongoDB(); $db = $db->entities; $result = \MongoDB\Entity::where('_id', $id)->get()->toArray(); // dd($result); foreach ($result as &$parent) { $children = \MongoDB\Entity::whereIn('parents', [$id])->where('content.features.cleanedUpEntities', 'exists', true)->where("documentType", '!=', "annotatedmetadatadescription")->get(['content.features'])->toArray(); // dd($children); $eventChildren = \MongoDB\Entity::whereIn('parents', [$id])->where('content.automatedEvents', 'exists', true)->get(['content.automatedEvents'])->toArray(); $parent['content']['features'] = array(); $parent['content']['features']['cleanedUpEntities'] = array(); $parent['content']['features']['automatedEvents'] = array(); $parent['content']['features']['topics'] = array(); $parent['content']['features']['people'] = array(); $parent['content']['features']['time'] = array(); $parent['content']['features']['location'] = array(); $parent['content']['features']['other'] = array(); $parent['annotations'] = array(); $parent['annotations']['statistics'] = array(); $parent['annotations']['features'] = array(); $parent['annotations']['statistics']['majvoting'] = array(); $parent['annotations']['statistics']["crowdtruthmetrics"] = array(); $parent['annotations']['features']['cleanedUpEntities'] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"] = array(); $parent['annotations']['statistics']["crowdtruthmetrics"]["cleanedUpEntities"] = array(); $parent['annotations']['features']['topics'] = array(); foreach ($children as $child) { if (isset($child["content"]["features"]["topics"])) { $parent['annotations']['features']['topics'] = $child["content"]["features"]["topics"]; $parent['content']['features']['topics'] = $child["content"]["features"]["topics"]; } } foreach ($eventChildren as $child) { if (isset($child["content"]["automatedEvents"])) { 
$parent['annotations']['automatedEvents'] = $child["content"]["automatedEvents"]; $parent['content']['features']['automatedEvents'] = $child["content"]["automatedEvents"]; } } foreach ($children as $child) { if (!empty($child['content']['features']['cleanedUpEntities'])) { foreach ($child['content']['features']['cleanedUpEntities'] as $childKey => $childValue) { $found = false; foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"] as $parentKey => $parentValue) { if (strtolower($childValue["label"]) == strtolower($parentValue["label"]) && intval($childValue["startOffset"]) == intval($parentValue["startOffset"]) && intval($childValue["endOffset"]) == intval($parentValue["endOffset"])) { $found = true; array_push($parent['annotations']["features"]["cleanedUpEntities"][$parentKey]["extractors"], $childValue["provenance"]); array_push($parent['content']["features"]["cleanedUpEntities"][$parentKey]["extractors"], $childValue["provenance"]); array_push($parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["extractors"], $childValue["provenance"]); $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"] += 1; $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["relevanceScore"]["value"] = $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"] / 6; $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"][$childValue["provenance"]] = $childValue["confidence"]; $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["value"] = max($parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]); $noConf = 0; foreach 
($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"] as $confKey => $confVal) { if ($confVal != null) { $noConf++; } } if ($noConf != 0) { $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mean"] = array_sum($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]) / $noConf; $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"] = $this->stats_stddev_func($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]); $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mse"] = pow($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"], 2) / $noConf; } else { $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mean"] = $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["value"]; $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"] = $this->stats_stddev_func($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["confidence"]["extractors"]); $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["mse"] = pow($parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["clarity"]["stddev"], 2); } foreach ($childValue["types"] as $keyType => $valueType) { $foundType = false; foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"] as $parentTypeKey => $parentTypeValue) { if ($parentTypeKey == strtolower($valueType["typeURI"])) { if (!in_array($childValue["provenance"], 
$parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["extractors"])) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] += 1; array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] / $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundType = true; } else { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] / $parent['annotations']["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundType == true) { break; } } if ($foundType == false) { if ($valueType["typeURI"] != null) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["count"] = 1; $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"] = array(); 
array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } } $foundResource = false; foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"] as $parentResourceKey => $parentResourceValue) { if (strtolower($parentResourceKey) == strtolower($valueType["entityURI"])) { if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["extractors"])) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] += 1; array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundResource = true; } else { 
$parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundResource == true) { break; } } if ($foundResource == false) { if ($valueType["entityURI"] != null) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["count"] = 1; $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"] = array(); array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } } $foundLabelTypePair = false; $tempTypeValue = ""; if ($valueType["typeURI"] != null) { $tempTypeValue = strtolower($valueType["typeURI"]); } foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"] as $parentLabelTypeKey => $parentLabelTypeValue) { if (strtolower($parentLabelTypeKey) == 
strtolower($childValue["label"] . "-" . $tempTypeValue)) { if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["extractors"])) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] += 1; array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelTypePair = true; } else { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundLabelTypePair == true) { break; } } if ($foundLabelTypePair == false) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . 
$tempTypeValue]["count"] = 1; $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"] = array(); array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelResourcePair = false; $tempResourceValue = ""; if ($valueType["entityURI"] != null) { $tempResourceValue = $valueType["entityURI"]; } foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"] as $parentLabelResourceKey => $parentLabelResourceValue) { if (strtolower($parentLabelResourceKey) == strtolower($childValue["label"] . "-" . 
$tempResourceValue)) { if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"])) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] += 1; array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelResourcePair = true; } else { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundLabelResourcePair == true) { break; } } if ($foundLabelResourcePair == false) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] = 1; 
$parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"] = array(); array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundTypeResourcePair = false; $tempTypeValue = ""; $tempResourceValue = ""; if ($valueType["entityURI"] != null) { $tempResourceValue = $valueType["entityURI"]; } if ($valueType["typeURI"] != null) { $tempTypeValue = $valueType["typeURI"]; } foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"] as $parentLabelTypeKey => $parentLabelTypeValue) { if (strtolower($parentLabelTypeKey) == strtolower($tempTypeValue . "-" . 
$tempResourceValue)) { if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"])) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] += 1; array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundTypeResourcePair = true; } else { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundTypeResourcePair == true) { break; } } if ($foundTypeResourcePair == false) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] = 1; $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"] = 
array(); array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelTypeResourcePair = false; foreach ($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"] as $parentLabelTypeResourceKey => $parentLabelTypeResourceValue) { if (strtolower($parentLabelTypeResourceKey) == strtolower($childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue)) { if (!in_array($childValue["provenance"], $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"])) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] += 1; array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] / 
$parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelTypeResourcePair = true; } else { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundLabelTypeResourcePair == true) { break; } } if ($foundLabelTypeResourcePair == false) { $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] = 1; $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"] = array(); array_push($parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"], $childValue["provenance"]); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"] = array(); $parent['annotations']['statistics']['majvoting']["cleanedUpEntities"][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = 1 / $parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"][$parentKey]["noExtractorsPerLabel"]["count"]; } } } if ($found == true) { break; } // break; } if ($found == false) { $extractedEntity = array(); 
$extractedEntity["label"] = $childValue["label"]; $extractedEntity["startOffset"] = intval($childValue["startOffset"]); $extractedEntity["endOffset"] = intval($childValue["endOffset"]); $extractedEntity["extractors"] = array(); array_push($extractedEntity["extractors"], $childValue["provenance"]); $newEntity = array(); $newEntity["label"] = $childValue["label"]; $newEntity["startOffset"] = $childValue["startOffset"]; $newEntity["endOffset"] = $childValue["endOffset"]; $newEntity["noExtractorsPerLabel"] = array(); $newEntity["noExtractorsPerLabel"]["extractors"] = array(); array_push($newEntity["noExtractorsPerLabel"]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsPerLabel"]["count"] = 1; $newEntity["noExtractorsPerLabel"]["relevanceScore"] = array(); $newEntity["noExtractorsPerLabel"]["relevanceScore"]["value"] = 1 / 6; $newEntity["confidence"] = array(); $newEntity["confidence"]["extractors"] = array(); $newEntity["confidence"]["extractors"][$childValue["provenance"]] = $childValue["confidence"]; $newEntity["confidence"]["value"] = $childValue["confidence"]; $newEntity["clarity"] = array(); $newEntity["clarity"]["mean"] = $childValue["confidence"]; $newEntity["clarity"]["stddev"] = $this->stats_stddev_func($newEntity["confidence"]["extractors"]); $newEntity["clarity"]["mse"] = pow($newEntity["clarity"]["stddev"], 2) / 1; $newEntity["noExtractorsPerType"] = array(); $newEntity["noExtractorsPerResource"] = array(); $newEntity["noExtractorsLabelTypePair"] = array(); $newEntity["noExtractorsLabelResourcePair"] = array(); $newEntity["noExtractorsTypeResourcePair"] = array(); $newEntity["noExtractorsLabelTypeResourcePair"] = array(); foreach ($childValue["types"] as $keyType => $valueType) { if ($valueType["typeURI"] != null || $valueType["typeURI"] != "") { $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])] = array(); $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["count"] = 1; 
$newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"] = array(); array_push($newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"] = array(); $newEntity["noExtractorsPerType"][strtolower($valueType["typeURI"])]["relevanceScore"]["value"] = 1; } if ($valueType["entityURI"] != null) { $newEntity["noExtractorsPerResource"][$valueType["entityURI"]] = array(); $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["count"] = 1; $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"] = array(); array_push($newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"] = array(); $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"]["value"] = 1; } $tempTypeValue = ""; if ($valueType["typeURI"] != null || $valueType["typeURI"] != "") { $tempTypeValue = strtolower($valueType["typeURI"]); } if (!array_key_exists($childValue["label"] . "-" . strtolower($tempTypeValue), $newEntity["noExtractorsLabelTypePair"])) { $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)] = array(); $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["count"] = 1; $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["extractors"] = array(); array_push($newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . strtolower($tempTypeValue)]["relevanceScore"] = array(); $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . 
strtolower($tempTypeValue)]["relevanceScore"]["value"] = 1; } $tempResourceValue = ""; if ($valueType["entityURI"] != null || $valueType["typeURI"] != "") { $tempResourceValue = $valueType["entityURI"]; } if (!array_key_exists($childValue["label"] . "-" . $tempResourceValue, $newEntity["noExtractorsLabelResourcePair"])) { $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue] = array(); $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["count"] = 1; $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"] = array(); array_push($newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"] = array(); $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1; } if (!array_key_exists($tempTypeValue . "-" . $tempResourceValue, $newEntity["noExtractorsTypeResourcePair"])) { $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue] = array(); $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["count"] = 1; $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"] = array(); array_push($newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array(); $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1; } if (!array_key_exists($childValue["label"] . "-" . $tempTypeValue . "-" . 
$tempResourceValue, $newEntity["noExtractorsLabelTypeResourcePair"])) { $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue] = array(); $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["count"] = 1; $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"] = array(); array_push($newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array(); $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1; } } array_push($parent['annotations']['statistics']['majvoting']['cleanedUpEntities'], $newEntity); array_push($parent['annotations']['features']['cleanedUpEntities'], $extractedEntity); array_push($parent['content']['features']['cleanedUpEntities'], $extractedEntity); // dd($parent['annotations']['features']['cleanedUpEntities']); } } } } foreach ($parent["annotations"]["statistics"]["majvoting"]["cleanedUpEntities"] as $ent) { $foundP = false; $foundL = false; $foundT = false; $foundO = false; $types = array_keys($ent["noExtractorsPerType"]); if (count($types) == 0) { $newOther = array(); $newOther["label"] = $ent["label"]; $newOther["startOffset"] = $ent["startOffset"]; $newOther["endOffset"] = $ent["endOffset"]; array_push($parent['content']['features']['other'], $newOther); continue; } foreach ($types as $type) { if (stripos($type, "person") || stripos($type, "agent") || stripos($type, "organization")) { if ($foundP == true) { continue; } $foundP = true; $newPeople = array(); $newPeople["label"] = $ent["label"]; 
$newPeople["startOffset"] = $ent["startOffset"]; $newPeople["endOffset"] = $ent["endOffset"]; array_push($parent['content']['features']['people'], $newPeople); } else { if (stripos($type, "place") || stripos($type, "settlement") || stripos($type, "country") || stripos($type, "city") || stripos($type, "location") || stripos($type, "land")) { if ($foundL == true) { continue; } $foundL = true; $newLocation = array(); $newLocation["label"] = $ent["label"]; $newLocation["startOffset"] = $ent["startOffset"]; $newLocation["endOffset"] = $ent["endOffset"]; array_push($parent['content']['features']['location'], $newLocation); } else { if (stripos($type, "time") || stripos($type, "period") || stripos($type, "year") || stripos($type, "date")) { if ($foundT == true) { continue; } $foundT = true; $newTime = array(); $newTime["label"] = $ent["label"]; $newTime["startOffset"] = $ent["startOffset"]; $newTime["endOffset"] = $ent["endOffset"]; array_push($parent['content']['features']['time'], $newTime); } else { if ($foundO == true) { continue; } $foundO = true; $newOther = array(); $newOther["label"] = $ent["label"]; $newOther["startOffset"] = $ent["startOffset"]; $newOther["endOffset"] = $ent["endOffset"]; array_push($parent['content']['features']['other'], $newOther); } } } } } $parent['content']['otherCount'] = count($parent['content']['features']['other']); $parent['content']['peopleCount'] = count($parent['content']['features']['people']); $parent['content']['timeCount'] = count($parent['content']['features']['time']); $parent['content']['locationCount'] = count($parent['content']['features']['location']); $parent['content']['automatedEventsCount'] = count($parent['content']['features']['automatedEvents']); $words = explode(" ", $parent["content"]["description"]); $parent["wordCount"] = count($words); $parent["totalNoOfFeatures"] = count($parent["content"]["features"]["cleanedUpEntities"]) + count($parent["content"]["features"]["automatedEvents"]); // $content = 
$result["content"]; // $content["features"] = $parent['annotations']['features']; // $result["content"] = $content; // $result["annotations"] = $parent['annotations']; // dd($result); try { \MongoDB\Entity::where('_id', '=', $id)->forceDelete(); // \MongoDB\Entity::insert($result); // $result->save(); $db->batchInsert($result, array('continueOnError' => true)); } catch (Exception $e) { // ContinueOnError will still throw an exception on duplication, even though it continues, so we just move on. } } \Session::forget('rawArray'); // dd("done"); }
/**
 * Refresh the search index for one batch of units.
 *
 * The batch offset is read from the "next" request input. At most 500 units
 * are processed per call; for each unit its attributes are mapped to search
 * keys via getKeys(), the keys are merged into one set, and that set is
 * handed to MediaSearchComponent::store(). The index is cleared first when
 * the offset is 0.
 *
 * @return array 'log'  => progress message,
 *               'next' => offset at which the following batch starts,
 *               'last' => total number of units tagged 'unit'.
 */
public function postRefreshindex()
{
    $searchComponent = new MediaSearchComponent();

    // amount of units to index per iteration
    $batchsize = 500;

    // A missing or non-numeric "next" input is treated as offset 0 so the
    // arithmetic and the log message below always work with an integer.
    $from = (int) Input::get('next');
    $unitCount = Entity::whereIn('tags', ['unit'])->count();

    // reset index on start
    if ($from == 0) {
        $searchComponent->clear();
    }

    // reduce last batch to remaining units
    if ($from + $batchsize > $unitCount) {
        $batchsize = $unitCount - $from;
    }

    // all units in this range
    // NOTE(review): the result is indexed below with the absolute offset
    // ($from .. $from + $batchsize - 1), which only works if this distinct
    // query returns the full id list rather than the skipped/limited slice.
    // Also note the count above uses whereIn('tags', ...) while this query
    // uses where('tags', ...). Confirm both against the MongoDB driver in use.
    $units = Entity::distinct('_id')->where('tags', ['unit'])->skip($from)->take($batchsize)->get();

    // get keys for each unit in this batch
    $allKeys = [];
    for ($i = $from; $i < $from + $batchsize; $i++) {
        // get data of unit
        $unit = Entity::where('_id', $units[$i][0])->first();

        // The unit may no longer be loadable; skip it instead of failing
        // the whole batch on a null model.
        if (!$unit) {
            continue;
        }

        // map all properties into keys with formats
        $keys = $this->getKeys($unit->attributesToArray());

        // merge keys with the set of keys and get the right format (e.g. if a
        // key occurs both as string and int, the search component decides
        // which format wins)
        foreach ($keys as $k => $v) {
            if (!array_key_exists($k, $allKeys)) {
                $allKeys[$k] = [
                    'key' => $v['key'],
                    'label' => $v['label'],
                    'format' => $v['format'],
                    'documents' => [$v['document']],
                ];
                continue;
            }

            $allKeys[$k]['format'] = $searchComponent->prioritizeFormat([$allKeys[$k]['format'], $v['format']]);

            // add document type if it is not in the list yet
            if (!in_array($v['document'], $allKeys[$k]['documents'])) {
                array_push($allKeys[$k]['documents'], $v['document']);
            }
        }
    }

    $searchComponent->store($allKeys);

    return [
        'log' => $from . ' to ' . ($from + $batchsize) . ' of ' . $unitCount,
        'next' => $from + $batchsize,
        'last' => $unitCount,
    ];
}
/**
 * Rebuild the statistics cache of this worker and save the model.
 *
 * Looks up every job whose 'metrics.workers.withFilter.<_id>' field exists.
 * When there are none the model is saved unchanged. Otherwise the cache is
 * rebuilt from $this->workerunits and those jobs with these sections:
 * workerunits, mediaTypes, jobTypes, mediaFormats, mediaDomains, spammer.
 *
 * @return void
 */
public function updateStats2()
{
    // take all the jobs for that worker
    $crowdAgentJobs = Job::where('metrics.workers.withFilter.' . $this->_id, 'exists', true)->get(['_id']);
    if (!$crowdAgentJobs) {
        return;
    }

    $jobRows = $crowdAgentJobs->toArray();

    // no job with that worker: nothing to recompute
    if (count($jobRows) == 0) {
        $this->save();
        return;
    }

    // Flattened once and reused by every query below.
    $jobIds = array_flatten($jobRows);

    // $unitids is initialised together with the other lists so that a worker
    // without workerunits does not pass an undefined variable to array_unique().
    $domains = $formats = $types = $jobids = $unitids = array();
    $spam = $nonspam = $totalNoOfWorkerunits = 0;

    foreach ($this->workerunits as $a) {
        $totalNoOfWorkerunits++;
        if ($a->spam) {
            $spam++;
        } else {
            $nonspam++;
        }
        $domains[] = $a->domain;
        $formats[] = $a->format;
        $types[] = $a->type;
        $jobids[] = $a->job_id;
        $unitids[] = $a->unit_id;
    }

    $distinctWorkerunitTypes = array_unique($types); // These actually are the Workerunit types
    $distinctMediaFormats = array_unique($formats);
    $distinctMediaDomains = array_unique($domains);
    $workerParticipatedIn = count(array_unique($unitids));

    $cache = [];
    $cache["workerunits"] = ["count" => $totalNoOfWorkerunits, "spam" => $spam, "nonspam" => $nonspam];

    // take all distinct batches
    $distinctBatchIds = \MongoDB\Entity::whereIn('_id', $jobIds)->distinct('batch_id')->get(['_id']);
    $cache["mediaTypes"] = ["count" => count($distinctWorkerunitTypes), "types" => []];

    foreach ($distinctBatchIds as $distinctBatchId) {
        $batchParents = array_flatten(\MongoDB\Entity::where('_id', '=', $distinctBatchId[0])->lists('parents'));
        if (!isset($batchParents[0])) {
            // batch without a parent: no document type to count
            continue;
        }

        $batchParentsType = array_flatten(
            \MongoDB\Entity::where('_id', '=', $batchParents[0])->distinct('documentType')->get(['documentType'])->toArray()
        );
        if (!isset($batchParentsType[0])) {
            continue;
        }
        $mediaType = $batchParentsType[0];

        // Count per document type. The counter lives under ["types"], and the
        // map is not reset when a new type shows up, so earlier counts survive.
        if (isset($cache["mediaTypes"]["types"][$mediaType])) {
            $cache["mediaTypes"]["types"][$mediaType]++;
        } else {
            $cache["mediaTypes"]["types"][$mediaType] = 1;
        }
    }
    $cache["mediaTypes"]["distinct"] = count($cache["mediaTypes"]["types"]);

    if (count($distinctWorkerunitTypes) > 0) {
        $cache["jobTypes"] = [
            "distinct" => count($distinctWorkerunitTypes),
            "count" => count(array_unique($jobids)),
            "types" => [],
        ];

        foreach ($distinctWorkerunitTypes as $distinctJobType) {
            $distinctJobTemplateTypes = Job::whereIn('_id', $jobIds)->type($distinctJobType)->distinct('template')->get()->toArray();

            // ->count() already yields the number of jobs of this type; it
            // must not be wrapped in count() again.
            $jobTypeCount = Job::whereIn('_id', $jobIds)->type($distinctJobType)->count();

            $cache["jobTypes"]["types"][$distinctJobType]['distinct'] = count($distinctJobTemplateTypes);
            $cache["jobTypes"]["types"][$distinctJobType]['count'] = $jobTypeCount;
            $cache["jobTypes"]["types"][$distinctJobType]["templates"] = [];

            foreach ($distinctJobTemplateTypes as $distinctJobTemplateType) {
                // Each row carries the template name at index 0; use that
                // same scalar for both the query and the result key.
                $template = $distinctJobTemplateType[0];
                $cache["jobTypes"]["types"][$distinctJobType]["templates"][$template] =
                    Job::whereIn('_id', $jobIds)->where('template', $template)->count();
            }
        }
    }

    if (count($distinctMediaFormats) > 0) {
        $cache["mediaFormats"] = ["distinct" => count($distinctMediaFormats), "count" => $workerParticipatedIn, "formats" => []];
        $cache["mediaDomains"] = ["distinct" => count($distinctMediaDomains), "count" => $workerParticipatedIn, "domains" => []];

        foreach ($distinctMediaFormats as $distinctMediaFormat) {
            $cache["mediaFormats"]["formats"][$distinctMediaFormat] = \MongoDB\Entity::whereIn('_id', $jobIds)
                ->where('documentType', 'job')
                ->where('format', $distinctMediaFormat)
                ->count();
        }

        foreach ($distinctMediaDomains as $distinctMediaDomain) {
            $cache["mediaDomains"]["domains"][$distinctMediaDomain] = \MongoDB\Entity::whereIn('_id', $jobIds)
                ->where('documentType', 'job')
                ->where('domain', $distinctMediaDomain)
                ->count();
        }
    }

    // jobs (among the worker's jobs) whose spammer list contains this worker
    $jobsAsSpammer = \MongoDB\Entity::whereIn('_id', $jobIds)
        ->whereIn('metrics.spammers.list', [$this->_id])
        ->lists('platformJobId');
    $cache["spammer"]["count"] = count($jobsAsSpammer);
    $cache["spammer"]["jobs"] = array_flatten($jobsAsSpammer);

    $this->cache = $cache;
    $this->save();
}
/**
 * Build the main search filter summary and store it as the Temp document
 * with _id "mainSearchFilters".
 *
 * The summary holds the media categories (sorted by key), the total number
 * of entities tagged 'unit', the number of job entities and the number of
 * crowd agents. The per-format and per-domain media filters are not built
 * here.
 *
 * @return array The saved Temp document as an array.
 */
public static function createMainSearchFiltersCache()
{
    // Collect the figures first, in the same order the queries were issued before.
    $categories = static::getCategories();
    $unitTotal = \MongoDB\Entity::whereIn('tags', ['unit'])->count();
    $jobTotal = Entity::where('documentType', 'job')->count();
    $workerTotal = \MongoDB\CrowdAgent::all()->count();

    ksort($categories);

    $filters = [
        'media' => [
            'categories' => $categories,
            'all' => ["count" => $unitTotal, "label" => "All Media"],
        ],
        'job' => ['count' => $jobTotal],
        'workers' => ['count' => $workerTotal],
    ];

    $cacheDocument = new \MongoDB\Temp();
    $cacheDocument->_id = "mainSearchFilters";
    $cacheDocument->filters = $filters;
    $cacheDocument->save();

    return $cacheDocument->toArray();
}
/**
 * Collect, per project, the document types of entities tagged 'unit'
 * together with a display label and an entity count.
 *
 * The label is the document type with dashes replaced by spaces and the
 * first character upper-cased. The count query filters on project and
 * document type only.
 *
 * @return array Nested as [project][documentType] => ['label' => ..., 'count' => ...].
 */
public static function getCategories()
{
    $result = [];

    // every distinct project that has at least one unit
    $projectRows = Entity::whereIn('tags', ['unit'])->distinct('project')->get()->toArray();

    foreach (array_flatten($projectRows) as $projectName) {
        // distinct document types of the units in this project
        $typeRows = Entity::whereIn('tags', ['unit'])
            ->where('project', $projectName)
            ->distinct('documentType')
            ->get()
            ->toArray();

        foreach (array_flatten($typeRows) as $documentType) {
            $label = ucfirst(str_replace('-', ' ', $documentType));
            $total = Entity::where('project', $projectName)->where('documentType', $documentType)->count();

            $result[$projectName][$documentType] = [
                'label' => $label,
                'count' => $total,
            ];
        }
    }

    return $result;
}