/**
 * Recompute and persist the student ranking of one recruiter.
 *
 * Every student linked to the recruiter in `student_recruiter` (read through
 * $dbh1) has its feature string loaded from `ml_feature_vectors` (through
 * $dbh2). Each character of that string is cast to an integer and the
 * resulting vector is scored with cosinus() against a vector of the same
 * length filled with 5. Student ids are then ordered by descending score and
 * written to `ml_rank` as a comma-separated list (INSERT when the recruiter
 * has no row yet, UPDATE otherwise).
 *
 * Relies on the global connections $dbh1 / $dbh2 and the legacy mysql_* API.
 *
 * @param int|string $r_id Recruiter id; cast to int before it is placed in SQL.
 * @return void
 */
function ml_compute_ranking($r_id)
{
    global $dbh1, $dbh2;

    // The ids are concatenated unquoted into the SQL text, so force them to
    // integers; anything else would allow injection through a crafted id.
    $r_id = (int) $r_id;

    $sql = 'SELECT student_id FROM student_recruiter WHERE recruiter_id=' . $r_id;
    $result = mysql_query($sql, $dbh1);
    if ($result === false) {
        // A failed lookup must not replace a stored ranking with an empty one.
        return;
    }

    $rank_dist = array();
    while ($data = mysql_fetch_array($result)) {
        $s_id = (int) $data['student_id'];

        // Load the feature vector string stored for this recruiter/student pair.
        $sql1 = 'SELECT feature FROM ml_feature_vectors WHERE recruiter_id=' . $r_id . ' AND student_id=' . $s_id;
        $result1 = mysql_query($sql1, $dbh2);
        $data1 = ($result1 !== false) ? mysql_fetch_array($result1) : false;
        $feature = ($data1 && isset($data1['feature'])) ? (string) $data1['feature'] : '';
        $length = strlen($feature);

        if ($length === 0) {
            // No stored vector: keep the student in the ranking with the
            // lowest score instead of scoring two empty vectors.
            // NOTE(review): confirm 0 matches what cosinus() yielded for
            // empty input before this guard existed.
            $rank_dist[$s_id] = 0;
            continue;
        }

        // One integer per character of the feature string.
        $feature_array = array();
        for ($i = 0; $i < $length; $i++) {
            $feature_array[$i] = (int) $feature[$i];
        }

        // Reference vector used for comparison: same length, every entry 5.
        $best_array = array_fill(0, $length, 5);

        $rank_dist[$s_id] = cosinus($feature_array, $best_array);
    }

    // Highest similarity first; the keys are the student ids.
    arsort($rank_dist);
    $rank_id = implode(',', array_keys($rank_dist));

    $sql2 = 'SELECT * FROM ml_rank WHERE recruiter_id=' . $r_id;
    $result2 = mysql_query($sql2, $dbh2);
    if ($result2 === false) {
        return;
    }

    if (mysql_num_rows($result2) == 0) {
        $sql3 = 'INSERT INTO ml_rank VALUES(' . $r_id . ',"' . $rank_id . '")';
    } else {
        $sql3 = 'UPDATE ml_rank SET rank_id="' . $rank_id . '" WHERE recruiter_id=' . $r_id;
    }
    mysql_query($sql3, $dbh2);
}
<!--Zadanie **. Napisz funkcję mySin($x, $iterNum) i myCos($x, $iterNum), która oblicza aproksymacje funkcji sinus i cosinus używając następujących wzorów: Żeby to zrobić najpierw napisz funkcje do wyliczania silni, potem do wyliczania potęgi a na koniec połącz je w całość.--> <!DOCTYPE html> <html lang="pl-PL"> <head> <meta charset="utf-8"> </head> <?php $x = 4; $iterNum = 4; echo "Cosinus z {$x} to: " . cosinus($x, $iterNum) . "<br>"; echo "Sinus z {$x} to: " . sinus($x, $iterNum) . "<br>"; function sinus($x, $iterNum) { $sin = 0; $k = 3; //początkowa wartość dla silni i wykladnka potegi for ($i = 1; $i <= $iterNum; $i++) { $zm = power($x, $k) / factorial($k); //wyliczenie potega/silnia if ($i % 2 == 1) { $zm = $zm * -1; //zmiana znaków co drugie przejście } $k += 2; $sin += $zm; //dodanie potegi/silnia do wyniku } return $sin + $x;
/**
 * Fill in the missing side of a right triangle.
 *
 * $r is indexed 0 / 1 / 2, which the original note labels adjacent /
 * opposite / hypotenuse. Each falsy entry is recomputed from the other
 * sides and the angle returned by tri_rect_angle($r).
 * NOTE(review): tri_rect_angle(), cosinus() and sinus() are defined
 * elsewhere; the unit of the angle is not visible here.
 *
 * @param array $r Sides of the triangle, with the unknown side falsy.
 * @return array The same array with the missing lengths filled in.
 */
function triangle_rectangle($r)
{
    $theta = tri_rect_angle($r);

    // adjacent = hypotenuse * cos(angle)
    if (!$r[0]) {
        $hypotenuse = $r[2];
        $r[0] = $hypotenuse * cosinus($theta);
    }

    // opposite = hypotenuse * sin(angle)
    if (!$r[1]) {
        $hypotenuse = $r[2];
        $r[1] = $hypotenuse * sinus($theta);
    }

    // hypotenuse = adjacent / cos(angle)
    if (!$r[2]) {
        $adjacent = $r[0];
        $r[2] = $adjacent / cosinus($theta);
    }

    return $r;
}
$feature_array = array(); for ($i = 0; $i < strlen($feature); $i++) { $feature_array[$i] = (int) $feature[$i]; } // echo 'Feature Vector (Array) <br>'; // print_r($feature_array); // echo '<br> Feature Vector Array Length<br>'; // echo count($feature_array).'<br>'; // Create best array --> For Comparison $best_array = array(); for ($i = 0; $i < strlen($feature); $i++) { $best_array[$i] = 5; } // Calculate distance between feature vector and best vector based on Euclidean and Cosine similarity distance $edist = euclidean($feature_array, $best_array); $cdist = cosinus($feature_array, $best_array); // echo '<br>-----------------------------------------------------------------------------------------------------<br>'; // echo '<br> Euclidean Distance (for ranking): '.$edist.'<br>'; // echo '<br> Cosine Similarity Distance (for ranking): '.$cdist.'<br>'; echo '<div style=" width:100%; height:45px; padding:10px; margin:5px -10px; background-color:#1B5FA3; color:white; font-size:20px;">Step 3: Rank based on cosine similarity score <br> Cosine similarity score : ' . $cdist . ' </div>'; echo '</div>'; // Function Implementation of Euclidean Distance and Cosine Similarity Distance /** * Euclidean distance * d(a, b) = sqrt( summation{i=1,n}((b[i] - a[i]) ^ 2) ) * * @param array $a * @param array $b * @return boolean */ function euclidean(array $a, array $b)
public static function createStatisticsForMetadatadescriptionCache() { set_time_limit(5200); \Session::flash('rawArray', 1); $db = \DB::getMongoDB(); $db = $db->temp; $result = \MongoDB\Entity::where('documentType', '=', 'metadatadescription')->where('content.description', 'exists', true)->get()->toArray(); if (count($result) > 0) { foreach ($result as &$parent) { $children = \MongoDB\Entity::whereIn('parents', [$parent['_id']])->where('content.features.entities', 'exists', true)->get(['content.features'])->toArray(); $parent['content']['statistics']['majvoting'] = array(); $parent['content']['statistics']["crowdtruth"] = array(); $parent['content']['features']['entities'] = array(); //$parent['content']['features']['initialEntities'] = array(); $parent['content']['features']['topics'] = array(); foreach ($children as $child) { if (isset($child["content"]["features"]["topics"])) { $parent['content']['features']['topics'] = $child["content"]["features"]["topics"]; } } foreach ($children as $child) { if (!empty($child['content']['features']['initialEntities'])) { foreach ($child['content']['features']['initialEntities'] as $childKey => $childValue) { $found = false; foreach ($parent['content']['statistics']['majvoting'] as $parentKey => $parentValue) { if (strtolower($childValue["label"]) == strtolower($parentValue["label"]) && intval($childValue["startOffset"]) == intval($parentValue["startOffset"]) && intval($childValue["endOffset"]) == intval($parentValue["endOffset"])) { $found = true; // $parent["content"]["statistics"]["entities"][$parentKey]["relevanceScore"]["count"] += 1; // $parent["content"]["statistics"]["entities"][$parentKey]["relevanceScore"]["value"] = $parent["content"]["statistics"]["entities"][$parentKey]["relevanceScore"]["count"] / 6; array_push($parent["content"]["features"]["entities"][$parentKey]["extractors"], $childValue["provenance"]); 
array_push($parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["extractors"], $childValue["provenance"]); $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"] += 1; $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["relevanceScore"]["value"] = $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"] / 6; $parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["extractors"][$childValue["provenance"]] = $childValue["confidence"]; $parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["value"] = max($parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["extractors"]); $noConf = 0; foreach ($parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["extractors"] as $confKey => $confVal) { if ($confVal != null) { $noConf++; } } if ($noConf != 0) { $parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["mean"] = array_sum($parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["extractors"]) / $noConf; $parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["stddev"] = stats_stddev($parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["extractors"]); $parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["mse"] = pow($parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["stddev"], 2) / $noConf; } else { $parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["mean"] = $parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["value"]; $parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["stddev"] = stats_stddev($parent["content"]["statistics"]["majvoting"][$parentKey]["confidence"]["extractors"]); $parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["mse"] = 
pow($parent["content"]["statistics"]["majvoting"][$parentKey]["clarity"]["stddev"], 2); } foreach ($childValue["types"] as $keyType => $valueType) { $foundType = false; foreach ($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"] as $parentTypeKey => $parentTypeValue) { if (strtolower($parentTypeKey) == strtolower($valueType["typeURI"])) { if (!in_array($childValue["provenance"], $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["extractors"])) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] += 1; array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundType = true; } else { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$parentTypeKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundType == true) { break; } } if ($foundType == false) { if ($valueType["typeURI"] != null) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$valueType["typeURI"]] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$valueType["typeURI"]]["count"] = 1; $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$valueType["typeURI"]]["extractors"] = array(); 
array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$valueType["typeURI"]]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$valueType["typeURI"]]["relevanceScore"] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerType"][$valueType["typeURI"]]["relevanceScore"]["value"] = 1 / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } } $foundResource = false; foreach ($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"] as $parentResourceKey => $parentResourceValue) { if (strtolower($parentResourceKey) == strtolower($valueType["entityURI"])) { if (!in_array($childValue["provenance"], $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["extractors"])) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] += 1; array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundResource = true; } else { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$parentResourceKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundResource == true) { break; } } if ($foundResource == 
false) { if ($valueType["entityURI"] != null) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["count"] = 1; $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"] = array(); array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"]["value"] = 1 / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } } $foundLabelTypePair = false; $tempTypeValue = ""; if ($valueType["typeURI"] != null) { $tempTypeValue = $valueType["typeURI"]; } foreach ($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"] as $parentLabelTypeKey => $parentLabelTypeValue) { if (strtolower($parentLabelTypeKey) == strtolower($childValue["label"] . "-" . 
$tempTypeValue)) { if (!in_array($childValue["provenance"], $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["extractors"])) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] += 1; array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelTypePair = true; } else { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$parentLabelTypeKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundLabelTypePair == true) { break; } } if ($foundLabelTypePair == false) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["count"] = 1; $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"] = array(); array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . 
$tempTypeValue]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"]["value"] = 1 / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } /* $foundLabelResourcePair = false; $tempResourceValue = ""; if ($valueType["entityURI"] != null) { $tempResourceValue = $valueType["entityURI"]; } foreach ($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"] as $parentLabelResourceKey => $parentLabelResourceValue) { if (strtolower($parentLabelResourceKey) == strtolower($childValue["label"] . "-" . $tempResourceValue)) { if (!in_array($childValue["provenance"], $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["extractors"])) { //echo $parent["_id"]; //dd($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["count"] += 1; array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . 
$tempResourceValue]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelResourcePair = true; } else { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$parentLabelResourceKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundLabelResourcePair == true) { break; } } if ($foundLabelResourcePair == false) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["count"] = 1; $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"] = array(); array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . 
$tempResourceValue]["relevanceScore"]["value"] = 1/$parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } */ $foundTypeResourcePair = false; $tempTypeValue = ""; $tempResourceValue = ""; if ($valueType["entityURI"] != null) { $tempResourceValue = $valueType["entityURI"]; } if ($valueType["typeURI"] != null) { $tempTypeValue = $valueType["typeURI"]; } foreach ($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"] as $parentLabelTypeKey => $parentLabelTypeValue) { if (strtolower($parentLabelTypeKey) == strtolower($tempTypeValue . "-" . $tempResourceValue)) { if (!in_array($childValue["provenance"], $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"])) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] += 1; array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundTypeResourcePair = true; } else { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$parentLabelTypeKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundTypeResourcePair == true) { break; } } if ($foundTypeResourcePair == false) { 
$parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["count"] = 1; $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"] = array(); array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1 / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelTypeResourcePair = false; foreach ($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"] as $parentLabelTypeResourceKey => $parentLabelTypeResourceValue) { if (strtolower($parentLabelTypeResourceKey) == strtolower($childValue["label"] . "-" . $tempTypeValue . "-" . 
$tempResourceValue)) { if (!in_array($childValue["provenance"], $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"])) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] += 1; array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } $foundLabelTypeResourcePair = true; } else { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["relevanceScore"]["value"] = $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$parentLabelTypeResourceKey]["count"] / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } if ($foundLabelTypeResourcePair == true) { break; } } if ($foundLabelTypeResourcePair == false) { $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["count"] = 1; $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . 
$tempResourceValue]["extractors"] = array(); array_push($parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array(); $parent['content']['statistics']['majvoting'][$parentKey]["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1 / $parent["content"]["statistics"]["majvoting"][$parentKey]["noExtractorsPerLabel"]["count"]; } } } if ($found == true) { break; } // break; } if ($found == false) { $extractedEntity = array(); $extractedEntity["label"] = $childValue["label"]; $extractedEntity["startOffset"] = intval($childValue["startOffset"]); $extractedEntity["endOffset"] = intval($childValue["endOffset"]); $extractedEntity["extractors"] = array(); array_push($extractedEntity["extractors"], $childValue["provenance"]); $newEntity = array(); $newEntity["label"] = $childValue["label"]; $newEntity["startOffset"] = $childValue["startOffset"]; $newEntity["endOffset"] = $childValue["endOffset"]; // $newEntity["relevanceScore"] = array(); // $newEntity["relevanceScore"]["count"] = 1; // $newEntity["relevanceScore"]["value"] = 1/6; $newEntity["noExtractorsPerLabel"] = array(); $newEntity["noExtractorsPerLabel"]["extractors"] = array(); array_push($newEntity["noExtractorsPerLabel"]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsPerLabel"]["count"] = 1; $newEntity["noExtractorsPerLabel"]["relevanceScore"] = array(); $newEntity["noExtractorsPerLabel"]["relevanceScore"]["value"] = 1 / 6; $newEntity["confidence"] = array(); $newEntity["confidence"]["extractors"] = array(); $newEntity["confidence"]["extractors"][$childValue["provenance"]] = 
$childValue["confidence"]; $newEntity["confidence"]["value"] = $childValue["confidence"]; $newEntity["clarity"] = array(); $newEntity["clarity"]["mean"] = $childValue["confidence"]; $newEntity["clarity"]["stddev"] = stats_stddev($newEntity["confidence"]["extractors"]); $newEntity["clarity"]["mse"] = pow($newEntity["clarity"]["stddev"], 2) / 1; $newEntity["noExtractorsPerType"] = array(); $newEntity["noExtractorsPerResource"] = array(); $newEntity["noExtractorsLabelTypePair"] = array(); $newEntity["noExtractorsLabelResourcePair"] = array(); $newEntity["noExtractorsTypeResourcePair"] = array(); $newEntity["noExtractorsLabelTypeResourcePair"] = array(); foreach ($childValue["types"] as $keyType => $valueType) { if ($valueType["typeURI"] != null) { $newEntity["noExtractorsPerType"][$valueType["typeURI"]] = array(); $newEntity["noExtractorsPerType"][$valueType["typeURI"]]["count"] = 1; $newEntity["noExtractorsPerType"][$valueType["typeURI"]]["extractors"] = array(); array_push($newEntity["noExtractorsPerType"][$valueType["typeURI"]]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsPerType"][$valueType["typeURI"]]["relevanceScore"] = array(); $newEntity["noExtractorsPerType"][$valueType["typeURI"]]["relevanceScore"]["value"] = 1; } if ($valueType["entityURI"] != null) { $newEntity["noExtractorsPerResource"][$valueType["entityURI"]] = array(); $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["count"] = 1; $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"] = array(); array_push($newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"] = array(); $newEntity["noExtractorsPerResource"][$valueType["entityURI"]]["relevanceScore"]["value"] = 1; } $tempTypeValue = ""; if ($valueType["typeURI"] != null) { $tempTypeValue = $valueType["typeURI"]; } if (!array_key_exists($childValue["label"] . "-" . 
$tempTypeValue, $newEntity["noExtractorsLabelTypePair"]) || !array_key_exists(strtolower($childValue["label"] . "-" . $tempTypeValue), $newEntity["noExtractorsLabelTypePair"])) { $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue] = array(); $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["count"] = 1; $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"] = array(); array_push($newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"] = array(); $newEntity["noExtractorsLabelTypePair"][$childValue["label"] . "-" . $tempTypeValue]["relevanceScore"]["value"] = 1; } $tempResourceValue = ""; if ($valueType["entityURI"] != null) { $tempResourceValue = $valueType["entityURI"]; } if (!array_key_exists($childValue["label"] . "-" . $tempResourceValue, $newEntity["noExtractorsLabelResourcePair"]) || !array_key_exists(strtolower($childValue["label"] . "-" . $tempResourceValue), $newEntity["noExtractorsLabelResourcePair"])) { $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue] = array(); $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["count"] = 1; $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"] = array(); array_push($newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"] = array(); $newEntity["noExtractorsLabelResourcePair"][$childValue["label"] . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1; } if (!array_key_exists($tempTypeValue . "-" . 
$tempResourceValue, $newEntity["noExtractorsTypeResourcePair"]) || !array_key_exists(strtolower($tempTypeValue . "-" . $tempResourceValue), $newEntity["noExtractorsTypeResourcePair"])) { $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue] = array(); $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["count"] = 1; $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"] = array(); array_push($newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array(); $newEntity["noExtractorsTypeResourcePair"][$tempTypeValue . "-" . $tempResourceValue]["relevanceScore"]["value"] = 1; } if (!array_key_exists($childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue, $newEntity["noExtractorsLabelTypeResourcePair"]) || !array_key_exists(strtolower($childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue), $newEntity["noExtractorsLabelTypeResourcePair"])) { $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue] = array(); $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["count"] = 1; $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"] = array(); array_push($newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["extractors"], $childValue["provenance"]); $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . "-" . $tempResourceValue]["relevanceScore"] = array(); $newEntity["noExtractorsLabelTypeResourcePair"][$childValue["label"] . "-" . $tempTypeValue . 
"-" . $tempResourceValue]["relevanceScore"]["value"] = 1; } } array_push($parent['content']['statistics']['majvoting'], $newEntity); array_push($parent['content']['features']['entities'], $extractedEntity); // dd($parent['content']['statistics']['entities']); } // dd($parent['content']['statistics']['entities']); } } } $parent['content']['statistics']["crowdtruth"]["cosineSimilarityAllLabels"] = array(); $extractors = array("thd", "nerd", "textrazor", "lupedia", "semitags", "dbpediaspotlight"); if (count($parent['content']['statistics']['majvoting']) != 0) { $pivotTable = array(); $cosineSimilarity = array(); for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); for ($i = 0; $i < count($parent["content"]["statistics"]["majvoting"]); $i++) { $pivotTable[$extractors[$j]][$parent["content"]["statistics"]["majvoting"][$i]["label"] . '-' . $parent["content"]["statistics"]["majvoting"][$i]["startOffset"] . '-' . $parent["content"]["statistics"]["majvoting"][$i]["endOffset"]] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"] as $keyEntity => $keyValue) { foreach ($keyValue["noExtractorsPerLabel"]["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$keyValue["label"] . '-' . $keyValue["startOffset"] . '-' . $keyValue["endOffset"]] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); // dd($pivotTable[$key][$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } $parent['content']['statistics']["crowdtruth"]["cosineSimilarityAllLabels"] = $cosineSimilarity; $parent['content']['statistics']['crowdtruth']['entities'] = array(); foreach ($parent['content']['statistics']['majvoting'] as $key => $value) { $labelDetails = $value["label"] . '-' . 
$value["startOffset"] . '-' . $value["endOffset"]; $parent['content']['statistics']['crowdtruth']['entities'][$key] = array(); $parent['content']['statistics']['crowdtruth']['entities'][$key]["label"] = $value["label"]; $parent['content']['statistics']['crowdtruth']['entities'][$key]["startOffset"] = $value["startOffset"]; $parent['content']['statistics']['crowdtruth']['entities'][$key]["endOffset"] = $value["endOffset"]; $parent['content']['statistics']['crowdtruth']['entities'][$key]["cosineSimilarityPerType"] = array(); $pivotTable = array(); $cosineSimilarity = array(); $extractors = $parent['content']['statistics']['majvoting'][$key]["noExtractorsPerLabel"]["extractors"]; for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsPerType"] as $typeKey => $typeValue) { foreach ($extractors as $extractorName) { $pivotTable[$extractorName][$typeKey] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsPerType"] as $typeKey => $typeValue) { foreach ($typeValue["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$typeKey] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } $parent['content']['statistics']["crowdtruth"]["entities"][$key]["cosineSimilarityPerType"] = $cosineSimilarity; $parent['content']['statistics']['crowdtruth']['entities'][$key]["cosineSimilarityPerResource"] = array(); $pivotTable = array(); $cosineSimilarity = array(); for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); } foreach 
($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsPerResource"] as $resourceKey => $resourceValue) { foreach ($extractors as $extractorName) { $pivotTable[$extractorName][$resourceKey] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsPerResource"] as $resourceKey => $resourceValue) { foreach ($resourceValue["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$resourceKey] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } $parent['content']['statistics']["crowdtruth"]["entities"][$key]["cosineSimilarityPerResource"] = $cosineSimilarity; $parent['content']['statistics']['crowdtruth']['entities'][$key]["cosineSimilarityPerLabelTypePair"] = array(); $pivotTable = array(); $cosineSimilarity = array(); for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsLabelTypePair"] as $resourceKey => $resourceValue) { foreach ($extractors as $extractorName) { $pivotTable[$extractorName][$resourceKey] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsLabelTypePair"] as $resourceKey => $resourceValue) { foreach ($resourceValue["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$resourceKey] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } 
$parent['content']['statistics']["crowdtruth"]["entities"][$key]["cosineSimilarityPerLabelTypePair"] = $cosineSimilarity; $parent['content']['statistics']['crowdtruth']['entities'][$key]["cosineSimilarityPerLabelResourcePair"] = array(); $pivotTable = array(); $cosineSimilarity = array(); for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsLabelResourcePair"] as $resourceKey => $resourceValue) { foreach ($extractors as $extractorName) { $pivotTable[$extractorName][$resourceKey] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsLabelResourcePair"] as $resourceKey => $resourceValue) { foreach ($resourceValue["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$resourceKey] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } $parent['content']['statistics']["crowdtruth"]["entities"][$key]["cosineSimilarityPerLabelResourcePair"] = $cosineSimilarity; $parent['content']['statistics']['crowdtruth']['entities'][$key]["cosineSimilarityPerTypeResourcePair"] = array(); $pivotTable = array(); $cosineSimilarity = array(); for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsTypeResourcePair"] as $resourceKey => $resourceValue) { foreach ($extractors as $extractorName) { $pivotTable[$extractorName][$resourceKey] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsTypeResourcePair"] as $resourceKey => $resourceValue) { foreach 
($resourceValue["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$resourceKey] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } $parent['content']['statistics']["crowdtruth"]["entities"][$key]["cosineSimilarityPerTypeResourcePair"] = $cosineSimilarity; $parent['content']['statistics']['crowdtruth']['entities'][$key]["cosineSimilarityPerLabelTypeResourcePair"] = array(); $pivotTable = array(); $cosineSimilarity = array(); for ($j = 0; $j < sizeof($extractors); $j++) { $pivotTable[$extractors[$j]] = array(); $cosineSimilarity[$extractors[$j]] = array(); } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsLabelTypeResourcePair"] as $resourceKey => $resourceValue) { foreach ($extractors as $extractorName) { $pivotTable[$extractorName][$resourceKey] = 0; } } foreach ($parent["content"]["statistics"]["majvoting"][$key]["noExtractorsLabelTypeResourcePair"] as $resourceKey => $resourceValue) { foreach ($resourceValue["extractors"] as $keyExtractor => $valueExtractor) { $pivotTable[$valueExtractor][$resourceKey] = 1; } } foreach ($extractors as $extractorName) { $cosineSimilarity[$extractorName] = array(); $sumArray = sumUpArrays($pivotTable); $diffArray = extractArrays($sumArray, $pivotTable[$extractorName]); $cosineSimilarity[$extractorName] = cosinus($pivotTable[$extractorName], $diffArray); } $parent['content']['statistics']["crowdtruth"]["entities"][$key]["cosineSimilarityPerLabelTypeResourcePair"] = $cosineSimilarity; } } } try { \MongoDB\Temp::where('documentType', '=', 'metadatadescription')->forceDelete(); $db->batchInsert($result, array('continueOnError' => true)); } catch (Exception $e) { // ContinueOnError will still throw an exception on duplication, even though it 
continues, so we just move on. } // dd("done"); } \Session::forget('rawArray'); // print_r($parent["content"]); }