/**
 * Builds the upper-triangular document-document score matrix from a
 * document-term map, for the guid list stored in the "guids" file.
 *
 * For every ordered pair (guid, guid_cmp) where guid_cmp comes after guid in
 * the guid list, the score is the sum, over the keywords both documents share,
 * of get_scoreLU(keyword, doc) * (1 + context weight) for each of the two
 * documents; the sum is then multiplied by 5 / max (max is 4 when contexts are
 * used, 2 otherwise), which the original author describes as normalising the
 * value to the range 0-5. Diagonal entries are 0. Pairs below the diagonal are
 * not emitted, because the matrix is treated as triangular.
 *
 * @param array $docterm     Map guid => (keyword => (context => data)). The
 *                           inner values are passed untouched to get_scoreLU()
 *                           and get_scoreLU_c().
 * @param array $old_docdoc  Optional previous result of this function; entries
 *                           are reused when neither document of a pair changed.
 * @param array $old_docterm Optional document-term map that $old_docdoc was
 *                           computed from.
 *
 * @return array Map guid => (guid_cmp => score). Empty when the guid list
 *               cannot be read or is not an array.
 */
function toDocDocMetadataUses($docterm, $old_docdoc = array(), $old_docterm = array()) {
    global $CONFIG;

    set_time_limit(0); // this avoids timeouts

    // NOTE(review): $IOdir and $context_limit are never assigned in this
    // function; presumably the included config file defines them - confirm.
    include $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/config.php";

    // NOTE(review): unserialize() must only ever be fed trusted data; make sure
    // the "guids" file cannot be written by untrusted users.
    $serialized = file_get_contents($IOdir . "guids");
    if ($serialized === false) {
        return array(); // guid list unreadable: nothing to compare
    }
    $guids = unserialize($serialized);
    if (!is_array($guids)) {
        return array(); // corrupt or unexpected payload
    }

    // Loop-invariant settings, computed once.
    $with_contexts = isset($context_limit) && $context_limit != 0;
    $max = $with_contexts ? 4 : 2; // element : max = x : 5  =>  x = (element * 5) / max

    $return = array();
    $z = 0; // position of $guid in the guid list
    foreach ($guids as $guid) {
        $return[$guid] = array();
        $w = 0; // position of $guid_cmp in the guid list
        foreach ($guids as $guid_cmp) {
            if ($z == $w) {
                $return[$guid][$guid_cmp] = 0;
            } elseif ($z < $w) {
                // Only the upper triangle is computed.
                if (!isset($docterm[$guid]) || !isset($docterm[$guid_cmp])) {
                    $return[$guid][$guid_cmp] = 0;
                } elseif (isset($old_docdoc[$guid][$guid_cmp])
                    && isset($old_docterm[$guid]) && isset($old_docterm[$guid_cmp])
                    && $docterm[$guid] == $old_docterm[$guid]
                    && $docterm[$guid_cmp] == $old_docterm[$guid_cmp]
                ) {
                    // Neither document changed since the previous run and the
                    // old matrix really has this pair: reuse the stored value.
                    // (If the old entry is missing we fall through and
                    // recompute instead of storing null.)
                    $return[$guid][$guid_cmp] = $old_docdoc[$guid][$guid_cmp];
                } else {
                    $score = 0;
                    // Keywords present in both documents.
                    $common_keywords = array_intersect(array_keys($docterm[$guid]), array_keys($docterm[$guid_cmp]));
                    foreach ($common_keywords as $keyword) {
                        $context_add = 0;
                        $context2_add = 0;
                        if ($with_contexts) {
                            // Contexts of this keyword present in both documents.
                            $common_contexts = array_intersect(array_keys($docterm[$guid][$keyword]), array_keys($docterm[$guid_cmp][$keyword]));
                            foreach ($common_contexts as $context) {
                                // The first context is the keyword itself, so it
                                // is skipped. Array keys may be int or string;
                                // compare as strings (as array_intersect does)
                                // so that e.g. 1000 and "1e3" are not confused.
                                if ((string) $context === (string) $keyword) {
                                    continue;
                                }
                                $context_add += get_scoreLU_c($context, $docterm[$guid][$keyword]);
                                $context2_add += get_scoreLU_c($context, $docterm[$guid_cmp][$keyword]);
                            }
                        }
                        // Keyword weight of each document, boosted by its context weight.
                        $score += get_scoreLU($keyword, $docterm[$guid]) * (1 + $context_add)
                            + get_scoreLU($keyword, $docterm[$guid_cmp]) * (1 + $context2_add);
                    }
                    $return[$guid][$guid_cmp] = $score * 5 / $max;
                }
            }
            $w++;
        }
        $z++;
    }

    return $return;
}
/**
 * Builds one horizontal slice of the upper-triangular document-document score
 * matrix, so that the work can be split across $N sub-processes.
 *
 * Only the rows whose position z in the global $guids list satisfies
 * floor(($M - 1) / $N * count) <= z < floor($M / $N * count) are produced.
 * Within a row, only the diagonal (always 0) and the entries to its right are
 * emitted. A non-diagonal score is the sum, over the keywords both documents
 * share, of get_scoreLU(keyword, doc) * (1 + context weight) for each of the
 * two documents, multiplied by 5 / max (max is 4 when the global
 * $use_contexts equals 1, 2 otherwise), which the original author describes as
 * normalising the value to the range 0-5.
 *
 * @param array $docterm     Map guid => (keyword => (context => data)). The
 *                           inner values are passed untouched to get_scoreLU()
 *                           and get_scoreLU_c().
 * @param int   $M           1-based index of the slice to compute.
 * @param int   $N           Total number of slices; must be non-zero.
 * @param array $old_docdoc  Optional previous matrix; entries are reused when
 *                           neither document of a pair changed.
 * @param array $old_docterm Optional document-term map that $old_docdoc was
 *                           computed from.
 *
 * @return array Map guid => (guid_cmp => score) for the rows of slice $M.
 *               Empty when the global $guids is not an array.
 */
function toDocDocMetadataUses($docterm, $M, $N, $old_docdoc = array(), $old_docterm = array()) {
    global $use_contexts, $guids;

    if (!is_array($guids)) {
        return array(); // no guid list available: nothing to compare
    }

    // Loop-invariant settings, computed once. The bound expressions are kept
    // exactly as they were so that every sub-process agrees on the partition.
    $length = count($guids);
    $first_row = floor(($M - 1) / $N * $length);
    $end_row = floor($M / $N * $length);
    $with_contexts = ($use_contexts == 1);
    $max = $with_contexts ? 4 : 2; // element : max = x : 5  =>  x = (element * 5) / max

    $return = array();
    $z = 0; // position of $guid in the guid list
    foreach ($guids as $guid) {
        // We work only on 1/N of the matrix for each sub-process.
        if ($z >= $first_row && $z < $end_row) {
            $return[$guid] = array();
            $w = 0; // position of $guid_cmp in the guid list
            foreach ($guids as $guid_cmp) {
                if ($z == $w) {
                    $return[$guid][$guid_cmp] = 0;
                } elseif ($z < $w) {
                    // Only the upper triangle is computed.
                    if (!isset($docterm[$guid]) || !isset($docterm[$guid_cmp])) {
                        $return[$guid][$guid_cmp] = 0;
                    } elseif (isset($old_docdoc[$guid][$guid_cmp])
                        && isset($old_docterm[$guid]) && isset($old_docterm[$guid_cmp])
                        && $docterm[$guid] == $old_docterm[$guid]
                        && $docterm[$guid_cmp] == $old_docterm[$guid_cmp]
                    ) {
                        // Neither document changed since the previous run and
                        // the old matrix really has this pair: reuse the stored
                        // value. (If the old entry is missing we fall through
                        // and recompute instead of storing null.)
                        $return[$guid][$guid_cmp] = $old_docdoc[$guid][$guid_cmp];
                    } else {
                        $score = 0;
                        // Keywords present in both documents.
                        $common_keywords = array_intersect(array_keys($docterm[$guid]), array_keys($docterm[$guid_cmp]));
                        foreach ($common_keywords as $keyword) {
                            $context_add = 0;
                            $context2_add = 0;
                            if ($with_contexts) {
                                // Contexts of this keyword present in both documents.
                                $common_contexts = array_intersect(array_keys($docterm[$guid][$keyword]), array_keys($docterm[$guid_cmp][$keyword]));
                                foreach ($common_contexts as $context) {
                                    // The first context is the keyword itself,
                                    // so it is skipped. Array keys may be int or
                                    // string; compare as strings (as
                                    // array_intersect does) so that e.g. 1000
                                    // and "1e3" are not confused.
                                    if ((string) $context === (string) $keyword) {
                                        continue;
                                    }
                                    $context_add += get_scoreLU_c($context, $docterm[$guid][$keyword]);
                                    $context2_add += get_scoreLU_c($context, $docterm[$guid_cmp][$keyword]);
                                }
                            }
                            // Keyword weight of each document, boosted by its context weight.
                            $score += get_scoreLU($keyword, $docterm[$guid]) * (1 + $context_add)
                                + get_scoreLU($keyword, $docterm[$guid_cmp]) * (1 + $context2_add);
                        }
                        $return[$guid][$guid_cmp] = $score * 5 / $max;
                    }
                }
                $w++;
            }
        }
        $z++;
    }

    return $return;
}