/**
 * Groups documents into clusters by bottom-up pairwise merging.
 *
 * Every row key of the document-document matrix starts out as its own
 * one-document cluster. During a pass, each cluster that has not been merged
 * yet is scored against all other unmerged clusters with correlation(); it is
 * fused with its best-scoring partner when that score is non-zero and not
 * below the quality_index() of either cluster. Passes repeat until one of
 * them performs no fusion or more than 50 passes have run.
 *
 * @param string $type      'metadata', 'tags', 'uses' or 'replinks'; selects
 *                          which "<type>_dd" file is loaded. Any other value
 *                          yields an empty result.
 * @param mixed  $cl_useold Not read by this function.
 *
 * @return array Cluster objects left after the merging passes, with every
 *               cluster whose array_docs holds exactly one entry removed and
 *               each remaining array_docs sorted. Keys are not re-indexed.
 */
function clusterize_aggregative($type, $cl_useold)
{
    global $CONFIG;

    set_time_limit(0); // this avoids timeouts

    // NOTE(review): $IOdir is neither a parameter nor declared global here;
    // presumably config.php defines it in this scope - confirm.
    include $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/config.php";
    include_once $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/classes.php";

    // Only the document-document matrix is needed here; the document-term
    // matrix is never read by this function, so it is not loaded.
    switch ($type) {
        case 'metadata':
            $docdoc = unserialize(file_get_contents($IOdir . 'metadata_dd'));
            break;
        case 'tags':
            $docdoc = unserialize(file_get_contents($IOdir . 'tags_dd'));
            break;
        case 'uses':
            $docdoc = unserialize(file_get_contents($IOdir . 'uses_dd'));
            break;
        case 'replinks':
            $docdoc = unserialize(file_get_contents($IOdir . 'replinks_dd'));
            break;
        default:
            // Unknown type: there is no matrix to cluster.
            return array();
    }

    $docdoc = sort_matrix($docdoc);

    // Seed: one cluster per matrix row, holding that row's key.
    $clusters = array();
    foreach ($docdoc as $g => $row) {
        $c = new Cluster($type, get_loggedin_user()->username, "aggregative", 1);
        $c->array_docs[] = $g;
        $clusters[] = $c;
    }

    $max_passes = 50;
    $it = 0;
    do {
        $it++;
        $current = $clusters;
        $total = count($current);
        $next = array();
        $excluded = array(); // ids of clusters already fused during this pass
        $changes = false;

        for ($i = 0; $i < $total; $i++) {
            if (in_array($current[$i]->id, $excluded)) {
                continue;
            }

            // Score cluster $i against every other cluster still available.
            $dist_v = array();
            for ($j = 0; $j < $total; $j++) {
                if ($i == $j || in_array($current[$j]->id, $excluded)) {
                    continue;
                }
                $dist_v[$current[$j]->id] = correlation($current[$i], $current[$j], $docdoc);
            }
            arsort($dist_v); // highest correlation first, ids kept as keys

            // Only the first resolvable candidate is evaluated: both the
            // rejection and the fusion branch leave the loop.
            foreach ($dist_v as $id => $correlation) {
                $fuse = find_cluster_by_id($id, $current);
                // $fuse is used as a position in $current, so 0 is a valid
                // result; only false/null are treated as "not found".
                // NOTE(review): assumes find_cluster_by_id() signals a miss
                // with false or null - confirm against the helper.
                if ($fuse === false || $fuse === null) {
                    continue;
                }

                $qi1 = quality_index($current[$i], $docdoc);
                $qi2 = quality_index($current[$fuse], $docdoc);
                if ($correlation == 0 || $correlation < $qi1 || $correlation < $qi2) {
                    break;
                }

                $c = new Cluster($type, get_loggedin_user()->username, "aggregative", 1);
                $c->array_docs = array_merge($current[$i]->array_docs, $current[$fuse]->array_docs);
                $next[] = $c;
                $excluded[] = $current[$fuse]->id;
                $excluded[] = $current[$i]->id;
                $changes = true;
                break;
            }
        }

        // Clusters that were not fused carry over unchanged.
        foreach ($current as $remaining) {
            if (!in_array($remaining->id, $excluded)) {
                $next[] = $remaining;
            }
        }

        $clusters = $next;
    } while ($it <= $max_passes && $changes);

    $def_cluster = $clusters;

    // delete clusters that contain only one element
    foreach ($def_cluster as $key => $cluster) {
        if (count($cluster->array_docs) == 1) {
            unset($def_cluster[$key]);
        }
    }

    // sort clusters documents by guid
    foreach ($def_cluster as $key => $cluster) {
        sort($def_cluster[$key]->array_docs);
    }

    return $def_cluster;
}
/**
 * Groups documents into clusters by bottom-up pairwise merging and, for
 * every type except 'replinks', passes the result through
 * get_positive_features().
 *
 * Every row key of the document-document matrix starts out as its own
 * one-document cluster. During a pass, each cluster that has not been merged
 * yet is scored against all other unmerged clusters with correlation(); it is
 * fused with its best-scoring partner when that score is non-zero and not
 * below the quality_index() of either cluster. Passes repeat until one of
 * them performs no fusion or more than 50 passes have run. Clusters whose
 * array_docs holds exactly one entry are then dropped.
 *
 * Reads the globals $IOdir (prefix of the matrix files) and $usern (passed
 * to the Cluster constructor). Echoes a progress line to the output.
 *
 * @param string $type 'metadata', 'tags', 'uses' or 'replinks'; selects which
 *                     "<type>_dt" / "<type>_dd" files are loaded. Any other
 *                     value yields an empty result.
 *
 * @return array For 'replinks', the merged clusters themselves; otherwise
 *               whatever get_positive_features() returns for them. In both
 *               cases each element's array_docs is sorted before returning.
 */
function clusterize_aggregative($type)
{
    global $IOdir;
    global $usern;

    switch ($type) {
        case 'metadata':
            $docterm = unserialize(file_get_contents($IOdir . 'metadata_dt'));
            $docdoc = unserialize(file_get_contents($IOdir . 'metadata_dd'));
            break;
        case 'tags':
            $docterm = unserialize(file_get_contents($IOdir . 'tags_dt'));
            $docdoc = unserialize(file_get_contents($IOdir . 'tags_dd'));
            break;
        case 'uses':
            $docterm = unserialize(file_get_contents($IOdir . 'uses_dt'));
            $docdoc = unserialize(file_get_contents($IOdir . 'uses_dd'));
            break;
        case 'replinks':
            // No document-term matrix is loaded for this type.
            $docdoc = unserialize(file_get_contents($IOdir . 'replinks_dd'));
            break;
        default:
            // Unknown type: there is no matrix to cluster.
            return array();
    }

    $docdoc = sort_matrix($docdoc);

    // Seed: one cluster per matrix row, holding that row's key.
    $clusters = array();
    foreach ($docdoc as $g => $row) {
        $c = new Cluster($type, $usern, "aggregative", 1);
        $c->array_docs[] = $g;
        $clusters[] = $c;
    }

    $max_passes = 50;
    $it = 0;
    do {
        $it++;
        $current = $clusters;
        $total = count($current);
        $next = array();
        $excluded = array(); // ids of clusters already fused during this pass
        $changes = false;

        for ($i = 0; $i < $total; $i++) {
            if (in_array($current[$i]->id, $excluded)) {
                continue;
            }

            // Score cluster $i against every other cluster still available.
            $dist_v = array();
            for ($j = 0; $j < $total; $j++) {
                if ($i == $j || in_array($current[$j]->id, $excluded)) {
                    continue;
                }
                $dist_v[$current[$j]->id] = correlation($current[$i], $current[$j], $docdoc);
            }
            arsort($dist_v); // highest correlation first, ids kept as keys

            // Only the first resolvable candidate is evaluated: both the
            // rejection and the fusion branch leave the loop.
            foreach ($dist_v as $id => $correlation) {
                $fuse = find_cluster_by_id($id, $current);
                // $fuse is used as a position in $current, so 0 is a valid
                // result; only false/null are treated as "not found".
                // NOTE(review): assumes find_cluster_by_id() signals a miss
                // with false or null - confirm against the helper.
                if ($fuse === false || $fuse === null) {
                    continue;
                }

                $qi1 = quality_index($current[$i], $docdoc);
                $qi2 = quality_index($current[$fuse], $docdoc);
                if ($correlation == 0 || $correlation < $qi1 || $correlation < $qi2) {
                    break;
                }

                $c = new Cluster($type, $usern, "aggregative", 1);
                $c->array_docs = array_merge($current[$i]->array_docs, $current[$fuse]->array_docs);
                $next[] = $c;
                $excluded[] = $current[$fuse]->id;
                $excluded[] = $current[$i]->id;
                $changes = true;
                break;
            }
        }

        // Clusters that were not fused carry over unchanged.
        foreach ($current as $remaining) {
            if (!in_array($remaining->id, $excluded)) {
                $next[] = $remaining;
            }
        }

        $clusters = $next;
    } while ($it <= $max_passes && $changes);

    $def_cluster = $clusters;

    // delete clusters that contain only one element
    foreach ($def_cluster as $key => $cluster) {
        if (count($cluster->array_docs) == 1) {
            unset($def_cluster[$key]);
        }
    }

    echo "\nCalculating positive features for {$type}...\n";
    if ($type != 'replinks') {
        $array_cluster = get_positive_features($def_cluster, $docterm, $type);
    } else {
        // There is no document-term matrix for 'replinks', so the merged
        // clusters are returned as they are instead of leaving the result
        // variable undefined.
        $array_cluster = $def_cluster;
    }

    // sort clusters documents by guid
    foreach ($array_cluster as $key => $cluster) {
        sort($array_cluster[$key]->array_docs);
    }

    return $array_cluster;
}