PHP get_positive_featuresの例

プログラミング言語: PHP

メソッド/関数: get_positive_features

hotexamples.comのコード掲載数: 4

PHP get_positive_features - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPHPのget_positive_featuresの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: procedures_retrieve.php プロジェクト: portokallidis/Metamorphosis-Meducator

/**
 * Builds the clusters for every classification source and stores them on disk.
 *
 * For each of "metadata", "uses", "tags" and "replinks" the configured
 * algorithm is run (1 = Kohonen, 2 = aggregative, anything else = YACA).
 * Positive features are then attached (not for "replinks") and the result is
 * serialized to $IOdir . "clusters_" . $type. Progress messages are appended
 * to output.log in the members directory.
 *
 * NOTE(review): $IOdir and the $classification_method_* variables are not
 * declared in this function; presumably config.php defines them when it is
 * included into this scope - confirm, because require_once does nothing if
 * the file has already been loaded elsewhere.
 *
 * @param array $req request data; only the "cl_useold" entry is read, and it
 *                   is forwarded to the clustering function
 * @return string always "OK"
 */
function classify($req)
{
    global $CONFIG;
    // Clustering can run for a long time, so lift the execution time limit.
    set_time_limit(0);
    // The scripts included below run inside this function's scope, so the
    // local variable names are deliberately left as they were.
    require_once $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/config.php";
    require_once $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/classes.php";
    $outputfile = $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/output.log";
    file_put_contents($outputfile, "Starting classification...\n", FILE_APPEND);
    $classification_methods = array(
        "metadata" => $classification_method_metadata,
        "uses" => $classification_method_uses,
        "tags" => $classification_method_tags,
        "replinks" => $classification_method_replinks
    );
    foreach ($classification_methods as $type => $value) {
        file_put_contents($outputfile, "Creating clusters for {$type}...\n", FILE_APPEND);
        switch ($value) {
            case 1:
                // Kohonen
                require_once $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/kohonen.php";
                $array_clusters[$type] = run_kohonen($type, $req["cl_useold"]);
                break;
            case 2:
                // aggregative
                require_once $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/aggregative.php";
                $array_clusters[$type] = clusterize_aggregative($type, $req["cl_useold"]);
                break;
            default:
                // every other value is treated as 3 (YACA)
                require_once $CONFIG->path . "mod/profile_manager/views/default/profile_manager/members/yaca.php";
                $array_clusters[$type] = clusterize_yaca($type, $req["cl_useold"]);
                break;
        }
        // Positive features come from the document-term matrix, which
        // replinks does not have.
        if ($type != "replinks") {
            file_put_contents($outputfile, "Calculating positive features for {$type}...\n", FILE_APPEND);
            $dt_matrix = unserialize(file_get_contents($IOdir . $type . "_dt"));
            $array_clusters[$type] = get_positive_features($array_clusters[$type], $dt_matrix, $type);
        }
        $clusters_file = $IOdir . "clusters_" . $type;
        file_put_contents($clusters_file, serialize($array_clusters[$type]));
        // On Linux, when this process owns the file, make it readable and
        // writable for everybody.
        if (PHP_OS == "Linux" && posix_getuid() == fileowner($clusters_file)) {
            chmod($clusters_file, 0666);
        }
        file_put_contents($outputfile, "Clusters for {$type} created\n\n", FILE_APPEND);
    }
    return "OK";
}

コード例 #2

ファイルを表示

ファイル: aggregative.php プロジェクト: portokallidis/Metamorphosis-Meducator

/**
 * Clusters the documents of one source with the aggregative (bottom-up) method.
 *
 * Starts with one cluster per row of the sorted document-document matrix and,
 * on every pass, merges each cluster with its best-correlated partner as long
 * as that correlation is non-zero and not lower than the quality index of
 * either cluster. The process stops after a pass without merges or after more
 * than 50 passes. Clusters holding a single document are discarded.
 *
 * For every type except 'replinks' the surviving clusters are passed through
 * get_positive_features(); 'replinks' has no document-term matrix, so its
 * clusters are returned as they are.
 *
 * @param string $type source to cluster: 'metadata', 'tags', 'uses' or 'replinks'
 * @return array the resulting Cluster objects, each with array_docs sorted ascending
 */
function clusterize_aggregative($type)
{
    global $IOdir;
    global $usern;
    // Matrix files are named "<type>_dd" / "<type>_dt"; replinks only has "_dd".
    $docdoc = unserialize(file_get_contents($IOdir . $type . '_dd'));
    $docterm = null;
    if ($type != 'replinks') {
        $docterm = unserialize(file_get_contents($IOdir . $type . '_dt'));
    }
    $docdoc = sort_matrix($docdoc);
    // Initial state: every document is its own cluster.
    $clusters = array();
    foreach ($docdoc as $g => $row) {
        $c = new Cluster($type, $usern, "aggregative", 1);
        $c->array_docs[] = $g;
        $clusters[] = $c;
    }
    $def_cluster = array();
    $it = 0;
    while (true) {
        $it++;
        $current = $clusters;
        $total = count($current);
        $next = array();
        // ids of clusters already merged during this pass
        $excluded = array();
        $changes = false;
        for ($i = 0; $i < $total; $i++) {
            if (in_array($current[$i]->id, $excluded)) {
                continue;
            }
            // Correlation of cluster $i with every other still-available cluster.
            $dist_v = array();
            for ($j = 0; $j < $total; $j++) {
                if (in_array($current[$j]->id, $excluded)) {
                    continue;
                }
                if ($i != $j) {
                    $dist_v[$current[$j]->id] = correlation($current[$i], $current[$j], $docdoc);
                }
            }
            // Best candidate first.
            arsort($dist_v);
            foreach ($dist_v as $id => $correlation) {
                $fuse = find_cluster_by_id($id, $current);
                // NOTE(review): if find_cluster_by_id() returns an array index,
                // index 0 is also skipped by this falsy check - confirm its contract.
                if (!$fuse) {
                    continue;
                }
                $qi1 = quality_index($current[$i], $docdoc);
                $qi2 = quality_index($current[$fuse], $docdoc);
                // Only the best usable candidate is considered: give up on this
                // cluster if merging would not be at least as good as both parts.
                if ($correlation == 0 || $correlation < $qi1 || $correlation < $qi2) {
                    break;
                }
                $c = new Cluster($type, $usern, "aggregative", 1);
                $c->array_docs = array_merge($current[$i]->array_docs, $current[$fuse]->array_docs);
                $next[] = $c;
                $excluded[] = $current[$fuse]->id;
                $excluded[] = $current[$i]->id;
                $changes = true;
                break;
            }
        }
        // Clusters that were not merged survive unchanged into the next pass.
        foreach ($current as $remaining) {
            if (!in_array($remaining->id, $excluded)) {
                $next[] = $remaining;
            }
        }
        $clusters = $next;
        if ($it > 50 || !$changes) {
            $def_cluster = $next;
            break;
        }
    }
    // Delete clusters that contain only one element.
    foreach ($def_cluster as $key => $cluster) {
        if (count($cluster->array_docs) == 1) {
            unset($def_cluster[$key]);
        }
    }
    if ($type != 'replinks') {
        echo "\nCalculating positive features for {$type}...\n";
        $array_cluster = get_positive_features($def_cluster, $docterm, $type);
    } else {
        // There are no positive features for replinks: keep the clusters as
        // they are instead of leaving the result undefined.
        $array_cluster = $def_cluster;
    }
    // Sort each cluster's documents by guid.
    foreach ($array_cluster as $key => $cluster) {
        sort($array_cluster[$key]->array_docs);
    }
    return $array_cluster;
}

コード例 #3

ファイルを表示

ファイル: kohonen.php プロジェクト: portokallidis/Metamorphosis-Meducator

/**
 * Clusters the documents of one source by running the external Kohonen program.
 *
 * The document-term and document-document matrices are flattened into strings
 * (cells separated by ",", each row terminated by "-") and passed on the
 * command line. The program's result is then read from $output_file_kohonen;
 * line i of that file is used as the cluster label of the i-th document of
 * the document-document matrix. Positive features are attached with
 * get_positive_features() and clusters with fewer than two documents are
 * removed.
 *
 * NOTE(review): any type other than 'tags' or 'uses' (including 'replinks')
 * falls back to the metadata matrices - confirm this is intended.
 *
 * @param string $type_cluster source to cluster: 'tags', 'uses' or 'metadata'
 * @return array Cluster objects keyed by the label written by the Kohonen program
 */
function run_kohonen($type_cluster)
{
    global $output_file_kohonen, $IndexingClassificationPath, $IOdir;
    global $usern;
    switch ($type_cluster) {
        case 'tags':
            $m_dt = unserialize(file_get_contents($IOdir . 'tags_dt'));
            $m_dd = unserialize(file_get_contents($IOdir . 'tags_dd'));
            break;
        case 'uses':
            $m_dt = unserialize(file_get_contents($IOdir . 'uses_dt'));
            $m_dd = unserialize(file_get_contents($IOdir . 'uses_dd'));
            break;
        case 'metadata':
        default:
            $m_dt = unserialize(file_get_contents($IOdir . 'metadata_dt'));
            $m_dd = unserialize(file_get_contents($IOdir . 'metadata_dd'));
            break;
    }
    $kohonen_c = $IndexingClassificationPath . 'kohonen';
    // Flatten the document-term matrix. $num_t ends up as the column count of
    // the last row, which is what gets passed to the program as -c.
    $num_t = 0;
    $doc_term = '';
    foreach ($m_dt as $guid => $row) {
        $num_t = count($row);
        ksort($row);
        $cells = array();
        foreach ($row as $k => $v) {
            // For tags the cell is used directly; for the other sources the
            // cell is indexed again with its own key.
            $cells[] = ($type_cluster == 'tags') ? $v : $v[$k];
        }
        // An empty row contributes nothing, not even the "-" terminator.
        if ($num_t > 0) {
            $doc_term .= implode(',', $cells) . '-';
        }
    }
    // Flatten the document-document matrix and remember the document order.
    $doc_doc = '';
    $array_guid = array();
    foreach ($m_dd as $guid => $row) {
        $array_guid[] = $guid;
        ksort($row);
        if (count($row) > 0) {
            $doc_doc .= implode(',', $row) . '-';
        }
    }
    $num_d = count($m_dt);
    // NOTE(review): the matrices are interpolated into the shell command
    // without escaping; they come from unserialize()d files under $IOdir, so
    // this is only safe while those files are trusted.
    $command_string = "{$kohonen_c} -r {$num_d} -c {$num_t} -t {$doc_term} -d {$doc_doc}";
    echo $command_string;
    shell_exec($command_string);
    $clusters_strings = file_get_contents($output_file_kohonen);
    // explode() replaces split(), which no longer exists in PHP 7+; the
    // delimiter is a plain newline, so the result is the same.
    $info_cluster = explode("\n", $clusters_strings);
    // Drop the last piece (whatever follows the final newline).
    array_pop($info_cluster);
    $array_cluster = array();
    foreach ($array_guid as $i => $guid) {
        $label = $info_cluster[$i];
        // Create the cluster the first time its label is seen.
        if (!isset($array_cluster[$label])) {
            $array_cluster[$label] = new Cluster($type_cluster, $usern, "kohonen", 1);
        }
        $array_cluster[$label]->array_docs[] = $guid;
    }
    echo "\nCalculating positive features for {$type_cluster}...\n";
    $array_cluster = get_positive_features($array_cluster, $m_dt, $type_cluster);
    // Discard clusters with fewer than two documents.
    foreach ($array_cluster as $k => $obj) {
        if (count($obj->array_docs) < 2) {
            unset($array_cluster[$k]);
        }
    }
    return $array_cluster;
}

コード例 #4

ファイルを表示

ファイル: yaca.php プロジェクト: portokallidis/Metamorphosis-Meducator

/**
 * Clusters the documents of one source with the YACA algorithm.
 *
 * Every cluster is a list of array("guid" => ..., "inherence" => ...) entries,
 * where inherence is the mean document-document score between that entry and
 * the other entries of the cluster (-1 marks the only entry of a singleton).
 *
 * When no new classification is required and old results created by YACA are
 * available, the old clusters are patched using the "changes" log (edited and
 * new documents) instead of being rebuilt; this shortcut is never taken for
 * replinks, which the log does not cover. Otherwise one cluster is built
 * around every document, containing all documents whose score with it reaches
 * $YACA_threshold. Finally unnecessary clusters are dropped, documents are
 * sorted with compare_inherence_guid() and, except for replinks, positive
 * features are attached.
 *
 * @param string $type source to cluster: 'metadata', 'tags', 'uses' or 'replinks'
 * @return array the resulting Cluster objects
 */
function clusterize_yaca($type)
{
    global $YACA_threshold;
    global $IOdir;
    global $usern;
    global $new_classification_required;
    $docdoc = unserialize(file_get_contents($IOdir . $type . "_dd"));
    if ($type != "replinks") {
        $docterm = unserialize(file_get_contents($IOdir . $type . "_dt"));
    }
    $old_data_retrieved = false;
    // If possible, reuse the old classification results and only apply the
    // changes recorded in the file "changes" (log of edited and new documents).
    // That file has no entries for replinks, so the shortcut is not possible there.
    if ($new_classification_required == 0 && $type != "replinks") {
        // We need both the old clusters and the old doc-doc matrix.
        if (file_exists($IOdir . "old_clusters_{$type}") && file_exists($IOdir . "old_{$type}" . "_dd")) {
            $changes = unserialize(file_get_contents($IOdir . "changes"));
            $old_clusters = unserialize(file_get_contents($IOdir . "old_clusters_{$type}"));
            $old_clusters_values = array_values($old_clusters);
            $old_dd = unserialize(file_get_contents($IOdir . "old_{$type}" . "_dd"));
            // We can continue only if the old clusters have been created with YACA.
            if (isset($old_clusters_values[0]) && $old_clusters_values[0]->clusteringAlgorithm == "YACA") {
                // Re-create the clusters with the same documents so that they get
                // fresh ids; positive features are added later.
                $clusters = array();
                foreach ($old_clusters as $old_cluster) {
                    $c = new Cluster($type, $usern, "YACA", 1);
                    $c->array_docs = $old_cluster->array_docs;
                    $clusters[] = $c;
                }
                // For each edited document, for each cluster...
                foreach ($changes["edited"][$type] as $guid) {
                    foreach ($clusters as $num => $cluster) {
                        // ...if the document is in the cluster, remove it and take
                        // its old score out of every remaining entry's inherence.
                        $pos = $cluster->belongs($guid);
                        if ($pos != false) {
                            // belongs() gives a position; the array key is position - 1
                            unset($cluster->array_docs[$pos - 1]);
                            $remaining = count($cluster->array_docs);
                            foreach ($cluster->array_docs as $numres => $resource) {
                                if ($remaining > 1) {
                                    $cluster->array_docs[$numres]["inherence"] = ($resource["inherence"] * $remaining - $old_dd[$guid][$resource["guid"]]) / ($remaining - 1);
                                } else {
                                    // Only one entry is left: there is nothing to
                                    // average over (the formula would divide by
                                    // zero), so use the singleton marker.
                                    $cluster->array_docs[$numres]["inherence"] = -1;
                                }
                            }
                        }
                        // Restore consecutive keys.
                        $cluster->array_docs = array_values($cluster->array_docs);
                        // With its current scores, put the document (back) into
                        // every cluster it is related to.
                        if (yaca_is_related($cluster, $guid, $docdoc, $YACA_threshold)) {
                            yaca_add_document($cluster, $guid, $docdoc);
                        }
                    }
                }
                // For each new document...
                $guids = null;
                foreach ($changes["new"] as $guid) {
                    // ...add it to every existing cluster it is related to
                    foreach ($clusters as $num => $cluster) {
                        if (yaca_is_related($cluster, $guid, $docdoc, $YACA_threshold)) {
                            yaca_add_document($cluster, $guid, $docdoc);
                        }
                    }
                    // ...and create a new cluster starting from it.
                    $c = new Cluster($type, $usern, "YACA", 1);
                    $c->array_docs[] = array("guid" => $guid, "inherence" => 0);
                    if ($guids === null) {
                        // The guid list does not change, so read it only once.
                        $guids = unserialize(file_get_contents($IOdir . "guids"));
                    }
                    foreach ($guids as $guid2) {
                        if ($docdoc[$guid][$guid2] >= $YACA_threshold && $guid != $guid2) {
                            // The inherence is replaced by the new mean (it used to
                            // be accumulated with "+=", unlike every other place
                            // that applies this formula).
                            yaca_add_document($c, $guid2, $docdoc);
                        }
                    }
                    // -1 stands for "the maximum": the only element of a cluster
                    // is obviously totally inherent to it.
                    if (count($c->array_docs) == 1) {
                        $c->array_docs[0]["inherence"] = -1;
                    }
                    $clusters[] = $c;
                }
                $old_data_retrieved = true;
            }
        }
    }
    // If it wasn't possible to use the old classification results, do the whole process.
    if ($old_data_retrieved == false) {
        $clusters = array();
        foreach ($docdoc as $key => $row) {
            $c = new Cluster($type, $usern, "YACA", 1);
            $c->array_docs[] = array("guid" => $key, "inherence" => 0);
            foreach ($row as $key2 => $value) {
                if ($docdoc[$key][$key2] >= $YACA_threshold && $key != $key2) {
                    yaca_add_document($c, $key2, $docdoc);
                }
            }
            // -1 stands for "the maximum" (see above).
            if (count($c->array_docs) == 1) {
                $c->array_docs[0]["inherence"] = -1;
            }
            $clusters[] = $c;
        }
    }
    $clusters_ok = delete_unnecessary_clusters($clusters);
    // Sort each cluster's documents with compare_inherence_guid (inherence
    // descending, then guid).
    foreach ($clusters_ok as $num => $cluster) {
        usort($cluster->array_docs, "compare_inherence_guid");
    }
    // Add positive features to the clusters (there are none for replinks).
    if ($type != 'replinks') {
        echo "\nCalculating positive features for {$type}...\n";
        // it's very slow for metadata
        $clusters_pos = get_positive_features($clusters_ok, $docterm, $type);
    } else {
        $clusters_pos = $clusters_ok;
    }
    return $clusters_pos;
}

/**
 * Tells whether a document scores above the threshold with at least one
 * document of the cluster.
 *
 * @param object $cluster   cluster whose array_docs entries carry a "guid"
 * @param mixed  $guid      document to test
 * @param array  $docdoc    document-document matrix, indexed [guid][guid]
 * @param mixed  $threshold score that has to be exceeded
 * @return bool
 */
function yaca_is_related($cluster, $guid, $docdoc, $threshold)
{
    foreach ($cluster->array_docs as $resource) {
        if ($docdoc[$guid][$resource["guid"]] > $threshold) {
            return true;
        }
    }
    return false;
}

/**
 * Appends a document to a non-empty cluster and updates the inherences.
 *
 * Every existing entry gets its mean extended by its score with the new
 * document; the new entry gets the mean of its scores with all existing ones.
 *
 * @param object $cluster cluster to modify in place (must hold at least one entry)
 * @param mixed  $guid    document to add
 * @param array  $docdoc  document-document matrix, indexed [guid][guid]
 * @return void
 */
function yaca_add_document($cluster, $guid, $docdoc)
{
    $size = count($cluster->array_docs);
    $new_inherence = 0;
    foreach ($cluster->array_docs as $numres => $resource) {
        $score = $docdoc[$guid][$resource["guid"]];
        // The old mean covered ($size - 1) other entries; now it covers $size.
        $cluster->array_docs[$numres]["inherence"] = ($resource["inherence"] * ($size - 1) + $score) / $size;
        $new_inherence += $score;
    }
    $cluster->array_docs[] = array("guid" => $guid, "inherence" => $new_inherence / $size);
}