/**
 * Compute a ranking weight for every word entry of one indexed page.
 *
 * Each entry of $wordarray is an array whose index 1 holds the word text and
 * whose index 2 is handed to calc_weight() as its first argument (presumably
 * the occurrence count -- confirm against the caller). The computed weight is
 * stored, cast to int, at index 3 of the same entry.
 *
 * Host, path, title and meta keywords are split into word lists via
 * unique_array(); a word scores extra when it is found in one of those lists.
 * Host/path matching is only done when $index_host == 1, meta keyword
 * matching only when $index_meta_keywords == 1.
 *
 * @param array  $wordarray word entries of the page (see above)
 * @param string $title     page title
 * @param string $host      host name of the page URL
 * @param string $path      path of the page URL
 * @param string $keywords  content of the meta keywords tag
 * @param array  $url_parts URL components; the 'path' and 'query' keys are read
 *                          (looks like parse_url() output -- confirm)
 *
 * @return array $wordarray with index 3 of every entry set to its weight
 */
function calc_weights($wordarray, $title, $host, $path, $keywords, $url_parts)
{
    global $index_host, $index_meta_keywords, $sort_results, $domain_mul, $cn_seg, $dompromo, $keypromo;

    $hostarray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($host))));
    $patharray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($path))));
    if ($cn_seg == '1') {
        //  Chinese text: keep every character, only split on blanks
        $titlearray = unique_array(explode(" ", strtolower($title)));
        $keywordsarray = unique_array(explode(" ", strtolower($keywords)));
    } else {
        $titlearray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($title))));
        $keywordsarray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($keywords))));
    }
    $path_depth = countSubstrs($path, "/");

    $main_url_factor = '1';
    if ($sort_results == '2') {
        //  'Main URLs (domains) on top' is selected
        //  the components may be missing from $url_parts, so guard both keys
        $act_path = isset($url_parts['path']) ? $url_parts['path'] : '';
        $has_query = !empty($url_parts['query']);

        //  localhost systems: a shallow path without any file name counts as main URL
        if ($host == 'localhost' && substr_count($act_path, ".") == 0 && substr_count($act_path, "/") <= 3) {
            $main_url_factor = $domain_mul;
        }

        //  only these index files are accepted as part of a main URL path
        $act_path = str_replace(array('index.php', 'index.html', 'index.htm', 'index.shtml'), '', $act_path);

        //  in the wild: host with exactly two dots, (almost) empty path and no query
        if ($host != 'localhost' && substr_count($host, ".") == 2 && strlen($act_path) <= 1 && !$has_query) {
            $main_url_factor = $domain_mul;
        }
    }

    //  is the promoted catchword part of the page text?
    $catch_found = false;
    if ($keypromo) {
        foreach ($wordarray as $entry) {
            if ($keypromo == $entry[1]) {
                $catch_found = true;
                break;
            }
        }
    }
    //  does the host contain the promoted domain?
    $domain_found = $dompromo && strpos($host, $dompromo) !== false;

    $promo = '';
    if (!$keypromo && $domain_found) {
        //  only a promoted domain is configured
        $promo = '1';
    }
    if (!$dompromo && $catch_found) {
        //  only a promoted catchword is configured
        $promo = '1';
    }
    if ($catch_found && $domain_found) {
        //  both are configured, so both have to match
        $promo = '1';
    }

    foreach ($wordarray as $wid => $word) {
        $word_in_path = 0;
        $word_in_domain = 0;
        $word_in_title = 0;
        $meta_keyword = 0;

        //  the loop variables below deliberately do not reuse $host / $path:
        //  the untouched $host is still needed for the calc_weight() call
        if ($index_host == 1) {
            foreach ($patharray as $path_entry) {
                if ($path_entry[1] == $word[1]) {
                    $word_in_path = 1;
                    break;
                }
            }
            foreach ($hostarray as $host_entry) {
                if ($host_entry[1] == $word[1]) {
                    $word_in_domain = 1;
                    break;
                }
            }
        }
        if ($index_meta_keywords == 1) {
            foreach ($keywordsarray as $keyword_entry) {
                if ($keyword_entry[1] == $word[1]) {
                    $meta_keyword = 1;
                    break;
                }
            }
        }
        foreach ($titlearray as $title_entry) {
            if ($title_entry[1] == $word[1]) {
                $word_in_title = 1;
                break;
            }
        }

        $wordarray[$wid][3] = (int) calc_weight($wordarray[$wid][2], $word_in_title, $word_in_domain, $word_in_path, $path_depth, $meta_keyword, $main_url_factor, $host, $promo);
    }

    reset($wordarray);
    return $wordarray;
}
Example #2
0
/**
 * Compute a ranking weight for every word entry of one indexed page.
 *
 * Each entry of $wordarray is an array whose index 1 holds the word text and
 * whose index 2 is handed to calc_weight() as its first argument (presumably
 * the occurrence count -- confirm against the caller). Note that the computed
 * weight, cast to int, is written back to that same index 2.
 *
 * Host, path, title and meta keywords are split into word lists via
 * unique_array(). Host/path matching is only done when $index_host == 1,
 * meta keyword matching only when $index_meta_keywords == 1; the title is
 * always checked.
 *
 * @param array  $wordarray word entries of the page (see above)
 * @param string $title     page title
 * @param string $host      host name of the page URL
 * @param string $path      path of the page URL
 * @param string $keywords  content of the meta keywords tag
 *
 * @return array $wordarray with index 2 of every entry replaced by its weight
 */
function calc_weights($wordarray, $title, $host, $path, $keywords)
{
    global $index_host, $index_meta_keywords;

    $hostarray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($host))));
    $patharray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($path))));
    $titlearray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($title))));
    $keywordsarray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($keywords))));
    $path_depth = countSubstrs($path, "/");

    foreach ($wordarray as $wid => $word) {
        $word_in_path = 0;
        $word_in_domain = 0;
        $word_in_title = 0;
        $meta_keyword = 0;

        //  loop variables get their own names so the $host / $path
        //  parameters are not overwritten while scanning
        if ($index_host == 1) {
            foreach ($patharray as $path_entry) {
                if ($path_entry[1] == $word[1]) {
                    $word_in_path = 1;
                    break;
                }
            }
            foreach ($hostarray as $host_entry) {
                if ($host_entry[1] == $word[1]) {
                    $word_in_domain = 1;
                    break;
                }
            }
        }
        if ($index_meta_keywords == 1) {
            foreach ($keywordsarray as $keyword_entry) {
                if ($keyword_entry[1] == $word[1]) {
                    $meta_keyword = 1;
                    break;
                }
            }
        }
        foreach ($titlearray as $title_entry) {
            if ($title_entry[1] == $word[1]) {
                $word_in_title = 1;
                break;
            }
        }

        $wordarray[$wid][2] = (int) calc_weight($wordarray[$wid][2], $word_in_title, $word_in_domain, $word_in_path, $path_depth, $meta_keyword);
    }

    reset($wordarray);
    return $wordarray;
}
Example #3
0
/**
 * Compute a ranking weight for every word entry of one indexed page.
 *
 * Each entry of $wordarray is an array whose index 1 holds the word text and
 * whose index 2 is handed to calc_weight() as its first argument (presumably
 * the occurrence count -- confirm against the caller). The computed weight is
 * stored, cast to int, at index 3 of the same entry.
 *
 * Host, path, title and meta keywords are split into word lists via
 * unique_array(). Host/path matching is only done when $index_host == 1,
 * meta keyword matching only when $index_meta_keywords == 1. When
 * $sort_results == '2', pages recognised as a main URL are weighted with
 * $domain_mul instead of the default factor '1'.
 *
 * @param array  $wordarray word entries of the page (see above)
 * @param string $title     page title
 * @param string $host      host name of the page URL
 * @param string $path      path of the page URL
 * @param string $keywords  content of the meta keywords tag
 * @param array  $url_parts URL components; the 'path' and 'query' keys are read
 *                          (looks like parse_url() output -- confirm)
 *
 * @return array $wordarray with index 3 of every entry set to its weight
 */
function calc_weights($wordarray, $title, $host, $path, $keywords, $url_parts)
{
    global $index_host, $index_meta_keywords, $sort_results, $domain_mul;

    $hostarray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($host))));
    $patharray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($path))));
    $titlearray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($title))));
    $keywordsarray = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($keywords))));
    $path_depth = countSubstrs($path, "/");

    $main_url_factor = '1';
    if ($sort_results == '2') {
        //  'Main URLs (domains) on top' is selected
        //  the components may be missing from $url_parts, so guard both keys
        $act_path = isset($url_parts['path']) ? $url_parts['path'] : '';
        $has_query = !empty($url_parts['query']);

        //  localhost systems: a shallow path without any file name counts as main URL
        if ($host == 'localhost' && substr_count($act_path, ".") == 0 && substr_count($act_path, "/") <= 3) {
            $main_url_factor = $domain_mul;
        }

        //  only these index files are accepted as part of a main URL path
        $act_path = str_replace(array('index.php', 'index.html', 'index.htm'), '', $act_path);

        //  in the wild: host with exactly two dots, (almost) empty path and no query
        if ($host != 'localhost' && substr_count($host, ".") == 2 && strlen($act_path) <= 1 && !$has_query) {
            $main_url_factor = $domain_mul;
        }
    }

    foreach ($wordarray as $wid => $word) {
        $word_in_path = 0;
        $word_in_domain = 0;
        $word_in_title = 0;
        $meta_keyword = 0;

        //  loop variables get their own names so the $host / $path
        //  parameters are not overwritten while scanning
        if ($index_host == 1) {
            foreach ($patharray as $path_entry) {
                if ($path_entry[1] == $word[1]) {
                    $word_in_path = 1;
                    break;
                }
            }
            foreach ($hostarray as $host_entry) {
                if ($host_entry[1] == $word[1]) {
                    $word_in_domain = 1;
                    break;
                }
            }
        }
        if ($index_meta_keywords == 1) {
            foreach ($keywordsarray as $keyword_entry) {
                if ($keyword_entry[1] == $word[1]) {
                    $meta_keyword = 1;
                    break;
                }
            }
        }
        foreach ($titlearray as $title_entry) {
            if ($title_entry[1] == $word[1]) {
                $word_in_title = 1;
                break;
            }
        }

        $wordarray[$wid][3] = (int) calc_weight($wordarray[$wid][2], $word_in_title, $word_in_domain, $word_in_path, $path_depth, $meta_keyword, $main_url_factor);
    }

    reset($wordarray);
    return $wordarray;
}