コード例 #1
0
     foreach ($white as $val) {
         if ($trim_words) {
             $whitelist[] = trim($val);
         } else {
             $whitelist[] = $val;
         }
     }
     $whitelist = array_unique($whitelist);
     sort($whitelist);
 }
 if (is_array($black_in)) {
     $black = array();
     $blacklist = array();
     foreach ($black_in as $val) {
         if ($case_sensitive == '0') {
             $val = lower_case($val);
         }
         $val = @iconv($home_charset, "UTF-8", $val);
         $black[] = addslashes($val);
     }
     foreach ($black as $val) {
         if ($trim_words) {
             $blacklist[] = trim($val);
         } else {
             $blacklist[] = $val;
         }
     }
     $blacklist = array_unique($blacklist);
     sort($blacklist);
 }
 if (is_array($image)) {
コード例 #2
0
ファイル: helper.php プロジェクト: royanonuevo/My-Web-Kit
/**
 * Converts a string to title case.
 *
 * The input is first passed through lower_case() (defined elsewhere in the
 * project; presumably a lower-casing helper - verify) and the result is then
 * handed to ucwords(), which upper-cases the first character of each word.
 *
 * @param string $str Text to convert.
 *
 * @return string The converted text.
 */
function title_case($str)
{
    // Flatten the case first so that ucwords() only has to raise word initials.
    $lowered = lower_case($str);

    return ucwords($lowered);
}
コード例 #3
0
ファイル: domains.php プロジェクト: kpabijanskas/fusionpbx
 /**
  * Runs the upgrade pass over every domain.
  *
  * Steps, in order:
  *  1. Read the database connection values from the config object.
  *  2. Include every app_config.php / app_menu.php found under the project.
  *  3. Load domains, enabled domain settings and enabled default settings.
  *  4. Populate $_SESSION with the domain list / current domain.
  *  5. For each domain: reset $_SESSION to the defaults, overlay that domain's
  *     settings, then include every app_defaults.php.
  *  6. Call save_dialplan_xml() when it exists, write the scripts config when
  *     the scripts class file exists, and drop $_SESSION['domain'] and
  *     $_SESSION['switch'].
  *
  * Included files execute in this method's variable scope (PHP include
  * semantics), so the local variable names below are intentionally left
  * unchanged even where this method itself never reads them again.
  *
  * NOTE(review): $domain_array and $display_type are read below but never
  * assigned in this method - presumably they are expected to come from an
  * included file or were globals in an earlier revision; confirm.
  *
  * @return void
  */
 public function upgrade()
 {
     //get the db variables
     $config = new config();
     $config_exists = $config->exists();
     $config_path = $config->find();
     $config->get();
     $db_type = $config->db_type;
     $db_name = $config->db_name;
     $db_username = $config->db_username;
     $db_password = $config->db_password;
     $db_host = $config->db_host;
     $db_path = $config->db_path;
     $db_port = $config->db_port;
     //get the PROJECT PATH
     include "root.php";
     //get the list of installed apps from the core and app directories (note: GLOB_BRACE doesn't work on some systems)
     $config_list_1 = glob($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . "/*/*/app_config.php");
     $config_list_2 = glob($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . "/*/*/app_menu.php");
     //glob() returns false on error; a plain (array) cast would turn that into array(false)
     //and the loop below would then try to include false, so fall back to an empty list instead
     $config_list = array_merge(
         is_array($config_list_1) ? $config_list_1 : array(),
         is_array($config_list_2) ? $config_list_2 : array()
     );
     unset($config_list_1, $config_list_2);
     $db = $this->db;
     $x = 0;
     //iterate by value: nothing is written back, and a by-reference loop variable
     //would stay bound to the last element of $config_list after the loop
     foreach ($config_list as $config_path) {
         include $config_path;
         $x++;
     }
     //get the domains
     $sql = "select * from v_domains ";
     $prep_statement = $this->db->prepare($sql);
     $prep_statement->execute();
     $domains = $prep_statement->fetchAll(PDO::FETCH_NAMED);
     unset($prep_statement);
     //get the domain_settings
     $sql = "select * from v_domain_settings ";
     $sql .= "where domain_setting_enabled = 'true' ";
     $prep_statement = $this->db->prepare($sql);
     $prep_statement->execute();
     $domain_settings = $prep_statement->fetchAll(PDO::FETCH_NAMED);
     unset($prep_statement);
     //get the default settings
     $sql = "select * from v_default_settings ";
     $sql .= "where default_setting_enabled = 'true' ";
     $prep_statement = $this->db->prepare($sql);
     $prep_statement->execute();
     $database_default_settings = $prep_statement->fetchAll(PDO::FETCH_NAMED);
     unset($prep_statement);
     //get the domain_uuid
     foreach ($domains as $row) {
         if (count($domains) == 1) {
             //a single domain is always the current one
             $_SESSION["domain_uuid"] = $row["domain_uuid"];
             $_SESSION["domain_name"] = $row['domain_name'];
         } else {
             //several domains: the current one is the one matching $domain_array[0], with or without a www. prefix
             if (lower_case($row['domain_name']) == lower_case($domain_array[0]) || lower_case($row['domain_name']) == lower_case('www.' . $domain_array[0])) {
                 $_SESSION["domain_uuid"] = $row["domain_uuid"];
                 $_SESSION["domain_name"] = $row['domain_name'];
             }
             $_SESSION['domains'][$row['domain_uuid']]['domain_uuid'] = $row['domain_uuid'];
             $_SESSION['domains'][$row['domain_uuid']]['domain_name'] = $row['domain_name'];
         }
     }
     //loop through all domains
     $domain_count = count($domains);
     $domains_processed = 1;
     //iterate by value: the nested loops below reuse $row, and with a by-reference
     //loop variable each of those assignments would overwrite the current $domains entry
     foreach ($domains as $row) {
         //get the values from database and set them as php variables
         $domain_uuid = $row["domain_uuid"];
         $domain_name = $row["domain_name"];
         //get the context
         $context = $domain_name;
         //show the domain when display_type is set to text
         if ($display_type == "text") {
             echo "\n";
             echo $domain_name;
             echo "\n";
         }
         //get the default settings - this needs to be done to reset the session values back to the defaults for each domain in the loop
         foreach ($database_default_settings as $row) {
             $name = $row['default_setting_name'];
             $category = $row['default_setting_category'];
             $subcategory = $row['default_setting_subcategory'];
             if (strlen($subcategory) == 0) {
                 if ($name == "array") {
                     $_SESSION[$category][] = $row['default_setting_value'];
                 } else {
                     $_SESSION[$category][$name] = $row['default_setting_value'];
                 }
             } else {
                 if ($name == "array") {
                     $_SESSION[$category][$subcategory][] = $row['default_setting_value'];
                 } else {
                     $_SESSION[$category][$subcategory]['uuid'] = $row['default_setting_uuid'];
                     $_SESSION[$category][$subcategory][$name] = $row['default_setting_value'];
                 }
             }
         }
         //get the domains settings for the current domain
         foreach ($domain_settings as $row) {
             if ($row['domain_uuid'] == $domain_uuid) {
                 $name = $row['domain_setting_name'];
                 $category = $row['domain_setting_category'];
                 $subcategory = $row['domain_setting_subcategory'];
                 if (strlen($subcategory) == 0) {
                     $_SESSION[$category][$name] = $row['domain_setting_value'];
                 } else {
                     $_SESSION[$category][$subcategory][$name] = $row['domain_setting_value'];
                 }
             }
         }
         //get the list of installed apps from the core and mod directories and execute the php code in app_defaults.php
         $default_list = glob($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . "/*/*/app_defaults.php");
         //glob() returns false on error; skip the includes instead of iterating a non-array
         if (is_array($default_list)) {
             foreach ($default_list as $default_path) {
                 include $default_path;
             }
         }
         //track of the number of domains processed
         $domains_processed++;
     }
     //synchronize the dialplan
     if (function_exists('save_dialplan_xml')) {
         save_dialplan_xml();
     }
     //update config.lua
     if (file_exists($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . '/app/scripts/resources/classes/scripts.php')) {
         $obj = new scripts();
         $obj->write_config();
     }
     //clear the session variables
     unset($_SESSION['domain']);
     unset($_SESSION['switch']);
 }
コード例 #4
0
ファイル: check_auth.php プロジェクト: shadowym/fusionpbx
                 if (lower_case($row['domain_name']) == lower_case($domain_name)) {
                     $_SESSION['domain_uuid'] = $row['domain_uuid'];
                     break;
                 }
             }
             $_REQUEST["username"] = substr(check_str($_REQUEST["username"]), 0, -(strlen($domain_name) + 1));
         }
     }
     //get the domain name from the http value
     if (strlen(check_str($_REQUEST["domain_name"])) > 0) {
         $domain_name = check_str($_REQUEST["domain_name"]);
     }
     //set the domain information
     if (strlen($domain_name) > 0) {
         foreach ($_SESSION['domains'] as &$row) {
             if (lower_case($row['domain_name']) == lower_case($domain_name)) {
                 //set the domain session variables
                 $domain_uuid = $row["domain_uuid"];
                 $_SESSION["domain_uuid"] = $row["domain_uuid"];
                 $_SESSION["domain_name"] = $_SESSION['domains'][$domain_uuid]['domain_name'];
                 //set the setting arrays
                 $domain = new domains();
                 $domain->db = $db;
                 $domain->set();
             }
         }
     }
     //set the domain parent uuid
     $_SESSION['domain_parent_uuid'] = $_SESSION["domain_uuid"];
 }
 //get the username or key
コード例 #5
0
     $id3_trans = del_secintern($id3_trans);
     if ($id3_trans != $id3_rem) {
         $id3_string .= " " . $id3_trans . "";
         //  add new words to EXIF info
     }
 }
 //   convert to lower case
 if ($case_sensitive == '0') {
     $title_rem = $title_trans;
     $id3_rem = $id3_trans;
     $title_trans = lower_case(lower_ent($title_trans));
     if ($title_trans != $title_rem) {
         $title .= " " . $title_trans . "";
         //  add new words to title
     }
     $id3_trans = lower_case(lower_ent($id3_trans));
     if ($id3_trans != $id3_rem) {
         $id3_string .= " " . $id3_trans . "";
         //  add new words to EXIF info
     }
 }
 //  remove Latin accents
 if ($vowels || $noacc_el) {
     $title_rem = $title_trans;
     $id3_rem = $id3_trans;
     $title_trans = remove_acc($title_trans, '0');
     if ($title_trans != $title_rem) {
         $title .= " " . $title_trans . "";
     }
     $id3_trans = remove_acc($id3_trans, '0');
     if ($id3_trans != $id3_rem) {
コード例 #6
0
/**
 * Replaces the first character of a string with the result of passing it
 * through lower_case() (defined elsewhere in the project).
 *
 * Only offset 0 is touched, so the operation is byte-based.
 * NOTE(review): for multi-byte input only the first byte is replaced -
 * confirm callers pass single-byte leading characters.
 *
 * @param string $str Text whose first character should be lower-cased.
 *
 * @return string The modified text; an empty string is returned unchanged.
 */
function str_decapitalize($str)
{
    // An empty string has no offset 0: reading it raises an "Uninitialized
    // string offset" notice/warning, so there is nothing to convert.
    if ($str === '') {
        return $str;
    }
    $str[0] = lower_case($str[0]);
    return $str;
}
コード例 #7
0
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex, $use_nofollow, $cl, $use_robot, $use_pref, $url_inc, $url_not_inc, $num)
{
    global $db_con, $entities, $min_delay, $link_check, $command_line, $min_words_per_page, $dup_content, $dup_url, $quotes, $plus_nr, $use_prefcharset;
    global $min_words_per_page, $supdomain, $smp, $follow_sitemap, $max_links, $realnum, $local, $tmp_dir, $auto_add, $admin_email, $idna, $conv_puny;
    global $mysql_table_prefix, $user_agent, $tmp_urls, $delay_time, $domain_arr, $home_charset, $charSet, $url_status, $redir_count;
    global $debug, $common, $use_white1, $use_white2, $use_black, $whitelist, $blacklist, $clear, $abslinks, $utf8_verify, $webshot;
    global $index_media, $index_image, $suppress_suffix, $imagelist, $min_image_x, $min_image_y, $dup_media, $index_alt, $no_log, $index_rss;
    global $index_audio, $audiolist, $index_video, $videolist, $index_embeded, $rss_template, $index_csv, $delim, $ext, $index_id3, $dba_act;
    global $converter_dir, $dict_dir, $cn_seg, $jp_seg, $index_framesets, $index_iframes, $cdata, $dc, $preferred, $index_rar, $index_zip, $curl;
    global $docs, $only_docs, $only_links, $case_sensitive, $vowels, $noacc_el, $include_dir, $thumb_folder, $js_reloc, $server_char;
    global $latin_ligatures, $phon_trans, $liga;
    //  Currently (2013.01.11)  the variable $use_prefcharset as defined in Admin Settings 'Obligatory use preferred charset' is used.
    //  and not the variable $use_pref as defined in Admin Settings as a varaiable used for addsite() in .../admin/admin.php
    error_reporting(E_ALL & ~E_DEPRECATED & ~E_WARNING & ~E_NOTICE & ~E_STRICT);
    $data = array();
    $cn_data = array();
    $url_parts = array();
    $url_status = array();
    $url_status['black'] = '';
    $contents = array();
    $links = array();
    $wordarray = array();
    $topic = '';
    $url_reloc = '';
    $js_link = '';
    $document = '';
    $file = '';
    $file0 = '';
    $raw_file = '';
    $seg_data = '';
    $index_url = $url;
    $comment = $db_con->real_escape_string("Automatically added during index procedure, as this domain is not yet available in 'Sites' menu.");
    $admin_email = $db_con->real_escape_string($admin_email);
    if ($debug == '0') {
        if (function_exists("ini_set")) {
            ini_set("display_errors", "0");
        }
        error_reporting(0);
    } else {
        error_reporting(E_ERROR);
        //  otherwise  a non existing siemap.xml  would always cause a warning message
    }
    $needsReindex = 1;
    $deletable = 0;
    $nohost = 1;
    $i = 0;
    $nohost_count = 5;
    //  defines count of attempts to get in contact with the server
    //  check URL status
    while ($i < $nohost_count && $nohost) {
        $url_status = url_status($url, $site_id, $sessid);
        if (!stristr($url_status['state'], "NOHOST")) {
            $nohost = '';
            //  reset for successfull attempt
        }
        $i++;
    }
    //  check for emergency exit
    if ($url_status['aborted'] == '1' || stristr($url_status['state'], "NOHOST")) {
        return $url_status;
    }
    //  check for UFO file or invalid suffix
    if (stristr($url_status['state'], "ufo")) {
        return $url_status;
    }
    // JFIELD here is right before we try to retrieve the URL and get the error
    // echo "<h3>F****E: $url</h3>\n";
    //  check for 'unreachable' links and if it is a known URL, delete all keyword relationships, former indexed from the meanwhile unreachable link
    if (stristr($url_status['state'], "unreachable")) {
        printStandardReport('unreachable', $command_line, $no_log);
        $sql_query = "SELECT link_id from " . $mysql_table_prefix . "links where url='{$url}'";
        $result = $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $row = $result->fetch_array(MYSQLI_NUM);
        $link_id = $row[0];
        if ($link_id) {
            $sql_query = "DELETE from " . $mysql_table_prefix . "link_keyword where link_id={$link_id}";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            //  here we should delete the keywords associated only to the unreachable link
            //  but this takes too much time during index procedure
            //  the admin is asked toc do it manually by using the regarding option in 'Clean' menue
            //
            //  delete the meanwhile unreachable link from db
            $sql_query = "DELETE from " . $mysql_table_prefix . "links where link_id = {$link_id}";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
        }
        return $url_status;
    }
    //  check for overwritten URL, forced by the header, sending content PLUS any redirected URL
    if ($url_status['url_over'] && !$url_status['relocate']) {
        $url = $url_status['url_over'];
    }
    $url_parts = parse_all_url($url);
    $thislevel = $level - 1;
    //  redirected URL ?
    if ($url_status['relocate']) {
        //  if relocated,  print message, verify the new URL, and redirect to new URL
        //  check for redirection on an already indexed link
        $known_link = '';
        $sql_query = "SELECT * from " . $mysql_table_prefix . "links where url='{$url}'";
        $result = $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $known_link = $result->num_rows;
        if ($known_link) {
            $urlo_status['state'] = "URL was redirected to an already indexed page.<br />In order to prevent infinite indexation, this is not supported by Sphider-plus.<br />Indexation aborted for this URL";
            $url_status['aborted'] = 1;
            return $url_status;
        }
        //  remove the original URL from temp table. The relocated URL will be added later on.
        mysqltest();
        $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$url}' AND id = '{$sessid}'";
        $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $new_url = $url_status['path'];
        //  URL of first redirection
        //  remove the redirected URL, which eventually is  already stored in db
        //  before finally storing in db, we need to check for correct redirection.
        $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$new_url}' AND id = '{$sessid}'";
        $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        //  now special processing for relative links
        if (!strpos(substr($new_url, 0, 5), "ttp")) {
            $new_url = make_abs($new_url, $index_url);
        }
        if ($url == $new_url && $url_status['file']) {
            $url_status['relocate'] = '';
            //  remove this redirection, as it is 'in it selves'
            $url_status['state'] = "ok";
            //  try to index the conteent
        }
        $care_excl = '1';
        //  care file suffixed to be excluded
        $relocated = '1';
        //  URL is relocated
        if ($debug) {
            printRedirected($url_status['relocate'], $url_status['path'], $cl);
        }
        $count = "1";
        while ($count <= $redir_count && $url_status['relocate'] && !$url_status['aborted']) {
            //  check this redirection
            $url_status = url_status($new_url, $site_id, $sessid);
            if ($url_status['path']) {
                $new_url = $url_status['path'];
                //  URL of another redirections
                //  now special processing for relative links
                if (!strpos(substr($new_url, 0, 5), "ttp")) {
                    $new_url = make_abs($new_url, $index_url);
                }
            }
            if ($debug) {
                printRedirected($url_status['relocate'], $url_status['path'], $cl);
            }
            $count++;
        }
        if ($url_status['relocate']) {
            $url_status['aborted'] = 1;
            $url_status['state'] = "<br />Indexation aborted because of too many redirections.<br />";
            return $url_status;
        }
        if ($url_status['state'] != "ok") {
            $code = $url_status['state'];
            //  check for most common client errors
            if (!preg_match("/401|402|403|404/", $code)) {
                $url_status['aborted'] = 1;
                //  end indexing for cmplete site
            } else {
                $url_status['aborted'] = '';
                //  abort only for this page
            }
            if (strstr($code, "401")) {
                $code = "401 (Authentication required)";
            }
            if (strstr($code, "403")) {
                $code = "403 (Forbidden)";
            }
            if (strstr($code, "404")) {
                $code = "404 (Not found)";
            }
            $url_status['state'] = "<br />Indexation aborted because of code: {$code}.<br />";
        }
        //  check final URL (which might be the 3. redirection)
        //  and puriify final redirected URL
        $url = $db_con->real_escape_string(url_purify($new_url, $index_url, $can_leave_domain, $care_excl, $relocated, $local_redir));
        // valid file suffix for the redirection??
        if ($url) {
            if ($care_excl == '1') {
                //  care about non-accepted suffixes
                reset($ext);
                while (list($id, $excl) = each($ext)) {
                    if (preg_match("/\\.{$excl}(\$|\\?)/i", $url)) {
                        //  if suffix is at the end of the link, or followd by a question mark
                        $url_status['state'] = 'Found: Not supported suffix';
                        //  error message
                        return $url_status;
                    }
                }
            }
        }
        if (!$url) {
            $link_parts = parse_all_url($url);
            $host = $link_parts['host'];
            $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$index_url}' AND id = '{$sessid}' OR relo_link like '{$url}'";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $url_status['aborted'] = 1;
            $url_status['state'] = "<br />Indexation aborted because of undefined redirection error.<br />";
            return $url_status;
        }
        //  abort indexation, if the redirected URL is equal to calling URL
        if ($url == 'self') {
            $link_parts = parse_all_url($url);
            $host = $link_parts['host'];
            $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $url_status['aborted'] = 1;
            $url_status['state'] = "<br />Indexation aborted for this page, because the redirection was a link in it selves.<br />Blocked by Sphider-plus, because this could end in an infinite indexation loop.<br />";
            return $url_status;
        }
        //  abort indexation, if the redirected URL contains invalid file suffix
        if ($url == 'excl') {
            $link_parts = parse_all_url($url);
            $host = $link_parts['host'];
            $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $url_status['aborted'] = 1;
            $url_status['state'] = "<br />Indexation aborted because the redirected link does not meet the URL suffix conditions.<br />";
            return $url_status;
        }
        //  abort indexation, because purifing the redirected URL failed
        if (!strstr($url, "//")) {
            $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $url_status['aborted'] = 1;
            $url_status['state'] = "<br />Indexation aborted because {$url} is not supported.<br />";
            return $url_status;
        }
        //  abort indexation, if redirected URL met 'must/must not include' string rule
        if (!check_include($url, $url_inc, $url_not_inc)) {
            $link_parts = parse_all_url($url);
            $host = $link_parts['host'];
            $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $url_status['aborted'] = 1;
            $url_status['state'] = "<br />Indexation aborted because the redirected link does not meet<br />the URL 'must include' or 'must not include' conditions.<br />";
            return $url_status;
        }
        //  if redirected URL is already known and in database: abort
        $rows0 = '';
        $rows1 = '';
        mysqltest();
        $sql_query = "SELECT url from " . $mysql_table_prefix . "sites where url like '{$url}'";
        $result = $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $rows0 = $result->num_rows;
        $sql_query = "SELECT * from " . $mysql_table_prefix . "links where url='{$url}'";
        $result = $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $known_link = $result->fetch_array(MYSQLI_NUM);
        $md5 = $known_link[8];
        if ($clear == 1) {
            clean_resource($result, '02');
        }
        if ($rows0) {
            $url_status['state'] = "<br />URL already in database (as a site URL). Index aborted.<br />";
            $url_status['aborted'] = 1;
            return $url_status;
        }
        // if known link, which is already indexed (because containing the md5 checksum), enter here
        if ($known_link[8]) {
            $count = $known_link[15];
            $count++;
            if ($count > $redir_count) {
                //  abort indexation
                $url_status['state'] = "<br />{$count}. attempt to redirect in the same (already indexed) URL, <br />which is no longer accepted by Sphider-plus. Indexation aborted for this site.<br />";
                $url_status['aborted'] = 1;
                return $url_status;
            } else {
                $sql_query = "UPDATE " . $mysql_table_prefix . "links set relo_count='{$count}' where url='{$url}'";
                $db_con->query($sql_query);
            }
        }
        //  add redirected URL to temp table, if not yet known
        $sql_query = "SELECT link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'";
        $result = $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $rows = $result->num_rows;
        if ($rows == 0) {
            $sql_query = "INSERT into " . $mysql_table_prefix . "temp (link, level, id, relo_count) values ('{$url}', '{$level}', '{$sessid}', '1')";
            $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
        }
        if ($clear == 1) {
            clean_resource($result, '02');
        }
        //  at the end of redirect, rebuild the url parts from the redirected URL.
        //  This is the final URL, which will be indexed
        $url_parts = parse_all_url($url);
    }
    //  end check any redirection/relocation
    //  if a JavaScript file is currently indexed?
    $suffix = substr($url, strrpos($url, ".") + 1);
    $suffix = str_replace("/", "", $suffix);
    if (strlen($suffix) < "5") {
        if (preg_match("/js\$/", $suffix)) {
            $js_link = 1;
            //  activate JS switch
        }
    }
    if ($smp != 1 && $follow_sitemap == 1) {
        //  enter here if we don't already know a valid sitemap and if admin settings allowed us to do so
        $tmp_urls = get_temp_urls($sessid);
        //  reload previous temp
        $url2 = remove_sessid(convert_url($url));
        // get folder where sitemap should be and if exists, cut existing filename, suffix and subfolder
        $host = parse_addr($url2);
        $hostname = $host[host];
        $more_sitemaps = array();
        if ($hostname == 'localhost') {
            $host1 = str_replace($local, '', $url2);
        }
        $pos = strpos($host1, "/");
        //      on local server delete all behind the /
        if ($pos) {
            $host1 = substr($host1, 0, $pos);
        }
        //      rebuild the full address, now consisting of the host only
        if ($hostname == 'localhost') {
            $url2 = "" . $local . "" . $host1 . "";
        } else {
            $url2 = "{$host['scheme']}://{$hostname}";
        }
        $sitemap_name = "sitemap";
        //      standard name for sitemap file
        $input_file = "{$url2}/{$sitemap_name}";
        //      create path to sitemap
        $log_file = './sitemaps/current_sitemap.xml';
        //      destination for sitemap log-file
        $smap_found = '';
        $indexed_map = '';
        $map_cont = '';
        //  try to fetch individual sitemap url from database
        mysqltest();
        $sql_query = "SELECT smap_url from " . $mysql_table_prefix . "sites where site_id='{$site_id}'";
        $result = $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
        $row = $result->fetch_array(MYSQLI_NUM);
        if (preg_match("/http:\\/\\//", $row[0])) {
            //   use the individual sitemap
            $input_file = preg_replace("/.xml.gz|.xml/i", "", $row[0]);
        }
        $file = "" . $input_file . ".xml";
        if ($fd = @fopen($file, "r")) {
            //  uncompressed ?
            //if ($zd = @gzopen("".$input_file.".xml", "r")) {    //  uncompressed ?
            $map_cont = @stream_get_contents($fd);
            if ($map_cont && strpos($map_cont, "schemas/sitemap")) {
                //  if we were able to read it
                $smap_found = '1';
            }
            fclose($fd);
        }
        $gz_file = "" . $input_file . ".xml.gz";
        if (!$smap_found && ($zd = @fopen("compress.zlib://{$gz_file}", "r"))) {
            // compressed  ?
            //if (!$smap_found && $zd = @gzopen("".$input_file.".xml.gz", "r")) {  // compressed  ?
            $map_cont = @gzread($zd, 10485760);
            //  max. 10 MB (might be too large for some server)
            gzclose($zd);
            if ($map_cont && strpos($map_cont, "schemas/sitemap")) {
                $smap_found = '1';
            }
        }
        //echo "\r\n\r\n<br>map_cont Array:<br><pre>";print_r($map_cont);echo "</pre>\r\n";
        if ($smap_found) {
            if ($debug != '0') {
                //      create a log-file of current sitemap.xml
                file_put_contents($log_file, $map_cont);
            }
            //$del = $db_con->query("DELETE from ".$mysql_table_prefix."temp"); // function get_sitemap and store_links will build a new temp table
            if (stristr($map_cont, "<sitemapindex")) {
                //      if current sitemap file is an index file
                printStandardReport('validSitemapInd', $command_line, $no_log);
                $get_maps = simplexml_load_string($map_cont);
                if ($get_maps) {
                    reset($get_maps);
                    foreach ($get_maps as $map_x) {
                        $new_links[] = $map_x->loc;
                        //   get all links to further sitemap files
                    }
                    if (is_array($new_links)) {
                        //      if we found more sitemap files
                        $new_links = explode(",", implode(",", $new_links));
                        // destroy SimpleXMLElement Object and get the link array
                        $new_links = array_slice($new_links, 0, $max_links);
                        $indexed_map = '1';
                        $i = '0';
                        //echo "\r\n\r\n<br>new_links Array:<br><pre>";print_r($new_links);echo "</pre>\r\n";
                        foreach ($new_links as $input_file) {
                            $these_links = get_sitemap($input_file, $indexed_map, $mysql_table_prefix);
                            // now extract page links from this sitemap file
                            //echo "\r\n\r\n<br>these_links Array:<br><pre>";print_r($these_links);echo "</pre>\r\n";
                            if ($these_links) {
                                reset($these_links);
                                store_newLinks($these_links, $level, $sessid);
                                $smp = '1';
                                //     there were valid sitemap files and we stored the new links
                                $i++;
                            } else {
                                printStandardReport('invalidSecSitemap', $command_line, $no_log);
                                //  unable to extract links from secondary sitemap file
                            }
                        }
                        printValidSecSmap($i, $cl);
                        unset($input_file, $map_cont, $new_links);
                    } else {
                        printStandardReport('invalidSecSitemap', $command_line, $no_log);
                        //  unable to extract links from secondary sitemap file
                    }
                } else {
                    printStandardReport('invalidSitemapInd', $command_line, $no_log);
                    //  unable to extract links from sitemap INDEX  file
                }
            } else {
                $links = get_sitemap($map_cont, $indexed_map, $mysql_table_prefix);
                // extract links from sitemap.xml  (there was only one sitemap file)
                if ($links != '') {
                    reset($links);
                    //echo "\r\n\r\n<br>sitemmap links Array:<br><pre>";print_r($links);echo "</pre>\r\n";
                    store_newLinks($links, $level, $sessid);
                    $smp = '1';
                    //     there was one valid sitemap and we stored the new links
                    printStandardReport('validSitemap', $command_line, $no_log);
                } else {
                    printStandardReport('invalidSitemap', $command_line, $no_log);
                }
                unset($links);
            }
        }
    }
    if ($debug == '0') {
        if (function_exists("ini_set")) {
            ini_set("display_errors", "0");
        }
        error_reporting(0);
    } else {
        error_reporting(E_ALL & ~E_DEPRECATED & ~E_WARNING & ~E_NOTICE & ~E_STRICT);
    }
    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        if (time() - $delay_time < $min_delay) {
            sleep($min_delay - (time() - $delay_time));
        }
        if ($url_status['file']) {
            $file = $url_status['file'];
        } else {
            $url_status['state'] = "Unable to read the content of the file.<br />{$url} does not deliver any content.";
            $realnum--;
        }
    }
    if ($url_status['state'] == 'ok') {
        //  first attempt to define a charset
        $chrSet = '';
        if ($use_prefcharset == '1') {
            //  use preferred charset as defined in Admin settings
            $chrSet = $home_charset;
            //echo "<h1>USING PREFERRED CHARSET</h1>";
        } else {
            if ($server_char && $url_status['charset']) {
                //echo "<h1>USING SERVER CHARSET</h1>";
                $chrSet = $url_status['charset'];
                //  use charset as supplied by the remote server
            } else {
                //  try to extract the charset of this file
                //echo "<h1>USING CONTENT CHARSET</h1>";
                //echo "<h1>" . substr($file, 0, 500) . "</h1>";
                if (preg_match("'encoding=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) {
                    //echo "<h1>1</h1>";
                    $chrSet = trim(strtoupper($regs[1]));
                    //      get encoding of current XML or XHTML file     and use it furtheron
                }
                if (!$chrSet) {
                    //echo "<h1>2</h1>";
                    if (preg_match("'charset=(.*?)[ \\/\\;\\'\"]'si", substr($file, 0, 3000), $regs)) {
                        //echo "<h1>3</h1>";
                        $chrSet = trim(strtoupper($regs[1]));
                        //      get charset of current HTML file     and use it furtheron
                    }
                }
                if (!$chrSet) {
                    //echo "<h1>4</h1>";
                    if (preg_match("'charset=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) {
                        //echo "<h1>5</h1>";
                        $chrSet = trim(strtoupper($regs[1]));
                        //      get charset of current HTML file     and use it furtheron
                    }
                }
                //  in assistance for all lazy webmasters
                $chrSet = preg_replace("/win-/si", "windows-", $chrSet);
                if ($chrSet == "1251") {
                    //echo "<h1>6</h1>";
                    $chrSet = "windows-1251";
                }
                if ($chrSet == '') {
                    //echo "<h1>7</h1>";
                    $chrSet = $home_charset;
                    //  no charset found, we need to use default charset like for DOCs, PDFs, etc
                }
            }
        }
        //echo "<h1>CHRSET: $chrSet</h1>";
        //  if required, uncompress ZIP archives and make content of each file => text
        if ($url_status['content'] == 'zip' && $index_zip == '1' && $file) {
            file_put_contents("" . $tmp_dir . "/archiv.temp", $file);
            $zip = zip_open("" . $tmp_dir . "/archiv.temp");
            if ($zip) {
                $url_status['content'] = "text";
                //  precautionary default, in case no other status is detected for the individual archive files
                $file = '';
                //  starting with a blank file for all archive files
                $topic = 'zip';
                if ($debug == '2') {
                    printStandardReport('archivFiles', $command_line, $no_log);
                }
                while ($zip_entry = zip_read($zip)) {
                    if (zip_entry_open($zip, $zip_entry, "r")) {
                        $buf = zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
                        //uncompress the content of recent archiv file
                        $name = zip_entry_name($zip_entry);
                        //  get filename of recent archive file
                        if ($debug == '2') {
                            //
                            $report = "<strong>&nbsp;&nbsp;" . $name . "</strong>";
                            printThis($report, $cl);
                            $size = (int) (zip_entry_filesize($zip_entry) / 1024);
                            if ($size == 0) {
                                $size = '1';
                            }
                            $report = "&nbsp;&nbsp;&nbsp;-&nbsp;Unpacked size:&nbsp;" . $size . " kByte<br />";
                            printThis($report, $cl);
                        }
                        $buf = get_arch_content($buf, $name, $url, $chrSet);
                        //  if necessary, convert PDF, extract feed etc. for the recent file
                        zip_entry_close($zip_entry);
                        //  done for this file in archiv
                        $file .= "" . $buf . "<br /><br />";
                        //  add all uncompressed and converted files together
                    }
                }
                zip_close($zip);
            }
            unlink("" . $tmp_dir . "/archiv.temp");
        }
        //  if required, uncompress RAR archives and make content of each file => text
        if ($url_status['content'] == 'rar' && $index_rar == '1') {
            file_put_contents("" . $tmp_dir . "/archiv.temp", $file);
            $rar = rar_open("" . $tmp_dir . "/archiv.temp");
            if ($rar) {
                $url_status['content'] = "text";
                //  as a precaution: all individual archive files will be converted to 'text'
                $file = '';
                //  starting with a blank file for all archive files
                $topic = 'rar';
                $entries = rar_list($rar);
                if ($rar) {
                    if ($debug == '2') {
                        printStandardReport('archivFiles', $command_line, $no_log);
                    }
                    foreach ($entries as $entry) {
                        $name = $entry->getName();
                        if ($debug == '2') {
                            $report = "<strong>&nbsp;&nbsp;" . $name . "</strong>";
                            printThis($report, $cl);
                            $size = (int) ($entry->getPackedSize() / 1024);
                            if ($size == 0) {
                                $size = '1';
                            }
                            $report = "&nbsp;&nbsp;&nbsp;-&nbsp;Packed size:&nbsp;&nbsp;" . $size . " kByte";
                            printThis($report, $cl);
                            $size = (int) ($entry->getUnpackedSize() / 1024);
                            if ($size == 0) {
                                $size = '1';
                            }
                            $report = "&nbsp;&nbsp;&nbsp;-&nbsp;Unpacked size:&nbsp;" . $size . " kByte<br />";
                            printThis($report, $cl);
                        }
                        $entry->extract('', "./" . $tmp_dir . "/" . $name . "");
                        //  extract single file of archiv into temporary folder
                        $buf = file_get_contents("./" . $tmp_dir . "/" . $name . "");
                        //  read content of this intermediate file
                        unlink("./" . $tmp_dir . "/" . $name . "");
                        //  destroy this file
                        if ($buf) {
                            $buf = get_arch_content($buf, $name, $url, $chrSet);
                            //  if necessary, convert PDF, extract feed etc. for the recent file
                            $file .= "" . $buf . "<br /><br />";
                            //  add all uncompressed and converted files together
                        }
                    }
                }
                rar_close($rar);
            }
            unlink("" . $tmp_dir . "/archiv.temp");
        }
        $file0 = $file;
        //  remember the original content (e.g. for the doc2txt converter)
        //  remove useless part of the content
        $file = purify_content($file);
        $valid_utf8 = '1';
        $raw_file = $file;
        //  remove possibly duplicated encoding info in dynamic links
        if (stristr(substr($file, '0', '4000'), "encoding") && strstr(substr($file, '0', '4000'), "charset")) {
            $file = substr($file, strrpos($file, "<!DOCTYPE"));
            //  keep the substring starting at the last <!DOCTYPE found
        }
        //  we need to do it again, in case the archive content brought a new charset
        $chrSet = '';
        if ($use_prefcharset == '1') {
            //  use preferred charset as defined in Admin settings
            $chrSet = $home_charset;
        } else {
            if ($server_char && $url_status['charset']) {
                $chrSet = $url_status['charset'];
                //  use charset as supplied by the remote server
            } else {
                //  try to extract the charset of this file
                if (preg_match("'encoding=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) {
                    $chrSet = trim(strtoupper($regs[1]));
                    //      get encoding of current XML or XHTML file     and use it furtheron
                }
                if (!$chrSet) {
                    if (preg_match("'charset=(.*?)[ \\/\\;\\'\"]'si", substr($file, 0, 3000), $regs)) {
                        $chrSet = trim(strtoupper($regs[1]));
                        //      get charset of current HTML file     and use it furtheron
                    }
                }
                if (!$chrSet) {
                    if (preg_match("'charset=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) {
                        $chrSet = trim(strtoupper($regs[1]));
                        //      get charset of current HTML file     and use it furtheron
                    }
                }
                //  in assistance for all lazy webmasters
                $chrSet = preg_replace("/win-/si", "windows-", $chrSet);
                if ($chrSet == "1251") {
                    $chrSet = "windows-1251";
                }
                if ($chrSet == '') {
                    $chrSet = $home_charset;
                    //  no charset found, we need to use default charset like for DOCs, PDFs, etc
                }
            }
        }
        if (strpos($chrSet, " ")) {
            // in the wild we have already seen a lot of variants
            $chrSet = substr($chrSet, 0, strpos($chrSet, " "));
        }
        //  some webmaster still use 'UNICODE' as name
        if (stristr($chrSet, "UNICODE")) {
            $chrSet = "UTF-8";
        }
        //  obsolete since 1990, but some (Italian) server still send it as charset . . . .
        if (stristr($chrSet, "8858")) {
            $chrSet = str_replace("8858", "8859", $chrSet);
        }
        //  required coaching for some webmasters
        if (stristr($chrSet, "cp-")) {
            $chrSet = str_ireplace("CP-", "CP", $chrSet);
        }
        $contents['charset'] = $chrSet;
        if ($index_framesets == '1') {
            if (preg_match("@<frameset[^>]*>(.*?)<\\/frameset>@si", $file, $regs)) {
                printStandardReport('newFrameset', $command_line, $no_log);
                //  separate the <frameset> ....</frameset> part of this file
                $frame = $regs[1];
                $replace = get_frames($frame, $url, $can_leave_domain);
                $replace = "<body>" . $replace . "</body>";
                //  create the body tags for $file
                $contents['charset'] = $chrSet;
                // rebuild charset
                //  include all replacements instead of the frameset tag into the actual file. This will become the body
                $file = preg_replace("@<frameset.*?</frameset>@si", "{$replace}", $file);
            }
        }
        if ($index_iframes == '1') {
            $links = array();
            $regs = array();
            $replace = '';
            $get_charset = '';
            $real_url = $url;
            if (preg_match_all("/(iframe[^>]*src[[:blank:]]*)=[[:blank:]]*[\\'\"]?(([[a-z]{3,5}:\\/\\/(([.a-zA-Z0-9-])+(:[0-9]+)*))*([+:%\\/?=&;\\\\(\\),._ a-zA-Z0-9-]*))(#[.a-zA-Z0-9-]*)?[\\'\" ]?/i", $file, $regs, PREG_SET_ORDER)) {
                printStandardReport('newIframe', $command_line, $no_log);
                //  find all frames of the iframe;
                $care_excl = '';
                //  don't care about file suffixes to be excluded
                $relocated = '';
                //  URL is not relocated
                foreach ($regs as $val) {
                    if (($a = url_purify($val[2], $url, $can_leave_domain, $care_exel, $relocated, $local_redir)) != '') {
                        $links[] = $a;
                        // collect  all iframe links
                    }
                }
                if ($links) {
                    foreach ($links as $url) {
                        printNewLinks($url, $cl);
                        if (preg_match("/.html|.htm|.xhtml|.xml|.php/i", $url)) {
                            $frame = file_get_contents($url);
                            //      get content of this frame
                            //  separate the body part of this frame
                            preg_match("@<body[^>]*>(.*?)<\\/body>@si", $frame, $regs);
                            $body = $regs[1];
                            if ($abslinks == '1') {
                                $body = make_abslinks($body, $url);
                                //  if required, correct links relative to found iframe
                            }
                            $replace = "" . $replace . "<br />" . $body . "";
                        } else {
                            //  might be an image
                            $replace = "" . $replace . "<br /><img src=\"" . $url . "\">";
                        }
                    }
                }
                //  include all replacements instead of the iframe tag into the actual file
                $file = preg_replace("@<iframe.*?</iframe>@si", "{$replace}", $file);
                $contents['charset'] = $chrSet;
                // rebuild charset
            }
            $url = $real_url;
        }
        //      in order to index RDF, RSD, RSS and ATOM feeds enter here
        if ($url_status['content'] == 'xml' && $index_rss == '1') {
            if (!preg_match("/<rss|atom|<feed|<rdf|<rsd/si", substr($file, 0, 400))) {
                printStandardReport('notRSS', $command_line, $no_log);
                //  no valid feed detected
                $OKtoIndex = 0;
                $file_read_error = 1;
                $realnum--;
            } else {
                $html = '';
                $xml = XML_IsWellFormed($file);
                //      check for well-formed XML
                if ($xml != '1') {
                    if ($debug > 0) {
                        printNotWellFormedXML($xml, $cl);
                    }
                    $OKtoIndex = 0;
                    $file_read_error = 1;
                    $realnum--;
                } else {
                    $rss = new feedParser();
                    // define options for feed parser
                    $rss->limit = $max_links;
                    //   save time by limiting the items/entries to be processed
                    $rss->in_cp = strtoupper($contents['charset']);
                    //  charset of actual file
                    $rss->out_cp = 'UTF-8';
                    //  convert all into this charset
                    $rss->cache_dir = '';
                    //  currently unused
                    $rss->dc = $dc;
                    //  treat Dublin Core tags in RDF feeds
                    $rss->pro = $preferred;
                    //  obey the PREFERRED directive in RSD feeds
                    $rss->file = '1';
                    //  use $file as feed (as a string, not URL)
                    if ($cdata != 1) {
                        $rss->CDATA = 'content';
                        //  get it all  (naughty)
                    } else {
                        $rss->CDATA = 'nochange';
                        //  well educated crawler
                    }
                    //  get feed as array
                    if ($feed = $rss->get($url, $file)) {
                        //  if you want to see the feed during index procedure, uncomment the following row
                        //  echo "<br>FEED array:<br><pre>";print_r($feed);echo "</pre>";
                        //  Turn one parsed feed ($feed) into a small HTML document appended to $html,
                        //  so that the regular page indexer can process it.
                        //  All array keys are quoted: bare words such as $feed[type] are undefined
                        //  constants, which raise an Error on PHP 8 (and a warning on PHP 7.2+).
                        $link = '';
                        $textinput_link = '';
                        $image_url = '';
                        $image_link = '';
                        $docs = '';
                        $subjects = '';
                        $type = $feed['type'];
                        $count = $feed['sub_count'];
                        $cached = $feed['cached'];
                        //  kill all no longer required values, so they do not end up in the indexed text
                        $feed['type'] = '';
                        $feed['sub_count'] = '';
                        $feed['encoding_in'] = '';
                        $feed['encoding_out'] = '';
                        $feed['items_count'] = '';
                        $feed['cached'] = '';
                        if (!$count) {
                            $count = '0';
                        }
                        if ($type == 'RSD') {
                            //      prepare all RSD APIs
                            for ($i = 0; $i < $count; $i++) {
                                $subjects .= '' . $feed['api'][$i]['name'] . '<br />
                                            ' . $feed['api'][$i]['apiLink'] . '<br />
                                            ' . $feed['api'][$i]['blogID'] . '<br />
                                            ' . $feed['api'][$i]['settings_docs'] . '<br />
                                            ' . $feed['api'][$i]['settings_notes'] . '<br />';
                            }
                        }
                        if ($type == 'Atom') {
                            //      prepare all Atom entries
                            for ($i = 0; $i < $count; $i++) {
                                $subjects .= '' . $feed['entries'][$i]['link'] . '<br />
                                            ' . $feed['entries'][$i]['title'] . '<br />
                                            ' . $feed['entries'][$i]['id'] . '<br />
                                            ' . $feed['entries'][$i]['published'] . '<br />
                                            ' . $feed['entries'][$i]['updated'] . '<br />
                                            ' . $feed['entries'][$i]['summary'] . '<br />
                                            ' . $feed['entries'][$i]['rights'] . '<br />
                                            ' . $feed['entries'][$i]['author_name'] . ' ' . $feed['entries'][$i]['author_email'] . ' ' . $feed['entries'][$i]['author_uri'] . '<br />
                                            ' . $feed['entries'][$i]['category_term'] . ' ' . $feed['entries'][$i]['category_label'] . ' ' . $feed['entries'][$i]['category_scheme'] . '<br />
                                            ' . $feed['entries'][$i]['contributor_name'] . ' ' . $feed['entries'][$i]['contributor_email'] . ' ' . $feed['entries'][$i]['contributor_uri'] . '<br />
                                        ';
                            }
                        }
                        //  logical OR; the former bitwise "|" only worked because both sides are booleans
                        if ($type == 'RDF' || $type == 'RSS v.0.91/0.92' || $type == 'RSS v.2.0') {
                            //  For RDF and RSS feeds enter here
                            //  prepare channel image
                            $image_url = $feed['image_url'];
                            if ($image_url) {
                                $width = $feed['image_width'];
                                if (!$width || $width > '144') {
                                    $width = '88';
                                    //set to default value
                                }
                                $height = $feed['image_height'];
                                if (!$height || $height > '400') {
                                    $height = '31';
                                    //set to default value
                                }
                                $feed['image_url'] = "<img id=\"rss_007\" src=\"" . $image_url . "\" alt=\"" . $feed['image_title'] . "\" width=\"" . $width . "\" height=\"" . $height . "\">";
                            }
                            $image_link = $feed['image_link'];
                            if ($image_link) {
                                $feed['image_link'] = "<a href=\"" . $image_link . "\">" . $image_link . "</a>";
                            }
                            //      prepare all RDF or RSS items
                            for ($i = 0; $i < $count; $i++) {
                                $subjects .= '' . $feed['items'][$i]['link'] . '<br />
                                            ' . $feed['items'][$i]['title'] . '<br />
                                            ' . $feed['items'][$i]['description'] . '<br />
                                            ' . $feed['items'][$i]['author'] . '<br />
                                            ' . $feed['items'][$i]['category'] . '<br />
                                            ' . $feed['items'][$i]['guid'] . '<br />
                                            ' . $feed['items'][$i]['comments'] . '<br />
                                            ' . $feed['items'][$i]['pubDate'] . '<br />
                                            ' . $feed['items'][$i]['source'] . '<br />
                                            ' . $feed['items'][$i]['enclosure'] . '<br />
                                            ' . $feed['items'][$i]['country'] . '<br />
                                            ' . $feed['items'][$i]['coverage'] . '<br />
                                            ' . $feed['items'][$i]['contributor'] . '<br />
                                            ' . $feed['items'][$i]['date'] . '<br />
                                            ' . $feed['items'][$i]['industry'] . '<br />
                                            ' . $feed['items'][$i]['language'] . '<br />
                                            ' . $feed['items'][$i]['publisher'] . '<br />
                                            ' . $feed['items'][$i]['state'] . '<br />
                                            ' . $feed['items'][$i]['subject'] . '<br />
                                        ';
                            }
                        }
                        //  convert  the channel/feed part  into a string
                        //  NOTE(review): $feed still carries the nested 'api' / 'entries' / 'items' arrays read
                        //  above, so implode() presumably emits the word "Array" (plus a conversion warning)
                        //  for each of them - confirm whether they should be removed first.
                        $feed_common = implode(" ", $feed);
                        //  build something that could be indexed
                        $html .= "<html>\r\n<head>\r\n<title>" . $feed['title'] . "</title>\r\n<meta name=\"description\" content=\"" . $feed['description'] . " \">\r\n</head>\r\n";
                        $html .= "<body>\r\n" . $feed_common . "\r\n" . $subjects . "\r\n</body>\r\n</html>\r\n";
                    }
                    if (strlen($html) >= 130) {
                        //  long enough to be a real feed: index the feed reader output,
                        //  which is always delivered as UTF-8
                        $contents['charset'] = 'UTF-8';
                        $file = $html;
                        if ($debug > 0) {
                            printValidFeed($type, $count, $cl);
                        }
                    } else {
                        //  fewer than 130 bytes of generated HTML: can't be a valid feed
                        if ($type != "unknown") {
                            printStandardReport('invalidRSS', $command_line, $no_log);
                        } else {
                            printInvalidFeedType($type, $cl);
                        }
                        //  flag the read error and take this page out of the counters
                        $OKtoIndex = 0;
                        $file_read_error = 1;
                        $realnum--;
                    }
                }
            }
        }
        //  Purify once more (already done earlier): frames, iframes or RSS might have added nonsense content
        $file = purify_content($file);
        //  CSV files: replace both separators, comma and semicolon, with a blank
        if ($url_status['content'] == 'csv' && $index_csv == '1') {
            $file = str_replace(array(",", ";"), " ", $file);
        }
        //echo "\r\n\r\n<br>url_status Array:<br><pre>";print_r($url_status);echo "</pre>\r\n";
        // for DOCs, PDFs, etc we need special text converter
        if ($url_status['content'] != 'text' && $url_status['content'] != 'xml' && $url_status['content'] != 'xhtml' && $url_status['content'] != 'csv') {
            $document = 1;
            $file = extract_text($file, $file0, $url_status['content'], $url, $chrSet);
            //  because the converter already transferred the documents to UTF-8, we need to adjust it here
            $contents['charset'] = 'UTF-8';
            $charSet = 'UTF-8';
            if ($file == 'ERROR') {
                //      if error, suppress further indexing
                $OKtoIndex = 0;
                $file_read_error = 1;
                $realnum--;
            }
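            //  reduce Pashtu and Urdu to the main Farsi letters
            //  NOTE(review): $charSet is set to 'UTF-8' a few lines above, so the check below can
            //  never equal 'windows-1256' as written; possibly the original page charset was meant - confirm.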
            if (strtolower($charSet) == 'windows-1256' && $url_status['content'] == 'pdf') {
                $f_letter0 = array("ﺎ", "�");
                $f_letter1 = array("�", "�", "ﺑ", "ﺒ");
                $f_letter2 = array("ï­–", "ï­—", "ï­˜", "ï­™");
                $f_letter3 = array("ﺕ", "ﺖ", "ﺗ", "ﺘ");
                $f_letter4 = array("ﺙ", "ﺚ", "ﺛ", "ﺜ");
                $f_letter5 = array("�", "ﺞ", "ﺟ", "ﺠ");
                $f_letter6 = array("ï­º", "ï­»", "ï­¼", "ï­½");
                $f_letter7 = array("ﺡ", "ﺢ", "ﺣ", "ﺤ");
                $f_letter8 = array("ﮋ", "ﮊ");
                $f_letter9 = array("ﺥ", "ﺦ", "ﺧ", "ﺨ");
                $f_letter10 = array("ﺩ", "ﺪ");
                $f_letter11 = array("ﺫ", "ﺬ");
                $f_letter12 = array("ﺭ", "ﺮ");
                $f_letter13 = array("ﺯ", "ﺰ");
                $f_letter14 = array("ﺱ", "ﺲ", "ﺳ", "ﺴ");
                $f_letter15 = array("ﺵ", "ﺶ", "ﺷ", "ﺸ");
                $f_letter16 = array("ﺹ", "ﺺ", "ﺻ", "ﺼ");
                $f_letter17 = array("ﺽ", "ﺾ", "ﺿ", "ﻀ");
                $f_letter18 = array("�", "ﻂ", "ﻃ", "ﻄ");
                $f_letter19 = array("ﻅ", "ﻆ", "ﻇ", "ﻈ");
                $f_letter20 = array("ﻉ", "ﻊ", "ﻋ", "ﻌ");
                $f_letter21 = array("�", "ﻎ", "�", "�");
                $f_letter22 = array("ﻑ", "ﻒ", "ﻓ", "ﻔ");
                $f_letter23 = array("ﻕ", "ﻖ", "ﻗ", "ﻘ");
                $f_letter24 = array("ﻙ", "ﻚ", "ﻛ", "ﻜ", "ﮎ", "�", "�", "ﮑ");
                $f_letter25 = array("ﮒ", "ﮓ", "ﮔ", "ﮕ");
                $f_letter26 = array("�", "ﻞ", "ﻟ", "ﻠ");
                $f_letter27 = array("ﻡ", "ﻢ", "ﻣ", "ﻤ");
                $f_letter28 = array("ﻧ", "ﻨ", "ﻦ", "ﻥ");
                $f_letter29 = array("ï»­", "ï»®");
                $f_letter30 = array("ﻩ", "ﻪ", "ﻫ", "ﻬ");
                $f_letter31 = array("ﻯ", "ﻰ", "ﻱ", "ﻲ", "ﻳ", "ﻴ");
                $file = str_replace($f_letter0, "ا", $file);
                $file = str_replace($f_letter1, "ب", $file);
                $file = str_replace($f_letter2, "Ù¾", $file);
                $file = str_replace($f_letter3, "ت", $file);
                $file = str_replace($f_letter4, "Ø«", $file);
                $file = str_replace($f_letter5, "ج", $file);
                $file = str_replace($f_letter6, "Ú†", $file);
                $file = str_replace($f_letter7, "Ø­", $file);
                $file = str_replace($f_letter8, "Ú˜", $file);
                $file = str_replace($f_letter9, "Ø®", $file);
                $file = str_replace($f_letter10, "د", $file);
                $file = str_replace($f_letter11, "Ø°", $file);
                $file = str_replace($f_letter12, "ر", $file);
                $file = str_replace($f_letter13, "ز", $file);
                $file = str_replace($f_letter14, "س", $file);
                $file = str_replace($f_letter15, "Ø´", $file);
                $file = str_replace($f_letter16, "ص", $file);
                $file = str_replace($f_letter17, "ض", $file);
                $file = str_replace($f_letter18, "Ø·", $file);
                $file = str_replace($f_letter19, "ظ", $file);
                $file = str_replace($f_letter20, "ع", $file);
                $file = str_replace($f_letter21, "غ", $file);
                $file = str_replace($f_letter22, "Ù�", $file);
                $file = str_replace($f_letter23, "Ù‚", $file);
                $file = str_replace($f_letter24, "Ú©", $file);
                $file = str_replace($f_letter25, "Ú¯", $file);
                $file = str_replace($f_letter26, "Ù„", $file);
                $file = str_replace($f_letter27, "Ù…", $file);
                $file = str_replace($f_letter28, "Ù†", $file);
                $file = str_replace($f_letter29, "Ùˆ", $file);
                $file = str_replace($f_letter30, "Ù‡", $file);
                $file = str_replace($f_letter31, "ÙŠ", $file);
            }
        }
        if ($OKtoIndex == 1) {
            //  size of the fetched text in units of 1024 bytes, two decimals, no thousands separator
            $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
            printPageSizeReport($pageSize, $topic);
        }
        //  Final charset for the UTF-8 converter: trimmed and upper-cased.
        $charSet = strtoupper(trim($contents['charset']));
        //  Fall back to UTF-8 when the detected value must be invalid (shorter than 3 characters
        //  or containing the word "encoding"). UTF-16 is relabelled as well, because that
        //  content will be converted in function clean_file()
        if (strlen($charSet) < 3 || stristr($charSet, "encoding") || $charSet == "UTF-16") {
            $charSet = 'UTF-8';
        }
        $dic = '';
        //  Chinese / Korean word segmentation: only when enabled, for non-empty text that is
        //  not a JavaScript link and whose charset is not one of the 8859 family
        if ($cn_seg == '1' && $file && !$js_link && !stristr($charSet, "8859")) {
            //  dictionary file (relative to $dict_dir) for each supported page charset
            $seg_dictionaries = array(
                'GB2312' => "/cn_gb18030.dic",          //  simplified Chinese
                'GB18030' => "/cn_gb18030.dic",         //  simplified Chinese
                'GBK' => "/cn_gb18030.dic",             //  simplified Chinese
                'BIG5' => "/cn_big5.dic",               //  traditional Chinese
                'ISO10646-1933' => "/kr_iso10646-1933.dic", //  Korean
                'EUC-KR' => "/kr_euc-kr.dic",           //  Korean
                'UTF-8' => "/cn_utf-8.dic",             //  Unicode
            );
            if (isset($seg_dictionaries[$charSet])) {
                $dic = "" . $dict_dir . $seg_dictionaries[$charSet];
            }
            if (!$dic) {
                //  no dictionary found for this charset
                printNoDictionary($charSet, $cl);
            } else {
                //  dictionary is available for the page charset: perform the segmentation
                $Segmentation = new Segmentation();
                $Segmentation->load($dic);
                $Segmentation->setLowercase(FALSE);
                $cn_result = $Segmentation->segmentString($file);
                if ($cn_result && $charSet != 'UTF-8') {
                    //  bring the segmented text to UTF-8, preferably with iconv
                    $iconv_file = @iconv($charSet, "UTF-8//IGNORE", $cn_result);
                    if (trim($iconv_file) != "") {
                        $cn_result = $iconv_file;
                    } else {
                        // iconv is not installed or input charSet not available. We need to use class ConvertCharset
                        $NewEncoding = new ConvertCharset($charSet, "utf-8");
                        $NewFileOutput = $NewEncoding->Convert($cn_result);
                        $cn_result = $NewFileOutput;
                    }
                    unset($iconv_file, $NewEncoding, $NewFileOutput);
                }
                //  clean the segmented text the same way as the raw page text
                $seg_data = clean_file($cn_result, $url, $url_status['content'], $charSet, $use_nofollow, $use_robot, $can_leave_domain);
            }
        }
        //  If Japanese text should be segmented, enter here. But not if a Chinese dictionary was already found.
        //  On success $seg_data holds the cleaned, segmented text and $file / $charSet are SHIFT_JIS.
        if ($jp_seg == '1' && $file && !$js_link && !stristr($charSet, "ISO") && !$dic) {
            $dic = '';
            if ($charSet == 'UTF-8' || $charSet == 'EUC-JP') {
                //  Only a SHIFT_JIS dictionary is selected below, so the text is converted first.
                //  iconv() returns false on failure: in that case keep the original text and charset
                //  instead of overwriting $file with false (which silently discarded the page text).
                $sjis_file = @iconv($charSet, "SHIFT_JIS//IGNORE", $file);
                if ($sjis_file !== false && $sjis_file !== '') {
                    $file = $sjis_file;
                    $charSet = "SHIFT_JIS";
                }
                unset($sjis_file);
            }
            if ($charSet == 'SHIFT_JIS') {
                $dic = "" . $dict_dir . "/jp_shiftJIS.dic";
            }
            if ($dic) {
                //  if dictionary is available for page charset, perform a segmentation
                $Segmentation = new Segmentation();
                $Segmentation->load($dic);
                $Segmentation->setLowercase(FALSE);
                $jp_result = $Segmentation->segmentString($file);
                //echo "\r\n\r\n<br /> jp_result: $jp_result<br />\r\n";
                if ($jp_result && $charSet != 'UTF-8') {
                    //  bring the segmented text to UTF-8, preferably with iconv
                    $iconv_file = @iconv($charSet, "UTF-8//IGNORE", $jp_result);
                    if (trim($iconv_file) == "") {
                        // iconv is not installed or input charSet not available. We need to use class ConvertCharset
                        $NewEncoding = new ConvertCharset($charSet, "utf-8");
                        $NewFileOutput = $NewEncoding->Convert($jp_result);
                        $jp_result = $NewFileOutput;
                    } else {
                        $jp_result = $iconv_file;
                    }
                    unset($iconv_file, $NewEncoding, $NewFileOutput);
                }
                $seg_data = clean_file($jp_result, $url, $url_status['content'], $charSet, $use_nofollow, $use_robot, $can_leave_domain);
            } else {
                printNoDictionary($charSet, $cl);
                //  no dictionary found for this charset
            }
        }
        //  Convert the text to UTF-8, unless it is empty or already labelled as UTF-8
        if ($file && $charSet != "UTF-8") {
            $file = convertToUTF8($file, $charSet, $char_Set, $converter_dir);
        }
        //  If activated in Admin backend, verify the conversion: a UTF-8 to UTF-8 pass
        //  through iconv must hand back exactly the same string
        if ($utf8_verify) {
            $valid_utf8 = ($file === @iconv('UTF-8', 'UTF-8', $file));
        }
        if (!$valid_utf8) {
            $url_status['state'] = "<br />Invalid charset definition placed in meta tags of HTML header. Unable to convert the text into UTF-8<br />Indexing aborted for {$url}";
            if ($server_char) {
                $url_status['state'] = "<br />Invalid charset definition supplied via HTTP by the client server. Unable to convert the text into UTF-8<br />Indexing aborted for {$url}";
            }
            if ($use_prefcharset) {
                $url_status['state'] = "<br />Invalid charset definition placed Admin Settings.<br />Site was created with another charset<br />Indexing aborted for {$url}";
            }
            printUrlStatus($url_status['state'], $command_line, $no_log);
            $file = '';
            $deletable = 1;
        } else {
            if ($index_media == '1') {
                $newmd5sum = md5($file);
                //  get md5 including links and title of media files
            }
            $data = clean_file($file, $url, $url_status['content'], $charSet, $use_nofollow, $use_robot, $can_leave_domain);
            //echo "\r\n\r\n<br>data Array:<br><pre>";print_r($data);echo "</pre>\r\n";
            //  index only links and their titles
            if ($only_links) {
                $media_links = '0';
                $my_links = get_link_details($file, $url, $can_leave_domain, $data['base'], $media_links, $use_nofollow, $local_redir);
                $data['content'] = $my_links[0][0];
                //  define new content
                $data['fulltext'] = $my_links[0][0];
                //  define new content also for 'full text';
            }
            //  Combine raw words plus segmented words.
            //  $seg_data is assigned by the segmentation code only when a dictionary ($dic) was found,
            //  so $dic and !$js_link are required for both modes. The former condition bound as
            //  `$cn_seg == 1 || ($jp_seg == 1 && $dic && !$js_link)` and merged unset data for Chinese.
            //  Array keys are quoted: bare words like $data[content] are undefined constants (Error on PHP 8).
            if (($cn_seg == 1 || $jp_seg == 1) && $dic && !$js_link) {
                if ($debug != '0') {
                    $seg_add = $seg_data['count'] - $data['count'];
                    //      calculate segmentation result
                    if ($seg_add > '0') {
                        //  exactly one report per language (Korean formerly fell through to the Chinese report)
                        if ($charSet == 'EUC-KR' || $charSet == 'ISO10646-1933') {
                            printSegKR($seg_add, $cl);
                        } elseif ($charSet == 'SHIFT_JIS') {
                            printSegJA($seg_add, $cl);
                        } else {
                            printSegCN($seg_add, $cl);
                        }
                    }
                    /*
                    echo "<br /><pre>Results of word segmentation:</pre>";
                    echo "<br />Unsegmented title :<br><pre>";print_r($data[title]);echo "</pre>";
                    echo "<br />Segmented title :<br><pre>";print_r($seg_data[title]);echo "</pre>";
                    echo "<br />Unsegmented full text:<br />$data[fulltext]<br />";
                    echo "<br />Segmented full text:<br />$seg_data[fulltext]";
                    */
                }
                //  append the segmented variants to the raw text fields (the title is left as it is)
                $data['content'] = "" . $data['content'] . "" . $seg_data['content'] . "";
                //$data[title]        ="".$data[title]."".$seg_data[title]."";
                $data['description'] = "" . $data['description'] . "" . $seg_data['description'] . "";
                $data['keywords'] = "" . $data['keywords'] . "" . $seg_data['keywords'] . "";
            }
            //      check if canonical redirection was found in page head
            $cano_link = '0';
            if ($data['cano_link']) {
                //echo "\r\n\r\n<br /> url: '$url'<br />\r\n";
                $cano_link = $db_con->real_escape_string($data['cano_link']);
                //echo "\r\n\r\n<br /> cano_link: '$cano_link'<br />\r\n";
                if ($url != $cano_link) {
                    //  only new cano links are accepted
                    $OKtoIndex = 0;
                    $deletable = 1;
                    $realnum--;
                    if ($cano_link == "1") {
                        printNoCanonical($cano_link, $cl);
                        //  if unable to extract redirection link
                    } else {
                        if ($data['refresh'] == '1') {
                            printRefreshed($cano_link, $data['wait'], $cl);
                            //  if refresh meta tag was found in HTML head
                        } else {
                            printCanonical($cano_link, $cl);
                            //  if canonical link was found in HTML head
                        }
                        //      do we already know this link in link-table
                        $sql_query = "SELECT /* jfield 2 */ url from " . $mysql_table_prefix . "links where url like '{$cano_link}'";
                        $res = $db_con->query($sql_query);
                        if ($debug && $db_con->errno) {
                            $err_row = __LINE__ - 2;
                            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                            if (__FUNCTION__) {
                                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                            } else {
                                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                            }
                            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                            echo "<p> {$sql_query} </p>";
                            exit;
                        }
                        $rows = $res->num_rows;
                        if ($rows == 0) {
                            // if not known in link-table, check if already known in temp-table
                            $sql_query = "SELECT /* jfield 1 */ link from " . $mysql_table_prefix . "temp where link like '{$cano_link}'";
                            $res = $db_con->query($sql_query);
                            if ($debug && $db_con->errno) {
                                $err_row = __LINE__ - 2;
                                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                if (__FUNCTION__) {
                                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                } else {
                                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                }
                                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                echo "<p> {$sql_query} </p>";
                                exit;
                            }
                            $rows = $res->num_rows;
                            if ($rows == 0) {
                                // not known in link-table, add new link
                                if ($numoflinks <= $max_links) {
                                    $sql_query = "INSERT into " . $mysql_table_prefix . "temp (link, level, id) values ('{$cano_link}', '{$level}', '{$sessid}')";
                                    $db_con->query($sql_query);
                                }
                                if ($debug && $db_con->errno) {
                                    $err_row = __LINE__ - 2;
                                    printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                    if (__FUNCTION__) {
                                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                    } else {
                                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                    }
                                    printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                    printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                    echo "<p> {$sql_query} </p>";
                                    exit;
                                }
                            }
                        }
                    }
                }
                $cano_link = '0';
                //  reset the cano flag
            } else {
                if ($index_media == '0') {
                    $newmd5sum = md5($data['content']);
                    // get md5 from cleaned full text only
                }
                if ($md5sum == $newmd5sum) {
                    printStandardReport('md5notChanged', $command_line, $no_log);
                    $OKtoIndex = 0;
                    $realnum--;
                } else {
                    mysqltest();
                    //     check for duplicate page content
                    $sql_query = "SELECT * from " . $mysql_table_prefix . "links where md5sum='{$newmd5sum}'";
                    $result = $db_con->query($sql_query);
                    if ($debug && $db_con->errno) {
                        $err_row = __LINE__ - 2;
                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                        if (__FUNCTION__) {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                        } else {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                        }
                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                        echo "<p> {$sql_query} </p>";
                        exit;
                    }
                    if ($num_rows = $result->num_rows) {
                        //  display warning message and urls with duplicate content
                        printStandardReport('duplicate', $command_line, $no_log);
                        while ($row = $result->fetch_array(MYSQLI_ASSOC)) {
                            $dups[] = $row['link_id'];
                        }
                        for ($i = 0; $i < $num_rows; $i++) {
                            $link_id = $dups[$i];
                            //$num = $i+1;
                            $sql_query = "SELECT * from " . $mysql_table_prefix . "links where link_id like '{$link_id}'";
                            $res1 = $db_con->query($sql_query);
                            if ($debug && $db_con->errno) {
                                $err_row = __LINE__ - 2;
                                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                if (__FUNCTION__) {
                                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                } else {
                                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                }
                                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                echo "<p> {$sql_query} </p>";
                                exit;
                            }
                            $row = $res1->fetch_array(MYSQLI_NUM);
                            $dup_url = urldecode($row[2]);
                            $dup_url = $dup_url;
                            $dup_url = @iconv($charSet, "UTF-8//IGNORE", $dup_url);
                            if ($idna) {
                                // Initialize the converter class
                                $IDN = new idna_convert(array('idn_version' => 2008));
                                if ($conv_puny && strstr($dup_url, "xn--") && $idna) {
                                    $dup_url = $IDN->decode($dup_url);
                                }
                            }
                            if ($clear == 1) {
                                clean_resource($res, '03');
                            }
                            printDupReport($dup_url, $command_line);
                        }
                        if ($dup_content == '0') {
                            //  enter here, if pages with duplicate content should not be indexed/re-indexed
                            $OKtoIndex = 0;
                            $realnum--;
                        } else {
                            $OKtoIndex = 1;
                        }
                    }
                }
            }
            //echo "\r\n\r\n<br>data array1:<br><pre>";print_r($data);echo "</pre>\r\n";
            if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
                $urlparts = parse_addr($url);
                $newdomain = $urlparts['host'];
                $type = 0;
                if ($data['noindex'] == 1) {
                    //  The page carries a noindex meta tag: remember this URL in the link table,
                    //  so it does not become a new link another time, then suppress indexing.
                    //  Check without scheme and www.
                    //  The scheme is cut from $url itself; formerly the offset found in $url was
                    //  applied to $check_link, which is not assigned anywhere in this branch.
                    $scheme_end = stripos($url, "//");
                    $check_link = $scheme_end === false ? $url : substr($url, $scheme_end + 2);
                    //  test and cut with the same needle ("www." was tested, but "www" was located)
                    $www_pos = stripos($check_link, "www.");
                    if ($www_pos !== false) {
                        $check_link = substr($check_link, $www_pos + 4);
                    }
                    unset($scheme_end, $www_pos);
                    //  NOTE(review): $check_link is placed into the LIKE pattern as it is; confirm that
                    //  $url has been escaped for SQL before this point.
                    $sql_query = "SELECT url from " . $mysql_table_prefix . "links where url like '%{$check_link}'";
                    $res = $db_con->query($sql_query);
                    if ($debug && $db_con->errno) {
                        $err_row = __LINE__ - 2;
                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                        if (__FUNCTION__) {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                        } else {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                        }
                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                        echo "<p> {$sql_query} </p>";
                        exit;
                    }
                    $known_link = $res->num_rows;
                    //  NOTE(review): the suffix match can return several rows, and any count other than
                    //  exactly one triggers another INSERT - confirm whether "no row at all" was meant.
                    if ($known_link != '1') {
                        $sql_query = "INSERT into " . $mysql_table_prefix . "links (site_id, url, indexdate, size, md5sum, level) values ('{$site_id}', '{$url}', curdate(), '{$pageSize}', '{$newmd5sum}', '{$thislevel}')";
                        $db_con->query($sql_query);
                        if ($debug && $db_con->errno) {
                            $err_row = __LINE__ - 2;
                            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                            if (__FUNCTION__) {
                                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                            } else {
                                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                            }
                            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                            echo "<p> {$sql_query} </p>";
                            exit;
                        }
                    }
                    //  do not index this page, and take it out of the counters
                    $OKtoIndex = 0;
                    $deletable = 1;
                    $realnum--;
                    printStandardReport('metaNoindex', $command_line, $no_log);
                }
                if (!$js_link) {
                    //  JavaScript will not deliver keywords, only links are parsed
                    $content = explode(" ", addslashes($data['content']));
                    //echo "\r\n\r\n<br>content array0:<br><pre>";print_r($content);echo "</pre>\r\n";
                    $acc_words[] = array();
                    $type = '';
                    //  if Greek accents should be removed from Greek vowels
                    if ($noacc_el) {
                        foreach ($content as &$thisword) {
                            $no_acc = remove_acc_el($thisword);
                            if ($no_acc != $thisword) {
                                $acc_words[] = $no_acc;
                            }
                        }
                    }
                    //  if the other (Latin)  accents should be removed from their vowels
                    if ($vowels) {
                        foreach ($content as $thisword) {
                            $no_acc = remove_acc($thisword, '');
                            if ($no_acc != $thisword) {
                                $acc_words[] = $no_acc;
                            }
                        }
                    }
                    //  now add the words without accents to the total text content
                    $content = array_merge($content, $acc_words);
                    //echo "\r\n\r\n<br>content array0:<br><pre>";print_r($content);echo "</pre>\r\n";
                    //  if ligatures should be equalized
                    if ($liga) {
                        $liga_words = array();
                        //  will contain converted ligatures
                        $phon_words = array();
                        //  will contain converted phonetics
                        //  first: convert letters into latin ligatures
                        foreach ($content as $thisword) {
                            if ($thisword) {
                                $liga_words[] = html_entity_decode($thisword, ENT_QUOTES, "UTF-8");
                                $thisword1 = $thisword;
                                reset($latin_ligatures);
                                while ($char = each($latin_ligatures)) {
                                    $thisword2 = preg_replace("/" . $char[0] . "/s", $char[1], $thisword1);
                                    //  convert ligatures
                                    if ($thisword1 != $thisword2) {
                                        //  break on first ligature
                                        $liga_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8");
                                        //  collect new words with ligatures
                                        $thisword1 = $thisword2;
                                        //  continue with the word, containing the ligatures
                                        //break;
                                    }
                                }
                            }
                        }
                        // second: convert all letters into phonetic transcriptions
                        reset($liga_words);
                        foreach ($liga_words as $thisword) {
                            $thisword1 = $thisword;
                            reset($phon_trans);
                            while ($char = each($phon_trans)) {
                                $thisword2 = preg_replace("/" . $char[0] . "/s", $char[1], $thisword1);
                                //  convert into phonetics
                                if ($thisword1 != $thisword2) {
                                    //  break on first ligature
                                    $phon_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8");
                                    //  collect new words with phonetics
                                    $thisword1 = $thisword2;
                                    //  continue with the word, containing the ligatures
                                    //break;
                                }
                            }
                        }
                        $liga_words = array_merge($liga_words, $phon_words);
                        //  add all phonetics to the liga array
                        //  now vice versa: convert latin ligatures and phonetic transcriptions into standard letters
                        reset($content);
                        $not_liga_words = array();
                        foreach ($content as $thisword) {
                            if ($thisword) {
                                //  first: convert latin ligatures into standard letters
                                $thisword1 = superentities($thisword, ENT_QUOTES, "UTF-8");
                                reset($latin_ligatures);
                                while ($char = each($latin_ligatures)) {
                                    $thisword2 = preg_replace("/" . $char[1] . "/s", $char[0], $thisword1);
                                    //  re-convert ligatures
                                    if ($thisword1 != $thisword2) {
                                        $not_liga_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8");
                                        //  collect new words without ligatures
                                        $thisword1 = $thisword2;
                                        //  continue with the word, containing the ligature
                                    }
                                }
                            }
                            //echo "\r\n\r\n<br>not_liga_words Array:<br><pre>";print_r($not_liga_words);echo "</pre>\r\n";
                            // second: convert phonetic transcriptions into standard letters
                            reset($not_liga_words);
                            $not_phon_words = array();
                            foreach ($not_liga_words as $thisword) {
                                $thisword1 = superentities($thisword, ENT_QUOTES, "UTF-8");
                                reset($phon_trans);
                                while ($char = each($phon_trans)) {
                                    $thisword2 = preg_replace("/" . $char[1] . "/s", $char[0], $thisword1);
                                    //  re-convert phonetics
                                    if ($thisword1 != $thisword2) {
                                        $not_phon_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8");
                                        //  collect new words without phonetics
                                        $thisword1 = $thisword2;
                                        //  continue with the word, containing the phonetic trans.
                                    }
                                }
                            }
                        }
                        $not_words = array_merge($not_liga_words, $not_phon_words);
                        //  add all together
                        $content = array_merge($liga_words, $not_words);
                        //  add all ligatures and re-converted letters to the content array
                    }
                    $wordarray = unique_array($content);
                }
                //echo "\r\n\r\n<br>wordarray0:<br><pre>";print_r($wordarray);echo "</pre>\r\n";
                if ($smp != 1) {
                    if ($data['nofollow'] != 1 && $cano_link == '0') {
                        $media_links = '0';
                        $links = array();
                        if (!$document) {
                            //  don't try to find links in PDFs and other pure documents
                            $links = get_links($file, $url, $can_leave_domain, $data['base'], $media_links, $use_nofollow, $local_redir, $url_reloc, $charSet);
                        }
                        if ($links[0]) {
                            $links = distinct_array($links);
                            $all_links = count($links);
                            if ($all_links > $max_links) {
                                $all_links = $max_links;
                            }
                            $links = array_slice($links, 0, $max_links);
                            if ($realnum < $max_links) {
                                $numoflinks = 0;
                                //if there are any new links, add to the temp table, but only if there isn't such url already
                                if ($links[0]) {
                                    reset($links);
                                    $tmp_urls = get_temp_urls($sessid);
                                    //  reload previous temp
                                    // echo "\r\n\r\n<br>tmp_urls array:<br><pre>";print_r($tmp_urls);echo "</pre>\r\n";
                                    if ($debug == '2') {
                                        //  if debug mode, show details
                                        printStandardReport('newLinks', $command_line, $no_log);
                                    }
                                    while ($thislink = each($links)) {
                                        // echo "\r\n\r\n<br>thislink array:<br><pre>";print_r($thislink);echo "</pre>\r\n";
                                        //  ignore error (message) links and self linking
                                        if (strstr($thislink[1], "//") && $thislink[1] != $url) {
                                            //  find new domains for _addurl table
                                            if ($auto_add && $can_leave_domain) {
                                                $all_link = parse_all_url($thislink[1]);
                                                //  only the domain will be stored as new URL into addurl table
                                                $dom_link = $all_link['host'];
                                                //  reduce to domain name and tld
                                                $new_link = str_replace("www.", "", $dom_link);
                                                // use the complete URL
                                                //$dom_link = $thislink[1];
                                                //  use only the domain
                                                $dom_link = $all_link['scheme'] . "://" . $dom_link;
                                                $banned = '';
                                                mysqltest();
                                                //     check whether URL is already known in sites table
                                                $sql_query = "SELECT url from " . $mysql_table_prefix . "sites where url like '%{$new_link}%'";
                                                $res1 = $db_con->query($sql_query);
                                                //     check whether URL is already known in addurl table
                                                $sql_query = "SELECT url from " . $mysql_table_prefix . "addurl where url like '%{$new_link}%'";
                                                $res2 = $db_con->query($sql_query);
                                                //     check whether URL is banned
                                                $sql_query = "SELECT domain from " . $mysql_table_prefix . "banned where domain like '%{$new_link}%'";
                                                $res3 = $db_con->query($sql_query);
                                                if ($res3->num_rows) {
                                                    $banned = "1";
                                                }
                                                if ($res1->num_rows == 0 && $res2->num_rows == 0 && $res3->num_rows == 0) {
                                                    //  add new domain into _addurl table
                                                    $sql_query = "INSERT into " . $mysql_table_prefix . "addurl (url, description, account) values ('{$dom_link}', '{$comment}', '{$admin_email}')";
                                                    $db_con->query($sql_query);
                                                }
                                            }
                                            //      check whether thislink is already known as a link ( might happen by means of relocated URLs)
                                            $res4 = '';
                                            $res5 = '';
                                            $known_link = '';
                                            $known_temp = '';
                                            $check_link = $thislink[1];
                                            // i don't believe the "like" is necessary here and it slows down indexing
                                            //                                                //  check without scheme and www.
                                            //                                                $check_link = substr($check_link, stripos($check_link, "//")+2);
                                            //                                                if (stristr($check_link, "www.")) {
                                            //                                                    $check_link = substr($check_link, stripos($check_link, "www")+4);
                                            //                                                }
                                            //
                                            //                                                $sql_query = "SELECT /* jfield 3 */ url from ".$mysql_table_prefix."links where url like '%$check_link'";
                                            //                                                $res4 = $db_con->query($sql_query);
                                            //
                                            //                                                $known_link = $res4->num_rows;;
                                            //
                                            //                                                $sql_query = "SELECT /* jfield 4 */ link from ".$mysql_table_prefix."temp where link like '%$check_link'";
                                            //                                                $res5 = $db_con->query($sql_query);
                                            //                                                if ($debug > 0 && $db_con->errno) {
                                            //                                                    printf("MySQL failure: %s\n", $db_con->error);
                                            //                                                    echo "<br />Script aborted.";
                                            //                                                    exit;
                                            //                                                }
                                            //                                                $known_temp = $res5->num_rows;;
                                            $sql_query = "SELECT /* jfield 3 */ url from " . $mysql_table_prefix . "links where url = '{$check_link}'";
                                            $res4 = $db_con->query($sql_query);
                                            $known_link = $res4->num_rows;
                                            $sql_query = "SELECT /* jfield 4 */ link from " . $mysql_table_prefix . "temp where link = '{$check_link}'";
                                            $res5 = $db_con->query($sql_query);
                                            if ($debug > 0 && $db_con->errno) {
                                                printf("MySQL failure: %s\n", $db_con->error);
                                                echo "<br />Script aborted.";
                                                exit;
                                            }
                                            $known_temp = $res5->num_rows;
                                            //      if this is a new link not yet known or banned, add this new link to the temp table
                                            if ($tmp_urls[$thislink[1]] != 1 && !$res1 && !$known_link && !$known_temp && !$banned) {
                                                $tmp_urls[$thislink[1]] = 1;
                                                $numoflinks++;
                                                if ($debug == '2') {
                                                    $act_link = rawurldecode($thislink[1]);
                                                    //  make it readable
                                                    $act_link = stripslashes($act_link);
                                                    printNewLinks($act_link, $cl);
                                                }
                                                mysqltest();
                                                $sql_query = "INSERT into " . $mysql_table_prefix . "temp (link, level, id) values ('{$thislink['1']}', '{$level}', '{$sessid}')";
                                                if ($numoflinks <= $max_links) {
                                                    $db_con->query($sql_query);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    } else {
                        printStandardReport('noFollow', $command_line, $no_log);
                    }
                    unset($file);
                }
                // JFIELD at this point, the URL in the DB is good
                // echo "<h1>DONE</h1>";
                // exit;
                //  if we should index only the files as defined in docs list
                if ($only_docs) {
                    $OKtoIndex = '';
                    foreach ($docs as $thisdoc) {
                        if (strstr($urlparts['path'], $thisdoc)) {
                            $OKtoIndex = "1";
                        }
                    }
                    if (!$OKtoIndex) {
                        printStandardReport('noDoclist', $command_line, $no_log);
                    }
                }
                if ($OKtoIndex == 1) {
                    if ($link_check == 0) {
                        $title = $data['title'];
                        $host = $data['host'];
                        $path = $data['path'];
                        $fulltxt = $data['fulltext'];
                        $desc = substr($data['description'], 0, 1024);
                        //  extract domain
                        $url_parts = parse_all_url($url);
                        $hostname = $url_parts[host];
                        //  rebuild domain for localhost applications
                        if ($hostname == 'localhost') {
                            $host1 = str_replace($local, '', $url);
                        }
                        $pos = strpos($host1, "/");
                        //      on local server delete all behind the /
                        //      will work for localhost URLs like http://localhost/publizieren/japan1/index.htm
                        //       will fail for localhost URLs like http://localhost/publizieren/externe/japan2/index.htm
                        if ($pos) {
                            $host1 = substr($host1, 0, $pos);
                            //      build full address again, now only local domain
                        }
                        if ($hostname == 'localhost') {
                            $domain_for_db = "" . $local . "" . $host1 . "/";
                            // complete URL
                            $domain_for_db = str_replace("http://", "", $domain_for_db);
                            //$domain_for_db = $host1;
                        } else {
                            //$domain_for_db = ("$url_parts[scheme]://".$hostname."/");  // complete URL
                            $domain_for_db = $hostname;
                        }
                        if (isset($domain_arr[$domain_for_db])) {
                            $dom_id = $domain_arr[$domain_for_db];
                        } else {
                            mysqltest();
                            $sql_query = "INSERT into " . $mysql_table_prefix . "domains (domain) values ('{$domain_for_db}')";
                            $db_con->query($sql_query);
                            if ($debug && $db_con->errno) {
                                $err_row = __LINE__ - 2;
                                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                if (__FUNCTION__) {
                                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                } else {
                                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                }
                                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                echo "<p> {$sql_query} </p>";
                                exit;
                            }
                            $dom_id = $db_con->insert_id;
                            $domain_arr[$domain_for_db] = $dom_id;
                        }
                        if (!$js_link) {
                            //  JavaScript will not deliver keywords, only links are parsed
                            reset($wordarray);
                            if ($case_sensitive == '0') {
                                foreach ($wordarray as &$value) {
                                    $value[1] = lower_ent($value[1]);
                                    $value[1] = lower_case($value[1]);
                                    //  convert keywords to lower case
                                }
                            }
                            $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords'], $url_parts);
                        } else {
                            $wordarray = '';
                        }
                        //if there are words to index, add the link to the database, get its id, and add the word + their relation
                        if (is_array($wordarray) && count($wordarray) >= $min_words_per_page) {
                            $OKtoSave = 1;
                            if ($use_white1 == '1') {
                                //  check if content of page matches ANY word in whitelist
                                $found = '0';
                                foreach ($whitelist as $key => $val1) {
                                    reset($wordarray);
                                    while ($thisword = each($wordarray)) {
                                        $word = trim($thisword[1][1]);
                                        if (strcasecmp($val1, $word) == 0) {
                                            $found = '1';
                                        }
                                    }
                                }
                                if ($found == '0') {
                                    printStandardReport('noWhitelist', $command_line, $no_log);
                                    $OKtoSave = 0;
                                    $realnum--;
                                }
                            }
                            if ($use_white2 == '1') {
                                //  check if content of page matches ALL words in whitelist
                                $all = count($whitelist);
                                $found = '0';
                                $found_this = '0';
                                foreach ($whitelist as $key => $val2) {
                                    reset($wordarray);
                                    while ($thisword = each($wordarray)) {
                                        $word = trim($thisword[1][1]);
                                        if (strcasecmp($val2, $word) == 0) {
                                            $found_this = '1';
                                        }
                                    }
                                    if ($found_this != '0') {
                                        $found++;
                                        $found_this = '0';
                                    }
                                }
                                if ($found != $all) {
                                    printStandardReport('noWhitelist', $command_line, $no_log);
                                    $OKtoSave = 0;
                                    $realnum--;
                                }
                            }
                            if ($use_black == '1') {
                                $found = '0';
                                //  check if content of page matches ANY string in blacklist
                                foreach ($blacklist as $key => $val3) {
                                    $met = stripos($data[fulltext], $val3);
                                    if ($met) {
                                        $found = '1';
                                    }
                                }
                                if ($found == '1') {
                                    printStandardReport('matchBlacklist', $command_line, $no_log);
                                    $OKtoSave = 0;
                                    $realnum--;
                                    $url_status['black'] = 1;
                                    return $url_status;
                                }
                            }
                            //  if activated in Admin backend, create a thumbnail of this URL
                            if ($OKtoSave && $hostname != 'localhost' && $webshot) {
                                $shot = '';
                                //  will contain the png webshot
                                $img = new webshots();
                                $shot = $img->url_to_image($url);
                                if ($debug && stristr($shot, "error: #")) {
                                    $shot_warn = "<br />Unable to create the webshot because of " . $shot;
                                    printWarning($shot_warn, $command_line, $no_log);
                                } else {
                                    $shot = $db_con->real_escape_string($shot);
                                }
                            }
                            if ($md5sum == '' || $md5sum == '' && $url_status['relocate']) {
                                //  enter here for new page (unknown link) OR for new relocated URL(so it will become a new link)
                                //  title, description and fulltxt are already escaped in function clean_file();
                                $url = $db_con->real_escape_string($url);
                                // jfield says: messy char decoding earlier
                                // leaves crap here that fudges up the works
                                $title_enc = mb_detect_encoding($title);
                                if (mb_detect_encoding($title) != "UTF-8") {
                                    $title = iconv($title_enc, "UTF-8", $title);
                                }
                                $fulltxt = substr($fulltxt, 0, 100000);
                                // we've got to stop somewhere
                                $fulltxt_enc = mb_detect_encoding($fulltxt);
                                if (mb_detect_encoding($title) != "UTF-8") {
                                    $fulltxt = iconv($fulltxt_enc, "UTF-8", $fulltxt);
                                }
                                mysqltest();
                                $sql_query = "INSERT into " . $mysql_table_prefix . "links (site_id, url, title, description, fulltxt, indexdate, size, md5sum, level, webshot) values ('{$site_id}', '{$url}', '{$title}', left('{$desc}', 255), '{$fulltxt}', curdate(), '{$pageSize}', '{$newmd5sum}', '{$thislevel}', '{$shot}')";
                                $db_con->query($sql_query);
                                if ($debug && $db_con->errno) {
                                    $err_row = __LINE__ - 2;
                                    printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                    if (__FUNCTION__) {
                                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                    } else {
                                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                    }
                                    printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                    printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                    echo "<p> {$sql_query} </p>";
                                    //exit;
                                    // jfield: let's keep going
                                    return;
                                }
                                $sql_query = "SELECT link_id from " . $mysql_table_prefix . "links where url='{$url}'";
                                $result = $db_con->query($sql_query);
                                if ($debug && $db_con->errno) {
                                    $err_row = __LINE__ - 2;
                                    printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                    if (__FUNCTION__) {
                                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                    } else {
                                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                    }
                                    printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                    printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                    echo "<p> {$sql_query} </p>";
                                    exit;
                                }
                                $row = $result->fetch_array(MYSQLI_NUM);
                                $link_id = $row[0];
                                if ($OKtoSave) {
                                    //  store link details, if not yet known (during reindex)
                                    if ($only_links) {
                                        //  extract domain of current page delivering the new links
                                        $url_parts = parse_all_url($url);
                                        $hostname = $url_parts[host];
                                        if ($hostname == 'localhost') {
                                            //  rebuild domain for localhost applications
                                            $host1 = str_replace($local, '', $url);
                                        }
                                        $pos = strpos($host1, "/");
                                        //      on local server delete all behind the /
                                        //      will work for localhost URLs like http://localhost/publizieren/japan1/index.htm
                                        //       will fail for localhost URLs like http://localhost/publizieren/externe/japan2/index.htm
                                        if ($pos) {
                                            $host1 = substr($host1, 0, $pos);
                                            //      build full address again, now only local domain
                                        }
                                        if ($hostname == 'localhost') {
                                            $domain_db = "" . $local . "" . $host1 . "/";
                                            // complete URL
                                            $domain_db = str_replace("http://", "", $domain_db);
                                            //$domain_db = $host1;
                                        } else {
                                            //$domain_db = ("$url_parts[scheme]://".$hostname."/");  // complete URL
                                            $domain_db = $hostname;
                                        }
                                        //    now store all link details into db
                                        foreach ($my_links as $found_link) {
                                            //  but only if we have found a title
                                            if ($found_link[3]) {
                                                mysqltest();
                                                //     check whether URL is already known in link_details table
                                                $sql_query = "SELECT title from " . $mysql_table_prefix . "link_details where link_id like '{$link_id}' and url like '%{$found_link['2']}%'";
                                                $res1 = $db_con->query($sql_query);
                                                if ($res1->num_rows == 0) {
                                                    //  must be new link
                                                    $sql_query = "INSERT into " . $mysql_table_prefix . "link_details (link_id, url, title, indexdate, domain) values ('{$link_id}', '{$found_link['2']}', '{$found_link['3']}', now(), '{$domain_db}')";
                                                    $db_con->query($sql_query);
                                                }
                                            }
                                        }
                                    }
                                    if ($debug == '2') {
                                        //  if debug mode, show details
                                        printStandardReport('newKeywords', $command_line, $no_log);
                                    }
                                    save_keywords($wordarray, $link_id, $dom_id);
                                }
                                mysqltest();
                                if ($index_media == '1' && $OKtoSave) {
                                    //   find media content only if there was no conflict with text (white and/or blacklist)
                                    include "index_media.php";
                                    //  try to find media files
                                }
                                mysqltest();
                                if ($debug == '2') {
                                    printStandardReport('indexed1', $command_line, $no_log);
                                } else {
                                    printStandardReport('indexed', $command_line, $no_log);
                                }
                            } else {
                                if ($md5sum != '' && $md5sum != $newmd5sum && $OKtoSave) {
                                    //if page has changed, start updating
                                    mysqltest();
                                    $sql_query = "SELECT link_id from " . $mysql_table_prefix . "links where url='{$url}'";
                                    $result = $db_con->query($sql_query);
                                    if ($debug && $db_con->errno) {
                                        $err_row = __LINE__ - 2;
                                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                        if (__FUNCTION__) {
                                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                        } else {
                                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                        }
                                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                        echo "<p> {$sql_query} </p>";
                                        exit;
                                    }
                                    $row = $result->fetch_array(MYSQLI_NUM);
                                    $link_id = $row[0];
                                    $sql_query = "DELETE from " . $mysql_table_prefix . "link_keyword where link_id={$link_id}";
                                    $db_con->query($sql_query);
                                    if ($debug && $db_con->errno) {
                                        $err_row = __LINE__ - 2;
                                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                        if (__FUNCTION__) {
                                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                        } else {
                                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                        }
                                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                        echo "<p> {$sql_query} </p>";
                                        exit;
                                    }
                                    if ($debug == '2') {
                                        //  if debug mode, show details
                                        printStandardReport('newKeywords', $command_line, $no_log);
                                    }
                                    save_keywords($wordarray, $link_id, $dom_id);
                                    $sql_query = "UPDATE " . $mysql_table_prefix . "links set title='{$title}', description ='{$desc}', fulltxt = '{$fulltxt}', indexdate=now(), size = '{$pageSize}', md5sum='{$newmd5sum}', level='{$thislevel}', webshot='{$shot}' where link_id='{$link_id}'";
                                    mysqltest();
                                    $db_con->query($sql_query);
                                    if ($debug && $db_con->errno) {
                                        $err_row = __LINE__ - 2;
                                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                                        if (__FUNCTION__) {
                                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                                        } else {
                                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                                        }
                                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                                        echo "<p> {$sql_query} </p>";
                                        exit;
                                    }
                                    if ($index_media == '1') {
                                        include "index_media.php";
                                        //  try to find media files
                                    }
                                    if ($debug == '2') {
                                        printStandardReport('re-indexed1', $command_line, $no_log);
                                    }
                                }
                            }
                        } else {
                            if ($js_link) {
                                printStandardReport('js_content', $command_line, $no_log);
                            } else {
                                printStandardReport('minWords', $command_line, $no_log);
                            }
                            $realnum--;
                        }
                    } else {
                        printStandardReport('link_okay', $command_line, $no_log);
                    }
                    unset($file, $title, $fulltxt, $desc);
                    $wordarray = array();
                    $data = array();
                    $seg_data = array();
                }
            }
        }
    } else {
        $deletable = 1;
        //printUrlStatus($url_status['state'], $command_line, $no_log);
    }
    mysqltest();
    if ($url_status['relocate']) {
        //  remove this relocated URL from temp table, because it is indexed now
        $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$url}' AND id = '{$sessid}'";
        $db_con->query($sql_query);
        if ($debug && $db_con->errno) {
            $err_row = __LINE__ - 2;
            printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
            if (__FUNCTION__) {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
            } else {
                printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
            }
            printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
            printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
            echo "<p> {$sql_query} </p>";
            exit;
        }
    }
    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    } else {
        if ($reindex == 1) {
        }
    }
    if (!isset($all_links)) {
        $all_links = 0;
    }
    if (!isset($numoflinks)) {
        $numoflinks = 0;
    }
    //      if valid sitemap found, or canonical link, or something else, no LinkReport
    if ($smp != 1 && $OKtoIndex == 1 && $url_status['state'] == 'ok') {
        printLinksReport($numoflinks, $all_links, $command_line);
    }
    //  remove the URL, which has now been indexed, from the temp table.
    mysqltest();
    $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$url}' AND id = '{$sessid}'";
    $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        $err_row = __LINE__ - 2;
        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
        if (__FUNCTION__) {
            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
        } else {
            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
        }
        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
        echo "<p> {$sql_query} </p>";
        exit;
    }
    return $url_status;
}
コード例 #8
0
function fresh_media($query, $domain_qry, $mysql_table_prefix, $catid, $db_slv)
{
    global $db_con, $case_sensitive, $debug, $category, $search_id3, $mysql_charset;
    global $sort_media, $thumb_dir, $delim, $debug_user, $use_cache, $mediacache_dir;
    global $case_sensitive, $vowels, $noacc_el, $translit_el, $greek, $type;
    global $cat_sel, $cat_sel0, $cat_sel0a, $cat_sel1, $cat_sel2, $cat_sel3, $cat_sel4, $cat_sel_all;
    $all_media = array();
    $this_media = array();
    $sort = "title, id3";
    if (!$category) {
        $category = '-1';
    }
    //  define order of result listing
    if ($sort_media == "1") {
        $sort = "title, id3";
    }
    if ($sort_media == "2") {
        $sort = "size_x DESC, size_y DESC, title, id3";
    }
    if ($sort_media == "3") {
        $sort = "last_query DESC, title, id3";
    }
    if ($sort_media == "4") {
        $sort = "click_counter DESC, title, id3";
    }
    if ($sort_media == "5") {
        $sort = "suffix, title";
    }
    if ($query == '') {
        $query = '&nbsp;';
    }
    //    prevent blank results for media search
    if ($query == 'media:') {
        $query = '%';
    }
    //    search for all media files in database /category
    if ($case_sensitive == '0') {
        $query = lower_case(lower_ent($query));
    }
    if ($vowels || $greek) {
        $query = remove_acc($query, '0');
        //  remove Latin accents
    }
    if ($noacc_el) {
        $query = remove_acc_el($query, '0');
        //  remove Greek accents
    }
    if ($translit_el) {
        $query = translit_el($query);
    }
    //  OR search, but only for multiple query words
    if ($type == "or" && strpos($query, " ")) {
        $known_id = array();
        $all = explode(" ", $query);
        //  build an array from all query words
        foreach ($all as $query) {
            //  try to get results for any query word
            if ($search_id3 == '1') {
                // search in name, title, EXIF and ID3 info
                $sql_query = "SELECT * from " . $mysql_table_prefix . "media\n                                                    where title like LOWER('%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                    OR name like LOWER('%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                    OR (id3 like '%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                    ORDER BY {$sort} ";
                $result = $db_con->query($sql_query);
                if ($debug && $db_con->errno) {
                    $err_row = __LINE__ - 2;
                    printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                    if (__FUNCTION__) {
                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                    } else {
                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                    }
                    printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                    printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                    echo "<p> {$sql_query} </p>";
                    exit;
                }
            } else {
                //  search only in media name and title
                $sql_query = "SELECT * from " . $mysql_table_prefix . "media\n                                                   where (CONVERT(LOWER(title)USING UTF8) like '%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                   OR (CONVERT(LOWER(name)USING UTF8) like '%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                   ORDER BY {$sort} ";
                $result = $db_con->query($sql_query);
                if ($debug && $db_con->errno) {
                    $err_row = __LINE__ - 2;
                    printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                    if (__FUNCTION__) {
                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                    } else {
                        printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                    }
                    printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                    printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                    echo "<p> {$sql_query} </p>";
                    exit;
                }
            }
            //  collect all results, but only for different links
            while ($row = $result->fetch_array(MYSQLI_NUM)) {
                $new_id = $row[0];
                if (count($all_media) > 0) {
                    foreach ($all_media as $this_media) {
                        if (!in_array($row[0], $known_id)) {
                            $all_media[] = $row;
                            //  add the unknown media result
                            $known_id[] = $row[0];
                        }
                    }
                } else {
                    $all_media[] = $row;
                    //  get the first media result
                    $known_id[] = $row[0];
                    //  remember this media_id
                }
            }
        }
    } else {
        //  AND, PHRASE and TOL search
        $query = str_replace(" ", "%", $query);
        if ($search_id3 == '1') {
            // search in name, title, EXIF and ID3 info
            $sql_query = "SELECT * from " . $mysql_table_prefix . "media\n                                                where title like LOWER('%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                OR media_link like LOWER('%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                OR (id3 like '%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                                ORDER BY {$sort} ";
            $result = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
        } else {
            //  search only in media name and title
            $sql_query = "SELECT * from " . $mysql_table_prefix . "media\n                                               where (CONVERT(LOWER(title)USING UTF8) like '%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                               OR (CONVERT(LOWER(media_link)USING UTF8) like '%" . $db_con->real_escape_string($query) . "%') {$domain_qry}\n                                               ORDER BY {$sort} ";
            $result = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
        }
        //      if query did not match any media object
        if ($result->num_rows == 0) {
            return $all_media;
            //      return blank array, otherwise array_merge() will not work in PHP5
        }
        //  collect all results
        while ($this_array = $result->fetch_array(MYSQLI_NUM)) {
            $all_media[] = $this_array;
        }
    }
    $fresh_media = array();
    //  if necessary, reduce to single category valid links
    if ($category != '-1') {
        while (list($key, $value) = each($all_media)) {
            $sql_query = "SELECT site_id from " . $mysql_table_prefix . "links\n                                                where url = '{$value['2']}'";
            //  get site_id corresponding to this page
            $result = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $site_id = $result->fetch_array(MYSQLI_NUM);
            //  check for valid catid
            $sql_query = "SELECT * from " . $mysql_table_prefix . "site_category\n                                                where site_id = '{$site_id['0']}' AND category_id ='{$catid}'";
            $result = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            //  add valid link to result array
            if ($result->num_rows) {
                $fresh_media[] = $value;
            }
        }
    } else {
        $fresh_media = $all_media;
        //  no category search
    }
    if (!$cat_sel0) {
        $cat_sel0 = $cat_sel_all;
    }
    if (!$cat_sel1) {
        $cat_sel1 = $cat_sel_all;
    }
    if (!$cat_sel2) {
        $cat_sel2 = $cat_sel_all;
    }
    if (!$cat_sel3) {
        $cat_sel3 = $cat_sel_all;
    }
    if (!$cat_sel4) {
        $cat_sel4 = $cat_sel_all;
    }
    //  enter here for multiple category search and, if necessary, reduce results
    if ($cat_sel0 != $cat_sel_all || $cat_sel1 != $cat_sel_all || $cat_sel2 != $cat_sel_all || $cat_sel3 != $cat_sel_all || $cat_sel4 != $cat_sel_all) {
        $temp_array = $fresh_media;
        $fresh_media = array();
        while (list($key, $value) = each($all_media)) {
            $cat_to_find = '1';
            //  get site_id for this link_id
            $sql_query = "SELECT site_id from " . $mysql_table_prefix . "links\n                                                where url = '{$value['2']}'";
            $res0 = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $site_id = $res0->fetch_array(MYSQLI_NUM);
            //  get category_id for this site
            $sql_query = "SELECT category_id from " . $mysql_table_prefix . "site_category where site_id = '{$site_id['0']}'";
            $res1 = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                if (__FUNCTION__) {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                } else {
                    printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                }
                printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                echo "<p> {$sql_query} </p>";
                exit;
            }
            $category_id = $res1->fetch_array(MYSQLI_NUM);
            //  check, whether  this category_id corresponds with cat_selector 0
            //  and try to find results for all active category selections
            if ($category_id) {
                if ($cat_sel0 == $cat_sel_all) {
                    $cat_sel0 = "%";
                }
                if ($cat_sel0a == $cat_sel_all) {
                    $cat_sel0a = "%";
                }
                if ($cat_sel1 == $cat_sel_all) {
                    $cat_sel1 = "%";
                }
                if ($cat_sel2 == $cat_sel_all) {
                    $cat_sel2 = "%";
                }
                if ($cat_sel3 == $cat_sel_all) {
                    $cat_sel3 = "%";
                }
                if ($cat_sel4 == $cat_sel_all) {
                    $cat_sel4 = "%";
                }
                if ($cat_sel0 != "%") {
                    $sql_query = "SELECT * from " . $mysql_table_prefix . "categories where category_id = '{$category_id['0']}' and category >= '{$cat_sel0}' and category <= '{$cat_sel0a}' and group_sel0 like '{$cat_sel1}' and group_sel1 like '{$cat_sel2}' and group_sel2 like '{$cat_sel3}' and group_sel3 like '{$cat_sel4}'";
                    $res_cat = $db_con->query($sql_query);
                    if ($debug && $db_con->errno) {
                        $err_row = __LINE__ - 2;
                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                        if (__FUNCTION__) {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                        } else {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                        }
                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                        echo "<p> {$sql_query} </p>";
                        exit;
                    }
                } else {
                    $sql_query = "SELECT * from " . $mysql_table_prefix . "categories where category_id = '{$category_id['0']}' and group_sel0 like '{$cat_sel1}' and group_sel1 like '{$cat_sel2}' and group_sel2 like '{$cat_sel3}' and group_sel3 like '{$cat_sel4}'";
                    $res_cat = $db_con->query($sql_query);
                    if ($debug && $db_con->errno) {
                        $err_row = __LINE__ - 2;
                        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
                        if (__FUNCTION__) {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>");
                        } else {
                            printf("<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;<br /></span></p>");
                        }
                        printf("<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>");
                        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
                        echo "<p> {$sql_query} </p>";
                        exit;
                    }
                }
                if ($res_cat->num_rows) {
                } else {
                    $cat_to_find = '0';
                }
                //  if found in all active category selctions, add the ccurrent link to the result array
                if ($cat_to_find) {
                    $fresh_media[] = $value;
                }
            }
        }
        //  restore the original $cat_sel_all
        $cat_sel0 = str_replace("%", $cat_sel_all, $cat_sel0);
        $cat_sel0a = str_replace("%", $cat_sel_all, $cat_sel0a);
        $cat_sel1 = str_replace("%", $cat_sel_all, $cat_sel1);
        $cat_sel2 = str_replace("%", $cat_sel_all, $cat_sel2);
        $cat_sel3 = str_replace("%", $cat_sel_all, $cat_sel3);
        $cat_sel4 = str_replace("%", $cat_sel_all, $cat_sel4);
    }
    $i = 0;
    $title = '';
    $this_media = array();
    //  reset, because used a second time
    if ($use_cache) {
        $thumb_dir = $mediacache_dir;
        //  store new thumbnail in folder for media cache
    }
    foreach ($fresh_media as $this_media[$i]) {
        if ($this_media[$i][6] == 'image') {
            //  build thumbnail to be displayed in result listing
            $name = basename($this_media[$i][3]);
            //  extract file name
            $title = substr($this_media[$i][5], 0, strpos($this_media[$i][5], $delim));
            //  get basic part of the title
            //  add folder path, db, table-prefix, remove original suffix and add own suffix
            $file = utf8_decode("" . $thumb_dir . "/db" . $db_slv . "_" . $mysql_table_prefix . "_" . substr($name, 0, strrpos($name, ".")) . ".gif");
            if (!($handle = fopen($file, "ab"))) {
                if ($debug_user == '1') {
                    print "Unable to open {$file} ";
                }
            }
            if (!fwrite($handle, $this_media[$i][4])) {
                if ($debug_user == '1') {
                    print "Unable to write the file {$file}. No thumbnails will be presented";
                }
            }
            fclose($handle);
            $this_media[$i][4] = $file;
            //  replace content of thumbnail  with path to thumbnail
            $this_media[$i][5] = $title;
            //  the title up to delimeter
            $i++;
        }
    }
    //echo "\r\n\r\n<br>this_media Array0:<br><pre>";print_r($this_media);echo "</pre>\r\n";
    return $this_media;
}
コード例 #9
0
 /**
  * Lower-case a value by delegating to the global lower_case() helper.
  *
  * @param mixed $str Value passed to lower_case() unchanged.
  * @return mixed Whatever lower_case() returns for $str.
  */
 private function to_lower($str)
 {
     $lowered = lower_case($str);

     return $lowered;
 }
コード例 #10
0
ファイル: searchfuncs.php プロジェクト: sean-tan/sphiderplus
/**
 * Run a search and assemble everything the result page needs to render it.
 *
 * The query is normalised (phrase quoting, optional lower-casing), handed to
 * search(), and each returned hit is turned into a display record: a text
 * excerpt cut around the first cluster of matches, a shortened title and URL,
 * a weight, and highlight markup for every word in $words['hilight'].
 *
 * @param string $query      Raw search string; a leading '!' selects strict search.
 * @param int    $start      Page number to show; 0 is treated as 1.
 * @param mixed  $category   Passed through to search().
 * @param string $searchtype "phrase" wraps the query in double quotes.
 * @param mixed  $results    Results per page; "" keeps the global $results_per_page.
 * @param mixed  $domain     Passed through to search().
 *
 * @return array Keys: ignore_words, ent_query, time, did_you_mean, did_you_mean_b,
 *               num_of_results, from, to, total_results, pages, prev, next, start,
 *               query, plus qry_results (only when there are rows) and other_pages
 *               (only when from <= to).
 */
function get_search_results($query, $start, $category, $searchtype, $results, $domain)
{
    global $sph_messages, $results_per_page, $all_wild, $show_meta_description, $title_length, $links_to_next, $wildsearch, $show_warning, $mark, $type, $show_query_scores, $index_host, $url_length, $query_hits, $mysql_table_prefix, $desc_length, $utf8, $case_sensitive;
    $full_result = array();
    if ($results != "") {
        $results_per_page = $results;
    }
    if ($searchtype == "phrase") {
        $query = str_replace('"', '', $query);
        $query = "\"" . $query . "\"";
    }
    if ($utf8 == 1 && $case_sensitive == 0 && $searchtype != "phrase") {
        $query = lower_case($query);
    }
    $starttime = getmicrotime();
    //  a single, unpaired double quote cannot delimit a phrase, so drop it
    if (substr_count($query, '"') == 1) {
        $query = str_replace('"', '', $query);
    }
    $words = makeboollist($query);
    $full_result['ignore_words'] = $words['ignore'];
    if ($start == 0) {
        $start = 1;
    }
    $result = search($words, $category, $start, $results_per_page, $searchtype, $domain);
    $query = stripslashes($query);
    $entitiesQuery = htmlspecialchars(str_replace("\"", "", $query));
    $full_result['ent_query'] = $entitiesQuery;
    $endtime = getmicrotime() - $starttime;
    $rows = $result['results'];
    $time = round($endtime * 100) / 100;
    $full_result['time'] = $time;
    //  both suggestion strings default to empty so they are always defined
    $did_you_mean = "";
    $did_you_mean_b = "";
    if (isset($result['did_you_mean'])) {
        $did_you_mean_b = $entitiesQuery;
        $did_you_mean = $entitiesQuery;
        foreach ($result['did_you_mean'] as $key => $val) {
            if ($key != $val) {
                $did_you_mean_b = str_replace($key, "<b>{$val}</b>", $did_you_mean_b);
                $did_you_mean = str_replace($key, "{$val}", $did_you_mean);
            }
        }
    }
    $full_result['did_you_mean'] = $did_you_mean;
    $full_result['did_you_mean_b'] = $did_you_mean_b;
    //  search() returns the hits under numeric keys plus two bookkeeping entries
    $num_of_results = count($result) - 2;
    $full_result['num_of_results'] = $num_of_results;
    if ($start < 2) {
        saveToLog(addslashes($query), $time, $rows);
    }
    $from = ($start - 1) * $results_per_page + 1;
    $to = min($start * $results_per_page, $rows);
    $full_result['from'] = $from;
    $full_result['to'] = $to;
    $full_result['total_results'] = $rows;
    if ($rows > 0) {
        $maxweight = $result['maxweight'];
        //  only recomputed when an excerpt is built; until then "not a strict search"
        $strictpos = false;
        $i = 0;
        while ($i < $num_of_results && $i < $results_per_page) {
            $title = $result[$i]['title'];
            $url = $result[$i]['url'];
            $fulltxt = $result[$i]['fulltxt'];
            $page_size = $result[$i]['size'];
            $domain = $result[$i]['domain'];
            //  match positions found in this hit's text; empty means nothing to highlight
            $places = array();
            if ($page_size != "") {
                $page_size = number_format($page_size, 1) . " kb";
            }
            //  If available, enable part of a word highlighting in result report
            if ($all_wild) {
                $words = makeboollist($all_wild);
            }
            $txtlen = strlen($fulltxt);
            if ($show_meta_description === 1 || $txtlen > $desc_length) {
                $strictpos = strpos($query, '!');
                if ($strictpos === 0) {
                    //  strict search: the query without '!' is the only word to locate
                    if ($case_sensitive == '1') {
                        $recovered = str_replace('!', '', trim($query));
                        $tmp = $fulltxt;
                    } else {
                        $recovered = str_replace('!', '', trim(lower_case($query)));
                        $tmp = lower_case($fulltxt);
                    }
                    $words['hilight'][0] = "{$recovered}";
                    $found_in = '1';
                    //  running offset of the match within the full text
                    $pos_absolut = '0';
                    foreach ($words['hilight'] as $word) {
                        while (!($found_in == '')) {
                            $found_in = strpos($tmp, $word);
                            //  text starting one character before the match
                            $tmp_front = substr($tmp, $found_in - 1);
                            $pos = $found_in + strlen($word);
                            $pos_absolut = $pos_absolut + $found_in;
                            //  continue scanning in the text behind the match
                            $tmp = substr($tmp, $pos);
                            //  a strict match must not be glued to word characters on either side
                            $found_before = preg_match("/[(a-z)-_*.\\/\\:&@\\w]/", substr($tmp_front, 0, 1));
                            $found_behind = preg_match("/[(a-z)-_*.,\\/\\:&@\\w]/", substr($tmp, 0, 1));
                            if ($found_before === 0 && $found_behind === 0) {
                                $places[] = $pos_absolut;
                                $found_in = '';
                            }
                        }
                    }
                } else {
                    //  standard search: collect the positions of every highlight word
                    //  NOTE(review): a match at offset 0 compares equal to '' on PHP < 8 and is
                    //  therefore skipped by the loop below - confirm whether that is intended.
                    foreach ($words['hilight'] as $word) {
                        if ($case_sensitive == '0') {
                            $tmp = lower_case($fulltxt);
                            $word = lower_case($word);
                        } else {
                            $tmp = $fulltxt;
                        }
                        $found_in = strpos($tmp, $word);
                        $sum = -strlen($word);
                        while (!($found_in == '')) {
                            $pos = $found_in + strlen($word);
                            $sum += $pos;
                            $tmp = substr($tmp, $pos);
                            $places[] = $sum;
                            $found_in = strpos($tmp, $word);
                        }
                    }
                }
                sort($places);
                $x = 0;
                $begin = 0;
                foreach ($places as $id => $place) {
                    //  bounds check first, so $places is never read past its end
                    while ($x + $id < count($places) && $places[$id + $x] - $place < $desc_length && $place < strlen($fulltxt) - $desc_length) {
                        $x++;
                        $begin = $id;
                    }
                }
                //  with no match at all the excerpt simply starts at the beginning of the text
                $first_place = isset($places[$begin]) ? $places[$begin] : 0;
                $begin_pos = max(0, $first_place - 80);
                $fulltxt = substr($fulltxt, $begin_pos, $desc_length);
                if ($first_place > 0) {
                    //  skip the partial word the cut most likely started in
                    $begin_pos = strpos($fulltxt, " ");
                }
                $fulltxt = substr($fulltxt, $begin_pos, $desc_length);
                //  drop the partial word at the end of the excerpt
                $fulltxt = substr($fulltxt, 0, strrpos($fulltxt, " "));
            }
            if ($query_hits == '0') {
                //  calculate percentage of weight
                $weight = number_format($result[$i]['weight'] / $maxweight * 100, 1);
            } else {
                $weight = number_format($result[$i]['weight']);
            }
            if ($title == '') {
                $title = $sph_messages["Untitled"];
            }
            $regs = array();
            if (strlen($title) > $title_length) {
                //  shorten the title at the first blank behind the length limit
                $length_tot = strpos($title, " ", $title_length);
                if ($length_tot) {
                    $title = substr($title, 0, $length_tot) . " ...";
                }
            }
            $url2 = $url;
            if (strlen($url) > $url_length) {
                //  if necessary shorten length of URL in result page
                $url2 = substr($url, 0, $url_length) . "...";
            }
            //  true when no match position is known for this hit
            $no_places = empty($places) || $places[0] == '';
            if ($no_places && $query_hits == 1 && $type != 'tol') {
                //  if nothing found in HTML text and query hits as result output
                $weight = '0';
            }
            if (($no_places && $show_warning == '1' && $type != 'tol') || ($show_warning == '1' && $weight == '0')) {
                //  no HTML text to highlight: show the warning instead of an excerpt
                $warnmessage = $sph_messages['showWarning'];
                $fulltxt = "<span class='warn'>{$warnmessage}</span>";
            }
            //  opening tag (without brackets) used for highlighting; an unrecognised
            //  $mark falls back to a plain span so the markup stays well-formed
            $highlight = "span";
            if ($mark == 'markbold') {
                $highlight = "span class='mak_1'";
            }
            if ($mark == 'markblue') {
                $highlight = "span class='mak_2'";
            }
            if ($mark == 'markyellow') {
                $highlight = "span class='mak_3'";
            }
            if ($mark == 'markgreen') {
                $highlight = "span class='mak_4'";
            }
            //  NOTE(review): ereg()/eregi() and their _replace variants were removed in PHP 7.
            //  They are left untouched here because $change is used as a raw POSIX pattern and
            //  a PCRE conversion needs to be verified against real queries first.
            foreach ($words['hilight'] as $change) {
                if (!($strictpos === 0)) {
                    //  no marking in title and url if strict search
                    if ($case_sensitive == '1') {
                        //  case sensitive marking of title and, if hosts are indexed, of the URL
                        while (@ereg("[^\\>](" . $change . ")[^\\<]", " " . $title . " ", $regs)) {
                            $title = ereg_replace($regs[1], "<{$highlight}>" . $regs[1] . "</span>", $title);
                        }
                        if ($index_host == '1') {
                            while (@ereg("[^\\>](" . $change . ")[^\\<]", $url2, $regs)) {
                                $url2 = ereg_replace($regs[1], "<{$highlight}>" . $regs[1] . "</span>", $url2);
                            }
                        }
                    } else {
                        //  mark upper and lower case match
                        while (@eregi("[^\\>](" . $change . ")[^\\<]", " " . $title . " ", $regs)) {
                            $title = eregi_replace($regs[1], "<{$highlight}>" . $regs[1] . "</span>", $title);
                        }
                        if ($index_host == '1') {
                            while (@eregi("[^\\>](" . $change . ")[^\\<]", $url2, $regs)) {
                                $url2 = eregi_replace($regs[1], "<{$highlight}>" . $regs[1] . "</span>", $url2);
                            }
                        }
                    }
                }
                if ($strictpos === 0) {
                    //  if strict search mark only the real result with blanks before and behind
                    $change = " {$change} ";
                }
                if ($case_sensitive == '1') {
                    //  mark fulltext case sensitive
                    while (@ereg("[^\\>](" . $change . ")[^\\<]", " " . $fulltxt . " ", $regs)) {
                        $fulltxt = ereg_replace($regs[1], "<{$highlight}>" . $regs[1] . "</span>", $fulltxt);
                    }
                } else {
                    //  mark all in fulltext
                    while (@eregi("[^\\>](" . $change . ")[^\\<]", " " . $fulltxt . " ", $regs)) {
                        $fulltxt = eregi_replace($regs[1], "<{$highlight}>" . $regs[1] . "</span>", $fulltxt);
                    }
                }
            }
            $num = $from + $i;
            $full_result['qry_results'][$i]['num'] = $num;
            $full_result['qry_results'][$i]['weight'] = $weight;
            $full_result['qry_results'][$i]['url'] = $url;
            $full_result['qry_results'][$i]['title'] = $title;
            $full_result['qry_results'][$i]['fulltxt'] = $fulltxt;
            $full_result['qry_results'][$i]['url2'] = $url2;
            $full_result['qry_results'][$i]['page_size'] = $page_size;
            $full_result['qry_results'][$i]['domain_name'] = $domain;
            $i++;
        }
    }
    $pages = ceil($rows / $results_per_page);
    $full_result['pages'] = $pages;
    $prev = $start - 1;
    $full_result['prev'] = $prev;
    $next = $start + 1;
    $full_result['next'] = $next;
    $full_result['start'] = $start;
    $full_result['query'] = $entitiesQuery;
    if ($from <= $to) {
        //  page links: up to $links_to_next pages on either side of the current one
        $firstpage = $start - $links_to_next;
        if ($firstpage < 1) {
            $firstpage = 1;
        }
        $lastpage = $start + $links_to_next;
        if ($lastpage > $pages) {
            $lastpage = $pages;
        }
        for ($x = $firstpage; $x <= $lastpage; $x++) {
            $full_result['other_pages'][] = $x;
        }
    }
    return $full_result;
}
コード例 #11
0
ファイル: spiderfuncs.php プロジェクト: sean-tan/sphiderplus
/**
 * Strip markup from a fetched document and build the record used for indexing.
 *
 * Removes link tags, noindex sections, comments, scripts and styles, extracts
 * the title, strips the remaining tags and then assembles the searchable
 * content from the text, the title and - depending on the settings - the
 * host/path parts of the URL and the meta keywords.
 *
 * @param string $file Raw document content.
 * @param string $url  URL the document was fetched from.
 * @param string $type Document type; for pdf, doc, ppt, rtf and xls without a
 *                     <title> element the file name from the URL becomes the title.
 *
 * @return array Keys: fulltext, content, title (all three passed through
 *               addslashes()), description, keywords, host, path, nofollow,
 *               noindex, base.
 */
function clean_file($file, $url, $type)
{
    global $entities, $index_host, $index_meta_keywords, $utf8, $case_sensitive;
    $urlparts = parse_url($url);
    //  URLs without host or path must not raise notices
    $host = isset($urlparts['host']) ? $urlparts['host'] : null;
    //  remove filename from path
    $path = isset($urlparts['path']) ? preg_replace('@[^/]+$@D', '', $urlparts['path']) : '';
    $file = preg_replace("/<link rel[^<>]*>/i", " ", $file);
    $file = preg_replace("@<!--sphider_noindex-->.*?<!--\\/sphider_noindex-->@si", " ", $file);
    $file = preg_replace("@<!--.*?-->@si", " ", $file);
    $file = preg_replace("@<script[^>]*?>.*?</script>@si", " ", $file);
    $headdata = get_head_data($file);
    //  stays empty when the document has no <title> and is not one of the listed types
    $title = '';
    $regs = array();
    if (preg_match("@<title *>(.*?)<\\/title*>@si", $file, $regs)) {
        $title = trim($regs[1]);
        $file = str_replace($regs[0], "", $file);
    } else {
        if ($type == 'pdf' || $type == 'doc' || $type == 'ppt' || $type == 'rtf' || $type == 'xls') {
            //  non-html files: use the document name from the URL as title
            $offset = strrpos($url, '/');
            $title = substr($url, $offset + 1);
        }
    }
    $file = preg_replace("@<style[^>]*>.*?<\\/style>@si", " ", $file);
    //  create spaces between tags, so that removing tags doesnt concatenate strings
    $file = preg_replace("/<[\\w ]+>/", "\\0 ", $file);
    $file = preg_replace("/<\\/[\\w ]+>/", "\\0 ", $file);
    $file = strip_tags($file);
    $file = preg_replace("/&nbsp;/", " ", $file);
    $fulltext = $file;
    $file .= " " . $title;
    if ($index_host == 1) {
        //  separate words in host and path
        $host_sep = preg_replace("/\\.|\\/|\\\\/", " ", $host);
        $path_sep = preg_replace("/\\.|\\/|\\\\/", " ", $path);
        $file = $file . " " . $host . " " . $host_sep . " " . ucwords($host_sep);
        $file = $file . " " . $path . " " . $path_sep . " " . ucwords($path_sep);
    }
    if ($index_meta_keywords == 1) {
        $file = $file . " " . $headdata['keywords'];
    }
    //  replace numeric character references with the corresponding single byte;
    //  callbacks replace the /e modifier, which PHP 7 removed
    $file = preg_replace_callback('~&#x([0-9a-f]+);~i', function ($match) {
        return chr(hexdec($match[1]));
    }, $file);
    $file = preg_replace_callback('~&#([0-9]+);~', function ($match) {
        return chr($match[1]);
    }, $file);
    if ($utf8 != 1) {
        //  a UTF-8 coded database also needs the upper-case characters, so only
        //  non-UTF-8 content is lower-cased here
        $file = lower_case($file);
    }
    //  $entities maps a pattern fragment (key) to its replacement (value)
    foreach ($entities as $entity => $replacement) {
        $file = preg_replace("/" . $entity . "/i", $replacement, $file);
    }
    $file = preg_replace("/&[a-z]{1,6};/", " ", $file);
    //  kill real line breaks ...
    $trash = array("\r\n", "\n", "\r");
    $replace = ' ';
    $file = str_replace($trash, $replace, $file);
    //  ... and line breaks that arrive as literal backslash sequences
    $trash = array("\\r\\n", "\\n", "\\r");
    $replace = ' ';
    $file = str_replace($trash, $replace, $file);
    if ($utf8 == '0') {
        //  collapse runs of whitespace
        $file = preg_replace("/\\s+/", " ", $file);
        //  compatible with Suggest Framework
        $fulltext = html_entity_decode($fulltext);
    }
    $data = array();
    $data['fulltext'] = addslashes($fulltext);
    $data['content'] = addslashes($file);
    $data['title'] = addslashes($title);
    $data['description'] = $headdata['description'];
    $data['keywords'] = $headdata['keywords'];
    $data['host'] = $host;
    $data['path'] = $path;
    $data['nofollow'] = $headdata['nofollow'];
    $data['noindex'] = $headdata['noindex'];
    $data['base'] = $headdata['base'];
    return $data;
}
コード例 #12
0
ファイル: suggest.php プロジェクト: sean-tan/sphiderplus
    require_once('../../settings/database.php');
    require_once('../../settings/conf.php');

    //  The search term is untrusted input: a missing or non-string 'q'
    //  (e.g. ?q[]=x) is treated as an empty search string.
    $_GET['q'] = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

    //  Undo magic-quotes escaping where the runtime still applies it;
    //  get_magic_quotes_gpc() no longer exists as of PHP 8.
    if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc() == 1) {
        $_GET['q'] = stripslashes($_GET['q']);
    }

    if ($utf8 == '1') {
        $_GET['q'] = addslashes($_GET['q']);
        if ($case_sensitive == 0) {
            $_GET['q'] = lower_case($_GET['q']);
        }
    } else {
        //  Decode entities BEFORE escaping: decoding afterwards would turn an
        //  entity-encoded quote or backslash back into an unescaped character.
        $_GET['q'] = addslashes(html_entity_decode(lower_case($_GET['q'])));
    }

    //  if search string too small, do not search for keywords/phrases
    if (strlen($_GET['q']) < 3) {
        $suggest_phrases = false;
        $suggest_keywords = false;
    }

    //  A phrase needs a blank somewhere behind the first character; no blank
    //  at all, or only a leading one, disables phrase suggestions.
    $first_space = strpos($_GET['q'], ' ');
    if ($first_space === false || $first_space === 0) {
        $suggest_phrases = false;
    }

    //  remove the control characters ! " and *
    $_GET['q'] = preg_replace("/!|\"|\*/", "", $_GET['q']);
コード例 #13
0
    //  if enabled in Admin settings get country code
    if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
        $cc = substr(htmlspecialchars($_SERVER['HTTP_ACCEPT_LANGUAGE']), 0, 2);
        $handle = @fopen("{$language_dir}/{$cc}-language.php", "r");
        if ($handle) {
            $language = $cc;
            // if available set language to users slang
            include "{$language_dir}/{$language}-language.php";
            @fclose($handle);
        }
    }
}
//  Escape the keyword, then fold entities and case unless matching is case sensitive
$keyword = addslashes($keyword);
if ($case_sensitive == 0) {
    $keyword = lower_case(lower_ent($keyword));
}
//  Transliteration is applied only when both switches are on
if ($translit_el) {
    if ($media_only) {
        $keyword = translit_el($keyword);
    }
}
//  Search strings below the minimum length get neither keyword nor phrase suggestions
if ($min_sug_chars > strlen($keyword)) {
    $suggest_keywords = false;
    $suggest_phrases = false;
}
//  Turn URL-encoded blanks back into real blanks
$keyword = str_replace("%20", " ", $keyword);
if (!strpos($keyword, ' ')) {
    //  Phrase check: switching phrase suggestions off here is currently disabled
    //$suggest_phrases = false;
}
コード例 #14
0
ファイル: domains.php プロジェクト: reliberate/fusionpbx
 /**
  * Run the app_defaults.php script of every installed app once per domain.
  *
  * Steps: include all app_config.php / app_menu.php files, pick the current
  * domain into the session, read the enabled default settings, then for each
  * row of v_domains rebuild the session settings (defaults first, domain
  * overrides second) and include every app_defaults.php. Finally the dialplan
  * is synchronised when save_dialplan_xml() exists, and the 'domain' and
  * 'switch' session entries are cleared.
  *
  * The included files execute in this method's scope, so the local variable
  * names they can see ($x, $config_path, $domain_uuid, $domain_name, $context,
  * $domain_count, $domains_processed, ...) are kept as they were.
  *
  * NOTE(review): $domain_array and $display_type are not assigned in this
  * method - presumably they are provided by one of the included files; confirm.
  *
  * @return void
  */
 public function upgrade()
 {
     //set the global variable
     global $db, $db_type, $db_name, $db_username, $db_password, $db_host, $db_path, $db_port;
     //get the PROJECT PATH
     include "root.php";
     //get the list of installed apps from the core and app directories (note: GLOB_BRACE doesn't work on some systems)
     $config_list_1 = glob($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . "/*/*/app_config.php");
     $config_list_2 = glob($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . "/*/*/app_menu.php");
     //glob() returns false on error; casting false to array would yield array(false)
     $config_list = array_merge($config_list_1 ? $config_list_1 : array(), $config_list_2 ? $config_list_2 : array());
     unset($config_list_1, $config_list_2);
     //$x counts the included files and is visible to each of them
     $x = 0;
     foreach ($config_list as $config_path) {
         include $config_path;
         $x++;
     }
     //get the domain_uuid
     $sql = "select * from v_domains ";
     $prep_statement = $db->prepare($sql);
     $prep_statement->execute();
     $result = $prep_statement->fetchAll(PDO::FETCH_NAMED);
     foreach ($result as $row) {
         if (count($result) == 1) {
             //a single domain is always the current one
             $_SESSION["domain_uuid"] = $row["domain_uuid"];
             $_SESSION["domain_name"] = $row['domain_name'];
         } else {
             //several domains: the current one matches the requested name, with or without "www."
             if (lower_case($row['domain_name']) == lower_case($domain_array[0]) || lower_case($row['domain_name']) == lower_case('www.' . $domain_array[0])) {
                 $_SESSION["domain_uuid"] = $row["domain_uuid"];
                 $_SESSION["domain_name"] = $row['domain_name'];
             }
             $_SESSION['domains'][$row['domain_uuid']]['domain_uuid'] = $row['domain_uuid'];
             $_SESSION['domains'][$row['domain_uuid']]['domain_name'] = $row['domain_name'];
         }
     }
     unset($result, $prep_statement);
     //get the default settings
     $sql = "select * from v_default_settings ";
     $sql .= "where default_setting_enabled = 'true' ";
     $prep_statement = $db->prepare($sql);
     $prep_statement->execute();
     $result_default_settings = $prep_statement->fetchAll(PDO::FETCH_NAMED);
     //get the default recordings directory
     foreach ($result_default_settings as $row) {
         $name = $row['default_setting_name'];
         $category = $row['default_setting_category'];
         $subcategory = $row['default_setting_subcategory'];
         if ($category == 'switch' && $subcategory == 'recordings' && $name == 'dir') {
             $switch_recordings_dir = $row['default_setting_value'];
         }
     }
     //loop through all domains
     $sql = "select * from v_domains ";
     $v_prep_statement = $db->prepare(check_sql($sql));
     $v_prep_statement->execute();
     $main_result = $v_prep_statement->fetchAll(PDO::FETCH_ASSOC);
     $domain_count = count($main_result);
     $domains_processed = 1;
     //iterate by value under its own name: the inner loops reuse $row and would
     //otherwise overwrite the elements of $main_result through a reference
     foreach ($main_result as $domain_row) {
         //get the values from database and set them as php variables
         $domain_uuid = $domain_row["domain_uuid"];
         $domain_name = $domain_row["domain_name"];
         //get the context
         $context = $domain_name;
         //show the domain when display_type is set to text
         if ($display_type == "text") {
             echo "\n";
             echo $domain_name;
             echo "\n";
         }
         //get the default settings - this needs to be done to reset the session values back to the defaults for each domain in the loop
         foreach ($result_default_settings as $row) {
             $name = $row['default_setting_name'];
             $category = $row['default_setting_category'];
             $subcategory = $row['default_setting_subcategory'];
             if (strlen($subcategory) == 0) {
                 if ($name == "array") {
                     $_SESSION[$category][] = $row['default_setting_value'];
                 } else {
                     $_SESSION[$category][$name] = $row['default_setting_value'];
                 }
             } else {
                 if ($name == "array") {
                     $_SESSION[$category][$subcategory][] = $row['default_setting_value'];
                 } else {
                     $_SESSION[$category][$subcategory]['uuid'] = $row['default_setting_uuid'];
                     $_SESSION[$category][$subcategory][$name] = $row['default_setting_value'];
                 }
             }
         }
         //get the domains settings; the uuid is bound instead of concatenated into the statement
         $sql = "select * from v_domain_settings ";
         $sql .= "where domain_uuid = :domain_uuid ";
         $sql .= "and domain_setting_enabled = 'true' ";
         $prep_statement = $db->prepare($sql);
         $prep_statement->execute(array(':domain_uuid' => $domain_uuid));
         $result = $prep_statement->fetchAll(PDO::FETCH_NAMED);
         foreach ($result as $row) {
             $name = $row['domain_setting_name'];
             $category = $row['domain_setting_category'];
             $subcategory = $row['domain_setting_subcategory'];
             if (strlen($subcategory) == 0) {
                 $_SESSION[$category][$name] = $row['domain_setting_value'];
             } else {
                 $_SESSION[$category][$subcategory][$name] = $row['domain_setting_value'];
             }
         }
         //set the recordings directory: one sub-directory per domain when there are several domains
         if (isset($switch_recordings_dir) && strlen($switch_recordings_dir) > 1 && isset($_SESSION["domains"]) && count($_SESSION["domains"]) > 1) {
             $_SESSION['switch']['recordings']['dir'] = $switch_recordings_dir . "/" . $domain_name;
         }
         //get the list of installed apps from the core and mod directories and execute the php code in app_defaults.php
         $default_list = glob($_SERVER["DOCUMENT_ROOT"] . PROJECT_PATH . "/*/*/app_defaults.php");
         if ($default_list === false) {
             //glob() failed: nothing to include for this domain
             $default_list = array();
         }
         foreach ($default_list as $default_path) {
             include $default_path;
         }
         //track of the number of domains processed
         $domains_processed++;
     }
     unset($v_prep_statement);
     //synchronize the dialplan
     if (function_exists('save_dialplan_xml')) {
         save_dialplan_xml();
     }
     //clear the session variables
     unset($_SESSION['domain']);
     unset($_SESSION['switch']);
 }