}
        }
        if ($clear == 1) {
            clean_resource($result, '01');
        }
    }
    if (!isset($in)) {
        $in = "";
    }
    if (!isset($out)) {
        $out = "";
    }
    $started = time();
    index_site($url, $reindex, $maxlevel, $soption, $in, $out, $can_leave, $use_robot, $use_nofollow, $cl, $all, $use_pref);
    $ended = time();
    $consumed = $ended - $started;
    printConsumedReport('consumed', $cl, '0', $consumed);
    printStandardReport('ReindexFinish', $command_line, '0');
}
// Announce the end of the run on the configured report channel.
printStandardReport('quit', $command_line, '0');

// When e-mail logging is enabled, send the administrator a short completion notice.
if ($email_log) {
    // Either every site was indexed ('ALL') or just the single requested URL.
    $indexed = ($all == 1) ? 'ALL' : $url;
    // Mention the log file only when a log was actually being written.
    $log_report = $log_handle ? "Log saved into {$log_file}" : "";
    mail(
        $admin_email,
        "Sphider indexing report",
        "Sphider has finished indexing {$indexed} at " . date("y-m-d H:i:s") . ". " . $log_report
    );
}

// Release the log file handle, if one was opened.
if ($log_handle) {
    fclose($log_handle);
}
/**
 * Decrement the visibility counter of an indexed page, or purge the page
 * once that counter has reached zero.
 *
 * The URL is looked up in the links table. If its "visible" value is above
 * zero it is decreased by one. Otherwise the link row and all of its rows in
 * the sixteen link_keyword0 .. link_keywordf tables are deleted and a
 * 'pageRemoved' report is printed. URLs that are not in the links table are
 * ignored.
 *
 * @param string $url URL of the page; it is escaped here before being used in SQL,
 *                    so callers must pass it unescaped.
 * @return void
 */
function check_for_removal($url)
{
    global $mysql_table_prefix;
    global $command_line;

    // The URL may contain quotes; escape it so the lookup cannot break or be injected into.
    $safe_url = mysql_real_escape_string($url);
    $result = mysql_query("select link_id, visible from " . $mysql_table_prefix . "links" . " where url='{$safe_url}'");
    echo mysql_error();

    // A failed query yields false; do not hand that to mysql_num_rows().
    if (!$result || mysql_num_rows($result) == 0) {
        return;
    }

    $row = mysql_fetch_row($result);
    $link_id = (int) $row[0];
    $visible = (int) $row[1];

    if ($visible > 0) {
        // Page still has "lives" left: just count one down.
        $visible--;
        mysql_query("update " . $mysql_table_prefix . "links set visible={$visible} where link_id={$link_id}");
        echo mysql_error();
        return;
    }

    // Counter exhausted: drop the link and its keyword relations.
    mysql_query("delete from " . $mysql_table_prefix . "links where link_id={$link_id}");
    echo mysql_error();
    // Keyword relations are sharded over 16 tables suffixed with one hex digit (0..f).
    for ($i = 0; $i <= 15; $i++) {
        $char = dechex($i);
        mysql_query("delete from " . $mysql_table_prefix . "link_keyword{$char} where link_id={$link_id}");
        echo mysql_error();
    }
    printStandardReport('pageRemoved', $command_line);
}
Beispiel #3
0
/**
 * Crawl one site level by level and (re)index every page found.
 *
 * Steps performed, in order:
 *  1. Lazily fill the global keyword cache ($all_keywords) from the keywords table.
 *  2. Normalise the start URL (append "/" when it has no path) and create a session id.
 *  3. Create or update the site's row in the sites table; on re-index, seed the temp
 *     table with all links already known for the site.
 *  4. If a row exists in the pending table for the site, resume from the stored
 *     session id, level, link position and running page number.
 *  5. For each level, walk the links stored in the temp table, skipping links
 *     excluded by robots.txt rules or by the include/exclude strings
 *     (check_for_removal() is called for those), and hand the rest to index_url().
 *  6. Finally remove this session's temp rows and the site's pending row.
 *
 * All string values are escaped inside this function before being placed in SQL,
 * so callers must pass them unescaped.
 *
 * @param string     $url              Start URL of the site.
 * @param int        $reindex          1 to re-index pages that are already in the database.
 * @param int        $maxlevel         Maximum link depth; only enforced when $soption is 'level'.
 * @param string     $soption          'level' (depth limited) or 'full' (crawl until no links remain).
 * @param string     $url_inc          "Required" string(s) for URLs, stored with the site and
 *                                     passed to check_include().
 * @param string     $url_not_inc      "Disallowed" string(s) for URLs, stored with the site and
 *                                     passed to check_include().
 * @param int|string $can_leave_domain Flag stored with the site and forwarded to index_url().
 * @return void
 */
function index_site($url, $reindex, $maxlevel, $soption, $url_inc, $url_not_inc, $can_leave_domain)
{
    global $mysql_table_prefix, $command_line, $mainurl, $tmp_urls, $domain_arr, $all_keywords;

    // Build the keyword => keyword_ID cache once; later calls reuse it.
    if (!isset($all_keywords)) {
        $result = mysql_query("select keyword_ID, keyword from " . $mysql_table_prefix . "keywords");
        echo mysql_error();
        while ($row = mysql_fetch_array($result)) {
            $all_keywords[addslashes($row[1])] = $row[0];
        }
    }

    // A bare "http://host" gets a trailing slash so it matches the stored form.
    $compurl = parse_url($url);
    if (!isset($compurl['path']) || $compurl['path'] == '') {
        $url = $url . "/";
    }

    // Fresh session id for this crawl's rows in the temp table.
    $sessid = md5(microtime() . getenv("REMOTE_ADDR"));
    $urlparts = parse_url($url);
    $domain = isset($urlparts['host']) ? $urlparts['host'] : null;

    // SQL-safe copies of every caller supplied value.
    $sql_url = mysql_real_escape_string($url);
    $sql_inc = mysql_real_escape_string($url_inc);
    $sql_not_inc = mysql_real_escape_string($url_not_inc);
    $sql_maxlevel = (int) $maxlevel;
    $sql_leave = (int) $can_leave_domain;

    $result = mysql_query("select site_id from " . $mysql_table_prefix . "sites where url='{$sql_url}'");
    echo mysql_error();
    $row = mysql_fetch_row($result);
    $site_id = $row ? $row[0] : '';

    if ($site_id != "" && $reindex == 1) {
        // Re-index: queue the start URL plus every link already known for this site.
        mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$sql_url}', 0, '{$sessid}')");
        echo mysql_error();
        $result = mysql_query("select url, level from " . $mysql_table_prefix . "links where site_id = {$site_id}");
        while ($row = mysql_fetch_array($result)) {
            $site_link = $row['url'];
            if ($site_link != $url) {
                $sql_site_link = mysql_real_escape_string($site_link);
                $link_level = (int) $row['level'];
                mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$sql_site_link}', {$link_level}, '{$sessid}')");
            }
        }
        mysql_query("update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$sql_maxlevel}, required = '{$sql_inc}'," . "disallowed = '{$sql_not_inc}', can_leave_domain={$sql_leave} where site_id={$site_id}");
        echo mysql_error();
    } elseif ($site_id == '') {
        // Unknown site: register it and read back its new id.
        mysql_query("insert into " . $mysql_table_prefix . "sites (url, indexdate, spider_depth, required, disallowed, can_leave_domain) " . "values ('{$sql_url}', now(), {$sql_maxlevel}, '{$sql_inc}', '{$sql_not_inc}', {$sql_leave})");
        echo mysql_error();
        $result = mysql_query("select site_ID from " . $mysql_table_prefix . "sites where url='{$sql_url}'");
        $row = mysql_fetch_row($result);
        $site_id = $row ? $row[0] : '';
    } else {
        // Known site, plain index run: refresh its stored settings.
        mysql_query("update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$sql_maxlevel}, required = '{$sql_inc}'," . "disallowed = '{$sql_not_inc}', can_leave_domain={$sql_leave} where site_id={$site_id}");
        echo mysql_error();
    }

    // Is there a suspended run for this site?
    $result = mysql_query("select site_id, temp_id, level, count, num from " . $mysql_table_prefix . "pending where site_id='{$site_id}'");
    echo mysql_error();
    $row = mysql_fetch_row($result);
    $pending = $row ? $row[0] : '';
    $level = 0;
    // Running page number used in the reports; restored below when resuming.
    $num = 0;
    $domain_arr = get_domains();
    if ($pending == '') {
        mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$sql_url}', 0, '{$sessid}')");
        echo mysql_error();
    } else {
        // Resume: take over the stored session, depth, position and page number.
        printStandardReport('continueSuspended', $command_line);
        $sessid = $row[1];
        $level = (int) $row[2];
        $pend_count = $row[3] + 1;
        $num = (int) $row[4];
        $pending = 1;
        $tmp_urls = get_temp_urls($sessid);
    }

    // From here on the session id may originate from the database, so escape it.
    $sql_sessid = mysql_real_escape_string($sessid);

    if ($reindex != 1) {
        mysql_query("insert into " . $mysql_table_prefix . "pending (site_id, temp_id, level, count) values ('{$site_id}', '{$sql_sessid}', '0', '0')");
        echo mysql_error();
    }

    $omit = check_robot_txt($url);
    printHeader($omit, $url, $command_line);
    $mainurl = $url;

    while (($level <= $maxlevel && $soption == 'level') || $soption == 'full') {
        // Only the first level after a resume starts in the middle of the link list.
        if ($pending == 1) {
            $count = $pend_count;
            $pending = 0;
        } else {
            $count = 0;
        }

        $links = array();
        $result = mysql_query("select distinct link from " . $mysql_table_prefix . "temp where level={$level} and id='{$sql_sessid}' order by link");
        echo mysql_error();
        $rows = mysql_num_rows($result);
        if ($rows == 0) {
            // No links queued for this depth: the crawl is finished.
            break;
        }
        while ($row = mysql_fetch_array($result)) {
            $links[] = $row['link'];
        }

        while ($count < count($links)) {
            $num++;
            $thislink = $links[$count];
            $linkparts = parse_url($thislink);
            $link_host = isset($linkparts['host']) ? $linkparts['host'] : '';
            $forbidden = 0;

            // robots.txt exclusions: relative rules are matched as "host + rule".
            foreach ($omit as $omiturl) {
                $omiturl = trim($omiturl);
                $omiturl_parts = parse_url($omiturl);
                if (empty($omiturl_parts['scheme'])) {
                    $check_omit = $link_host . $omiturl;
                } else {
                    $check_omit = $omiturl;
                }
                // strpos() returns 0 for a match at the very start, so compare against false.
                if ($check_omit !== '' && strpos($thislink, $check_omit) !== false) {
                    printRobotsReport($num, $thislink, $command_line);
                    check_for_removal($thislink);
                    $forbidden = 1;
                    break;
                }
            }

            // Include / exclude strings configured for the site.
            if (!check_include($thislink, $url_inc, $url_not_inc)) {
                printUrlStringReport($num, $thislink, $command_line);
                check_for_removal($thislink);
                $forbidden = 1;
            }

            if ($forbidden == 0) {
                printRetrieving($num, $thislink, $command_line);
                $sql_link = mysql_real_escape_string($thislink);
                $result = mysql_query("select md5sum, indexdate from " . $mysql_table_prefix . "links where url='{$sql_link}'");
                echo mysql_error();
                $rows = mysql_num_rows($result);
                if ($rows == 0) {
                    // New page.
                    index_url($thislink, $level + 1, $site_id, '', $domain, '', $sessid, $can_leave_domain, $reindex);
                    mysql_query("update " . $mysql_table_prefix . "pending set level = {$level}, count={$count}, num={$num} where site_id={$site_id}");
                    echo mysql_error();
                } elseif ($reindex == 1) {
                    // Known page, re-index requested: pass the stored checksum and date along.
                    $row = mysql_fetch_array($result);
                    $md5sum = $row['md5sum'];
                    $indexdate = $row['indexdate'];
                    index_url($thislink, $level + 1, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex);
                    mysql_query("update " . $mysql_table_prefix . "pending set level = {$level}, count={$count}, num={$num} where site_id={$site_id}");
                    echo mysql_error();
                } else {
                    printStandardReport('inDatabase', $command_line);
                }
            }
            $count++;
        }
        $level++;
    }

    // Crawl complete: drop the work queue and the resume marker.
    mysql_query("delete from " . $mysql_table_prefix . "temp where id = '{$sql_sessid}'");
    echo mysql_error();
    mysql_query("delete from " . $mysql_table_prefix . "pending where site_id = '{$site_id}'");
    echo mysql_error();
    printStandardReport('completed', $command_line);
}
/**
 * Continue indexing every site that has an entry in the pending table.
 *
 * Reads all sites ordered by URL, checks each one against the pending table and,
 * for the pending ones, loads the site's stored crawl settings and calls
 * index_site() with $reindex = 0. Prints start/finish reports, the consumed time
 * and the page footer.
 *
 * Side effects: overwrites the global $started with the current time; when
 * $debug is set and the first query fails, prints diagnostics and exits.
 *
 * @return void
 */
function index_suspended()
{
    global $db_con, $mysql_table_prefix, $command_line, $debug, $use_robot, $use_nofollow, $no_log, $clear, $started, $cl;
    $started = time();
    $reindex = 0;
    // Defined up front so the final clean_resource() call never sees an undefined variable.
    $result = null;
    printStandardReport('SuspendedStart', $command_line, $no_log);
    //  get ID and URL of all sites
    $sql_query = "SELECT site_id, url from " . $mysql_table_prefix . "sites ORDER by url";
    $result1 = $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        $err_row = __LINE__ - 2;    // line of the failing query() call above
        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $db_con->error);
        // Plain echo: the file path must not be interpreted as a printf format string.
        echo "<p><span class='red'>&nbsp;Found in script: " . __FILE__ . "&nbsp;&nbsp;row: {$err_row}&nbsp;&nbsp;in function():&nbsp;" . __FUNCTION__ . "&nbsp;<br /></span></p>";
        echo "<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>";
        echo "<p><strong>Invalid query string, which caused the SQL error:</strong></p>";
        echo "<p> {$sql_query} </p>";
        exit;
    }

    // query() returns false on failure; skip the loop instead of crashing on fetch_array().
    while ($result1 && ($row1 = $result1->fetch_array(MYSQLI_NUM))) {
        $site_id = (int) $row1[0];
        $url = $row1[1];

        //  check whether this site is pending
        $sql_query = "SELECT site_id from " . $mysql_table_prefix . "pending where site_id ={$site_id}";
        $result2 = $db_con->query($sql_query);
        $row2 = $result2 ? $result2->fetch_array(MYSQLI_ASSOC) : null;
        if (!$row2 || $row2['site_id'] != $site_id) {
            continue;
        }

        //  pending: fetch all important data of this site
        $sql_query = "SELECT url, spider_depth, required, disallowed, can_leave_domain, use_prefcharset from " . $mysql_table_prefix . "sites where url='" . $db_con->real_escape_string($url) . "'";
        $result = $db_con->query($sql_query);
        $row = $result ? $result->fetch_array(MYSQLI_NUM) : null;
        if ($clear == 1) {
            clean_resource($result, '21');
        }
        if (!$row) {
            // Without the site's settings index_site() would run with stale or undefined values.
            continue;
        }

        $maxlevel = $row[1];
        $in = isset($row[2]) ? $row[2] : "";
        $out = isset($row[3]) ? $row[3] : "";
        $domaincb = ($row[4] == '') ? 0 : $row[4];
        $use_prefcharset = $row[5];
        // A stored depth of -1 means "no depth limit".
        $soption = ($maxlevel == -1) ? 'full' : 'level';

        //  now index the rest of this site
        // NOTE(review): $all is neither a local nor in the global list above, so it is
        // undefined (null) here -- confirm whether the global $all was intended.
        index_site($url, $reindex, $maxlevel, $soption, $in, $out, $domaincb, $use_robot, $use_nofollow, $cl, $all, $use_prefcharset);
    }

    if ($clear == 1) {
        // NOTE(review): this passes the last per-site result (null when no site was
        // pending); possibly $result1 was meant -- confirm against clean_resource().
        clean_resource($result, '20');
    }
    $ended = time();
    $consumed = $ended - $started;
    printConsumedReport('consumed', $cl, '0', $consumed);
    printStandardReport('SuspendedFinish', $command_line, '0');
    create_footer();
}
 sort($all_media);
 if ($clear == '1') {
     unset($id3_string, $element, $thisaudio, $title, $title_orig, $title_trans, $id3_string, $id3_rem, $name, $select);
 }
 //  ********** find videos **********
 mysqltest();
 if ($index_video == '1') {
     $select = $videolist;
     //  find only videos as defined in file 'video.txt'
     $element = 'video';
     $title = '';
     //get media placed in <video> elements
     $all_media = get_elements($element, $all_media, $raw_file, $regs, $trash1, $replace1, $handle, $store_file);
     if ($debug == '2') {
         //  if debug mode, show details
         printStandardReport('newVideo', $command_line, $no_log);
     }
     foreach ($all_media as $thisvideo) {
         preg_match("/([\\/]?value|[\\/]?href|[\\/]?data|[\\/]?classid|[\\/]?src)\\s*=\\s*[\\'\"](.*?)[\\'\"]/si", $thisvideo[0], $this_video);
         if (($link = build_url($this_video[2], $url, $select, $thisvideo[0], $handle, $store_file)) != '') {
             //  if valid URL was built
             $link = $db_con->real_escape_string($link);
             $handle = @fopen($link, "r");
             if ($handle) {
                 //  really existing video, or dead link only
                 @fclose($handle);
                 $new_md5 = md5_file($link);
                 //      calculate checksum of new video
                 $suffix = strtolower(substr($link, strrpos($link, ".")));
                 $my_name = basename($link, $suffix);
                 //      try to find already indexed video with the same md5sum
/**
 * Decrement the visibility counter of an indexed page, or purge the page
 * once that counter has reached zero.
 *
 * The URL is looked up in the links table. If its "visible" value is above
 * zero it is decreased by one. Otherwise the link row and all of its rows in
 * the sixteen link_keyword0 .. link_keywordf tables are deleted and a
 * 'pageRemoved' report is printed. URLs that are not in the links table are
 * ignored.
 *
 * @param string $url URL of the page; it is quoted here before being used in SQL,
 *                    so callers must pass it unescaped.
 * @return void
 */
function check_for_removal($url)
{
    global $command_line;
    global $db;

    // quote() escapes the value and adds the surrounding quotes.
    // NOTE(review): assumes $db is a PDO instance (query/fetch/closeCursor/exec match PDO) -- confirm.
    $result = $db->query("select link_id, visible from " . TABLE_PREFIX . "links" . " where url=" . $db->quote($url));
    echo sql_errorstring(__FILE__, __LINE__);
    if (!$result) {
        // Query failed; the error (if any) has been printed above.
        return;
    }

    $row = $result->fetch();
    $result->closeCursor();
    if (!$row) {
        return;
    }

    $link_id = (int) $row[0];
    $visible = (int) $row[1];

    if ($visible > 0) {
        // Page still has "lives" left: just count one down.
        $visible--;
        $db->exec("update " . TABLE_PREFIX . "links set visible={$visible} where link_id={$link_id}");
        echo sql_errorstring(__FILE__, __LINE__);
        return;
    }

    // Counter exhausted: drop the link and its keyword relations.
    $db->exec("delete from " . TABLE_PREFIX . "links where link_id={$link_id}");
    echo sql_errorstring(__FILE__, __LINE__);
    // Keyword relations are sharded over 16 tables suffixed with one hex digit (0..f).
    for ($i = 0; $i <= 15; $i++) {
        $char = dechex($i);
        $db->exec("delete from " . TABLE_PREFIX . "link_keyword{$char} where link_id={$link_id}");
        echo sql_errorstring(__FILE__, __LINE__);
    }
    printStandardReport('pageRemoved', $command_line);
}
 //  read .xls files into an array
 if (stristr($file, ".xls")) {
     $error = '';
     require_once "" . $converter_dir . "/xls_reader.php";
     $data = new Spreadsheet_Excel_Reader();
     if ($mb == '1') {
         //  if extention exists, change 'iconv' to mb_convert_encoding:
         $data->setUTFEncoder('mb');
     }
     // set output encoding.
     $data->setOutputEncoding('UTF-8');
     //  read this document
     $data->read($file);
     $error = $data->_ole->error;
     if ($error == '1') {
         printStandardReport('xlsError', $command_line, $no_log);
         $result = 'ERROR';
     } else {
         $result = ' ';
         $boundsheets = array();
         $sheets = array();
         $boundsheets = $data->boundsheets;
         // get all tables in this file
         $sheets = $data->sheets;
         // get content of all sheets in all tables
         if ($boundsheets) {
             foreach ($boundsheets as &$bs) {
                 //$result .= "".$bs['name'].", "; //  collect all table names in this file
             }
             if ($sheets) {
                 foreach ($sheets as &$sheet) {
Beispiel #8
0
/**
 * Index every site that has never been indexed before.
 *
 * Reads all rows of the sites table and calls index_site() for each site whose
 * indexdate is empty. A spider depth of -1 selects the 'full' crawl option,
 * anything else the depth limited 'level' option. Prints the 'NewStart' /
 * 'NewFinish' reports and the page footer.
 *
 * @return void
 */
function index_new()
{
    global $command_line, $use_robot;

    printStandardReport('NewStart', $command_line);
    $result = mysql_query("select url, indexdate, spider_depth, required, disallowed, can_leave_domain from " . TABLE_PREFIX . "sites");
    if (DEBUG > '0') {
        echo mysql_error();
    }

    // mysql_query() returns false on failure; skip the loop in that case.
    while ($result && ($row = mysql_fetch_row($result))) {
        $url = $row[0];
        $indexdate = $row[1];
        $depth = $row[2];
        $include = $row[3];
        $not_include = $row[4];
        $can_leave_domain = $row[5];

        if ($indexdate != '') {
            // Already indexed at least once: not a "new" site.
            continue;
        }
        if ($can_leave_domain == '') {
            $can_leave_domain = 0;
        }
        $soption = ($depth == -1) ? 'full' : 'level';

        // NOTE(review): the reindex flag is passed as 1 although the sites are new; the
        // original contained a no-op "$reindex == 0;" statement, so 0 may have been
        // intended -- confirm before changing.
        index_site($url, 1, $depth, $soption, $include, $not_include, $can_leave_domain, $use_robot);
    }

    clean_resource($result);
    printStandardReport('NewFinish', $command_line);
    create_footer();
}
 /**
  * Load the charset conversion table(s) for the given charset name(s).
  *
  * For every argument passed, the file CONVERT_TABLES_DIR . "<name>.txt" is read.
  * If that file is missing, the table of the home charset ($home_charset with a
  * leading "iso-" removed) is tried instead; if that is missing too, indexing is
  * reported as aborted and '' is returned.
  *
  * Table file format, as parsed here: lines starting with "#" are comments; other
  * lines hold at least two values separated by whitespace or commas. The first two
  * values are used with any "0x" removed and upper-cased: the second value becomes
  * the key, the first the value. Lines whose second value starts with "#", or that
  * have no second value, are skipped.
  *
  * @param string $FromCharset Name of the source charset table.
  * @param string $ToCharset   Optional name of a second charset table.
  * @return array|string array(charsetName => array(KEY => VALUE)), or '' when no
  *                      usable table file was found.
  */
 function MakeConvertTable($FromCharset, $ToCharset = '')
 {
     global $home_charset, $cl, $command_line, $no_log;
     $ConvertTable = array();
     for ($i = 0; $i < func_num_args(); $i++) {
         $FileName = func_get_arg($i);
         $FileName = "{$FileName}.txt";
         if (!is_file(CONVERT_TABLES_DIR . $FileName)) {
             // Requested table is missing: report it and fall back to the home charset.
             printConverterError($FileName, $cl);
             printTryHome($home_charset, $cl);
             $homeSet = str_ireplace('iso-', '', $home_charset);
             $FileName = "{$homeSet}.txt";
             if (!is_file(CONVERT_TABLES_DIR . $FileName)) {
                 printConverterError($FileName, $cl);
                 printStandardReport('abortedIndx', $command_line, $no_log);
                 return '';
             }
         }
         $FileWithEncTabe = fopen(CONVERT_TABLES_DIR . $FileName, "r") or die;
         while (!feof($FileWithEncTabe)) {
             $OneLine = trim(fgets($FileWithEncTabe, 1024));
             // Skip blank lines and comment lines.
             if (!$OneLine || substr($OneLine, 0, 1) == "#") {
                 continue;
             }
             // Only the first two values are needed; the rest of the line stays in element 2.
             $HexValue = preg_split("/[\\s,]+/", $OneLine, 3);
             // A line with a single value has no mapping; a second value starting with
             // "#" is a trailing comment and therefore no mapping either.
             if (!isset($HexValue[1]) || substr($HexValue[1], 0, 1) == "#") {
                 continue;
             }
             $ArrayKey = strtoupper(str_replace("0x", "", $HexValue[1]));
             $ArrayValue = strtoupper(str_replace("0x", "", $HexValue[0]));
             $ConvertTable[func_get_arg($i)][$ArrayKey] = $ArrayValue;
         }
         // Release the handle; one file is opened per requested charset.
         fclose($FileWithEncTabe);
     }
     if (!isset($ConvertTable[$FromCharset]) || !is_array($ConvertTable[$FromCharset])) {
         $ConvertTable[$FromCharset] = array();
     }
     if (func_num_args() > 1) {
         // A table file without any mapping leaves no entry; treat that as an empty table.
         $FromTable = $ConvertTable[$FromCharset];
         $ToTable = (isset($ConvertTable[$ToCharset]) && is_array($ConvertTable[$ToCharset])) ? $ConvertTable[$ToCharset] : array();
         if (count($FromTable) == count($ToTable) && count(array_diff_assoc($FromTable, $ToTable)) == 0) {
             // Both tables are identical.
             // NOTE(review): $this is only valid if this function is a class method; the
             // enclosing class header is not visible here -- confirm.
             print $this->DebugOutput(1, 1, "{$FromCharset}, {$ToCharset}");
         }
     }
     return $ConvertTable;
 }
Beispiel #10
0
/**
 * Read a sitemap file and collect the URLs that changed since they were last indexed.
 *
 * For every <url> entry the <lastmod> date is compared with the indexdate stored in
 * the links table for a matching URL (substring match); entries that are newer, or
 * not in the links table, are collected. The temp table is emptied as soon as
 * the first entry is processed. Prints a 'validSitemap' or 'invalidSitemap' report.
 *
 * @param string $input_file         Path or URL of the sitemap file.
 * @param string $mysql_table_prefix Table name prefix of the database.
 * @return array|null List of URL strings. For a parsable sitemap without any newer
 *                    entry the list is array('') (legacy shape, kept for callers);
 *                    null when the file could not be parsed.
 */
function get_sitemap($input_file, $mysql_table_prefix)
{
    global $command_line;

    $links = null;  // stays null when the sitemap cannot be parsed
    $res = null;    // last lookup result, released after the loop

    $s_map = simplexml_load_file($input_file);
    // simplexml_load_file() returns false on failure. Comparing the element with ''
    // would also reject valid sitemaps whose root element holds no direct text.
    if ($s_map !== false) {
        $links = array();
        $temp_cleared = false;
        foreach ($s_map as $url) {
            $loc = (string) $url->loc;
            $the_url = str_replace("&amp;", "&", $loc);
            // lastmod date of this page, taken from the sitemap
            $lastmod = strtotime((string) $url->lastmod);

            // The temp table will be rebuilt; emptying it once is enough.
            if (!$temp_cleared) {
                mysql_query("delete from " . $mysql_table_prefix . "temp");
                $temp_cleared = true;
            }

            // do we already know this link?
            $res = mysql_query("select indexdate from " . $mysql_table_prefix . "links where url like '%" . mysql_real_escape_string($the_url) . "%'");
            $indexdate = 0;
            if ($res && mysql_num_rows($res) > 0) {
                // mysql_result() takes the row number second and the field name third.
                $indexdate = strtotime(mysql_result($res, 0, "indexdate"));
            }

            // add the link only if the sitemap date is newer than the date of last index
            if ($lastmod - $indexdate > 0) {
                $links[] = $loc;
            }
        }
        clean_resource($res);

        // Legacy shape: callers have always received array('') when nothing was newer,
        // which also keeps the 'validSitemap' report below for a parsable sitemap.
        if (!$links) {
            $links = array('');
        }
    }

    if ($links) {
        printStandardReport('validSitemap', $command_line);
    } else {
        printStandardReport('invalidSitemap', $command_line);
    }
    return $links;
}