} } if ($clear == 1) { clean_resource($result, '01'); } } if (!isset($in)) { $in = ""; } if (!isset($out)) { $out = ""; } $started = time(); index_site($url, $reindex, $maxlevel, $soption, $in, $out, $can_leave, $use_robot, $use_nofollow, $cl, $all, $use_pref); $ended = time(); $consumed = $ended - $started; printConsumedReport('consumed', $cl, '0', $consumed); printStandardReport('ReindexFinish', $command_line, '0'); } printStandardReport('quit', $command_line, '0'); if ($email_log) { $indexed = $all == 1 ? 'ALL' : $url; $log_report = ""; if ($log_handle) { $log_report = "Log saved into {$log_file}"; } mail($admin_email, "Sphider indexing report", "Sphider has finished indexing {$indexed} at " . date("y-m-d H:i:s") . ". " . $log_report); } if ($log_handle) { fclose($log_handle); }
/**
 * Handle a link that is no longer allowed in the index.
 *
 * Looks the URL up in the links table. While the link's `visible` counter is
 * positive, the counter is decremented. Once it is zero or less, the link row
 * and its rows in the sixteen link_keyword0 .. link_keywordf tables are
 * deleted and a 'pageRemoved' report is printed. Unknown URLs are ignored.
 *
 * @param string $url URL of the link to check (raw, not SQL-escaped).
 * @return void
 */
function check_for_removal($url)
{
    global $mysql_table_prefix;
    global $command_line;

    // The URL is embedded in a string literal below, so it has to be escaped;
    // otherwise a quote inside the URL breaks (or rewrites) the statement.
    $safe_url = mysql_real_escape_string($url);

    $result = mysql_query("select link_id, visible from " . $mysql_table_prefix . "links" . " where url='{$safe_url}'");
    echo mysql_error();

    // A failed query returns false; treat that like "link not known".
    if (!$result || mysql_num_rows($result) == 0) {
        return;
    }

    $row = mysql_fetch_row($result);
    $link_id = (int) $row[0];
    $visible = (int) $row[1];

    if ($visible > 0) {
        // Still referenced: only lower the counter.
        $visible--;
        mysql_query("update " . $mysql_table_prefix . "links set visible={$visible} where link_id={$link_id}");
        echo mysql_error();
        return;
    }

    mysql_query("delete from " . $mysql_table_prefix . "links where link_id={$link_id}");
    echo mysql_error();

    // Keyword relations are spread over 16 tables, suffixed with one hex digit.
    for ($i = 0; $i <= 15; $i++) {
        $char = dechex($i);
        mysql_query("delete from " . $mysql_table_prefix . "link_keyword{$char} where link_id={$link_id}");
        echo mysql_error();
    }

    printStandardReport('pageRemoved', $command_line);
}
/**
 * Spider and index one site, level by level.
 *
 * Workflow:
 *  1. Load the keyword cache ($all_keywords) once per run.
 *  2. Create or update the site's row in the sites table.
 *  3. Seed the temp table with the start URL (and, on re-index, with every
 *     link already known for the site), or resume a run recorded in the
 *     pending table.
 *  4. Walk the temp table one level at a time, skipping links forbidden by
 *     robots.txt or by the include/exclude strings, and hand every other link
 *     to index_url().
 *  5. Remove the session's temp rows and the site's pending row.
 *
 * NOTE(review): $url, $url_inc and $url_not_inc are embedded in SQL exactly as
 * received, as before. Whether callers pass them pre-escaped cannot be told
 * from this function, so escaping them here could double-escape; confirm and
 * fix at the call sites. Link URLs that this function reads back from the
 * database are escaped below.
 *
 * @param string $url              Start URL of the site.
 * @param int    $reindex          1 to re-index links that are already in the database.
 * @param int    $maxlevel         Maximum link depth (used when $soption is 'level').
 * @param string $soption          'level' to stop at $maxlevel, 'full' to follow all levels.
 * @param string $url_inc          "Must include" string, stored in sites.required.
 * @param string $url_not_inc      "Must not include" string, stored in sites.disallowed.
 * @param int    $can_leave_domain Flag stored in sites.can_leave_domain and passed to index_url().
 * @return void
 */
function index_site($url, $reindex, $maxlevel, $soption, $url_inc, $url_not_inc, $can_leave_domain)
{
    global $mysql_table_prefix, $command_line, $mainurl, $tmp_urls, $domain_arr, $all_keywords;

    // Build the keyword => keyword_ID cache only once per run.
    if (!isset($all_keywords)) {
        $result = mysql_query("select keyword_ID, keyword from " . $mysql_table_prefix . "keywords");
        echo mysql_error();
        while ($result && ($row = mysql_fetch_array($result))) {
            $all_keywords[addslashes($row[1])] = $row[0];
        }
    }

    // A bare host ("http://example.com") gets a trailing slash.
    $compurl = parse_url($url);
    if (!isset($compurl['path']) || $compurl['path'] == '') {
        $url = $url . "/";
    }

    // Session id that ties the rows in the temp table to this run.
    $t = microtime();
    $a = getenv("REMOTE_ADDR");
    $sessid = md5($t . $a);

    $urlparts = parse_url($url);
    $domain = isset($urlparts['host']) ? $urlparts['host'] : null;

    // Numeric settings are cast so an empty value cannot produce broken SQL.
    $sql_maxlevel = (int) $maxlevel;
    $sql_can_leave = (int) $can_leave_domain;

    $result = mysql_query("select site_id from " . $mysql_table_prefix . "sites where url='{$url}'");
    echo mysql_error();
    $row = $result ? mysql_fetch_row($result) : false;
    $site_id = $row ? $row[0] : '';
    $known_site = ($site_id != "");

    if ($known_site && $reindex == 1) {
        // Re-index: queue the start URL plus every link already stored for the site.
        mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', 0, '{$sessid}')");
        echo mysql_error();
        $result = mysql_query("select url, level from " . $mysql_table_prefix . "links where site_id = {$site_id}");
        while ($result && ($row = mysql_fetch_array($result))) {
            $site_link = $row['url'];
            $link_level = (int) $row['level'];
            if ($site_link != $url) {
                // Value comes raw from the database, so it must be escaped again.
                $safe_link = mysql_real_escape_string($site_link);
                mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$safe_link}', {$link_level}, '{$sessid}')");
            }
        }
    }

    if ($known_site) {
        $qry = "update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$sql_maxlevel}, required = '{$url_inc}'," . "disallowed = '{$url_not_inc}', can_leave_domain={$sql_can_leave} where site_id={$site_id}";
        mysql_query($qry);
        echo mysql_error();
    } else {
        mysql_query("insert into " . $mysql_table_prefix . "sites (url, indexdate, spider_depth, required, disallowed, can_leave_domain) " . "values ('{$url}', now(), {$sql_maxlevel}, '{$url_inc}', '{$url_not_inc}', {$sql_can_leave})");
        echo mysql_error();
        $result = mysql_query("select site_ID from " . $mysql_table_prefix . "sites where url='{$url}'");
        $row = $result ? mysql_fetch_row($result) : false;
        $site_id = $row ? $row[0] : '';
    }

    // Is there a suspended run for this site?
    $result = mysql_query("select site_id, temp_id, level, count, num from " . $mysql_table_prefix . "pending where site_id='{$site_id}'");
    echo mysql_error();
    $row = $result ? mysql_fetch_row($result) : false;
    $pending = $row ? $row[0] : '';

    $level = 0;
    $resuming = false;
    $domain_arr = get_domains();

    if ($pending == '') {
        mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', 0, '{$sessid}')");
        echo mysql_error();
    } else {
        // Continue where the suspended run stopped: reuse its session id,
        // level and position. (The previous code issued a second, identical
        // SELECT here and threw the result away; that query is dropped.)
        printStandardReport('continueSuspended', $command_line);
        $sessid = $row[1];
        $level = (int) $row[2];
        $pend_count = $row[3] + 1;
        $resuming = true;
        $tmp_urls = get_temp_urls($sessid);
    }

    if ($reindex != 1) {
        mysql_query("insert into " . $mysql_table_prefix . "pending (site_id, temp_id, level, count) values ('{$site_id}', '{$sessid}', '0', '0')");
        echo mysql_error();
    }

    $omit = check_robot_txt($url);
    printHeader($omit, $url, $command_line);
    $mainurl = $url;

    // NOTE(review): the running link number always restarts at 0, also when a
    // suspended run is resumed (pending.num is read but never used) - this is
    // the pre-existing behaviour and is kept.
    $num = 0;

    while (($level <= $maxlevel && $soption == 'level') || $soption == 'full') {
        // Only the first level of a resumed run starts in the middle of the list.
        if ($resuming) {
            $count = $pend_count;
            $resuming = false;
        } else {
            $count = 0;
        }

        $links = array();
        $result = mysql_query("select distinct link from " . $mysql_table_prefix . "temp where level={$level} && id='{$sessid}' order by link");
        echo mysql_error();
        if (!$result || mysql_num_rows($result) == 0) {
            break;
        }
        while ($row = mysql_fetch_array($result)) {
            $links[] = $row['link'];
        }

        while ($count < count($links)) {
            $num++;
            $thislink = $links[$count];
            $urlparts = parse_url($thislink);
            $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
            $forbidden = 0;

            // robots.txt rules: a rule without scheme is relative to the link's host.
            if (is_array($omit)) {
                foreach ($omit as $omiturl) {
                    $omiturl = trim($omiturl);
                    $omiturl_parts = parse_url($omiturl);
                    if (!isset($omiturl_parts['scheme']) || $omiturl_parts['scheme'] == '') {
                        $check_omit = $link_host . $omiturl;
                    } else {
                        $check_omit = $omiturl;
                    }
                    // strpos() returns 0 for a match at the very start of the link,
                    // so the result must be compared against false explicitly.
                    if ($check_omit !== '' && strpos($thislink, $check_omit) !== false) {
                        printRobotsReport($num, $thislink, $command_line);
                        check_for_removal($thislink);
                        $forbidden = 1;
                        break;
                    }
                }
            }

            if (!check_include($thislink, $url_inc, $url_not_inc)) {
                printUrlStringReport($num, $thislink, $command_line);
                check_for_removal($thislink);
                $forbidden = 1;
            }

            if ($forbidden == 0) {
                printRetrieving($num, $thislink, $command_line);

                // $thislink was read raw from the temp table; escape it for the lookup.
                $safe_link = mysql_real_escape_string($thislink);
                $query = "select md5sum, indexdate from " . $mysql_table_prefix . "links where url='{$safe_link}'";
                $result = mysql_query($query);
                echo mysql_error();
                $rows = $result ? mysql_num_rows($result) : 0;

                // Progress marker written after each indexed link so a suspended run can resume.
                $progress_qry = "update " . $mysql_table_prefix . "pending set level = {$level}, count={$count}, num={$num} where site_id={$site_id}";

                if ($rows == 0) {
                    // Link not yet in the database: index it as new.
                    index_url($thislink, $level + 1, $site_id, '', $domain, '', $sessid, $can_leave_domain, $reindex);
                    mysql_query($progress_qry);
                    echo mysql_error();
                } elseif ($reindex == 1) {
                    // Known link: pass stored checksum and date so index_url() can compare.
                    $row = mysql_fetch_array($result);
                    $md5sum = $row['md5sum'];
                    $indexdate = $row['indexdate'];
                    index_url($thislink, $level + 1, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex);
                    mysql_query($progress_qry);
                    echo mysql_error();
                } else {
                    printStandardReport('inDatabase', $command_line);
                }
            }
            $count++;
        }
        $level++;
    }

    // Run finished: drop the work queue and the resume marker.
    mysql_query("delete from " . $mysql_table_prefix . "temp where id = '{$sessid}'");
    echo mysql_error();
    mysql_query("delete from " . $mysql_table_prefix . "pending where site_id = '{$site_id}'");
    echo mysql_error();
    printStandardReport('completed', $command_line);
}
/**
 * Continue indexing for every site that has an entry in the pending table.
 *
 * Reads all sites ordered by URL. For each site with a pending row, its
 * stored spider settings are loaded and passed to index_site() with
 * $reindex = 0. The elapsed time is reported and the page footer is written
 * at the end.
 *
 * @return void
 */
function index_suspended()
{
    global $db_con, $mysql_table_prefix, $command_line, $debug, $use_robot, $use_nofollow, $no_log, $clear, $started, $cl;

    $started = time();
    $reindex = 0;
    // The previous code handed an undefined variable $all to index_site();
    // the same value (null) is now passed explicitly.
    $all = null;

    printStandardReport('SuspendedStart', $command_line, $no_log);

    // get ID and URL of all sites
    $sql_query = "SELECT site_id, url from " . $mysql_table_prefix . "sites ORDER by url";
    $result1 = $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        $err_row = __LINE__ - 2; // line of the query() call above
        printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error);
        // __FUNCTION__ is never empty inside a function, so the former
        // "no function" variant of this message was unreachable.
        printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>");
        printf("<p><span class='red'> Script execution aborted. <br /></span>");
        printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>");
        echo "<p> {$sql_query} </p>";
        exit;
    }

    // query() returns false on failure; without debug mode that would have
    // been dereferenced below.
    while ($result1 && ($row1 = $result1->fetch_array(MYSQLI_NUM))) {
        $site_id = $row1[0];
        $url = $row1[1];

        // check whether this site is pending
        $sql_query = "SELECT site_id from " . $mysql_table_prefix . "pending where site_id =" . (int) $site_id;
        $result2 = $db_con->query($sql_query);
        $row2 = $result2 ? $result2->fetch_array(MYSQLI_ASSOC) : null;
        if (!$row2 || $row2['site_id'] != $site_id) {
            continue; // nothing suspended for this site
        }

        // fetch all important data of this site
        // ($url was read raw from the database, so escape it before reuse)
        $sql_query = "SELECT url, spider_depth, required, disallowed, can_leave_domain, use_prefcharset from " . $mysql_table_prefix . "sites where url='" . $db_con->real_escape_string($url) . "'";
        $result = $db_con->query($sql_query);
        $row = $result ? $result->fetch_array(MYSQLI_NUM) : null;
        if ($result && $clear == 1) {
            clean_resource($result, '21');
        }
        if (!$row) {
            // Without a settings row the previous code silently reused the
            // settings of the site handled before; skip the site instead.
            continue;
        }

        $maxlevel = $row[1];
        $in = isset($row[2]) ? $row[2] : "";
        $out = isset($row[3]) ? $row[3] : "";
        $domaincb = $row[4];
        $use_prefcharset = $row[5];
        if ($domaincb == '') {
            $domaincb = 0;
        }
        // A depth of -1 means "follow all levels".
        $soption = ($maxlevel == -1) ? 'full' : 'level';

        // now index the rest of this site
        index_site($url, $reindex, $maxlevel, $soption, $in, $out, $domaincb, $use_robot, $use_nofollow, $cl, $all, $use_prefcharset);
    }

    // Release the site list. The previous code passed $result here, which is
    // either undefined (no pending site) or already released inside the loop.
    if ($result1 && $clear == 1) {
        clean_resource($result1, '20');
    }

    $ended = time();
    $consumed = $ended - $started;
    printConsumedReport('consumed', $cl, '0', $consumed);
    printStandardReport('SuspendedFinish', $command_line, '0');
    create_footer();
}
sort($all_media); if ($clear == '1') { unset($id3_string, $element, $thisaudio, $title, $title_orig, $title_trans, $id3_string, $id3_rem, $name, $select); } // ********** find videos ********** mysqltest(); if ($index_video == '1') { $select = $videolist; // find only videos as defined in file 'video.txt' $element = 'video'; $title = ''; //get media placed in <video> elements $all_media = get_elements($element, $all_media, $raw_file, $regs, $trash1, $replace1, $handle, $store_file); if ($debug == '2') { // if debug mode, show details printStandardReport('newVideo', $command_line, $no_log); } foreach ($all_media as $thisvideo) { preg_match("/([\\/]?value|[\\/]?href|[\\/]?data|[\\/]?classid|[\\/]?src)\\s*=\\s*[\\'\"](.*?)[\\'\"]/si", $thisvideo[0], $this_video); if (($link = build_url($this_video[2], $url, $select, $thisvideo[0], $handle, $store_file)) != '') { // if valid URL was built $link = $db_con->real_escape_string($link); $handle = @fopen($link, "r"); if ($handle) { // really existing video, or dead link only @fclose($handle); $new_md5 = md5_file($link); // calculate checksum of new video $suffix = strtolower(substr($link, strrpos($link, "."))); $my_name = basename($link, $suffix); // try to find already indexed video with the same md5sum
/**
 * Handle a link that is no longer allowed in the index.
 *
 * Looks the URL up in the links table. While the link's `visible` counter is
 * positive, the counter is decremented. Once it is zero or less, the link row
 * and its rows in the sixteen link_keyword0 .. link_keywordf tables are
 * deleted and a 'pageRemoved' report is printed. Unknown URLs are ignored.
 *
 * NOTE(review): $db is used through query()/exec()/fetch()/closeCursor(), i.e.
 * the PDO API; quote() below assumes it really is a PDO instance - confirm.
 *
 * @param string $url URL of the link to check (raw, not SQL-escaped).
 * @return void
 */
function check_for_removal($url)
{
    global $command_line;
    global $db;

    // quote() escapes the value and adds the surrounding quotes, so a quote
    // character inside the URL can no longer break the statement.
    $result = $db->query("select link_id, visible from " . TABLE_PREFIX . "links" . " where url=" . $db->quote($url));
    echo sql_errorstring(__FILE__, __LINE__);

    // query() returns false on failure (when exceptions are not enabled).
    if (!$result) {
        return;
    }

    $row = $result->fetch();
    $result->closeCursor();
    if (!$row) {
        return;
    }

    $link_id = (int) $row[0];
    $visible = (int) $row[1];

    if ($visible > 0) {
        // Still referenced: only lower the counter.
        $visible--;
        $db->exec("update " . TABLE_PREFIX . "links set visible={$visible} where link_id={$link_id}");
        echo sql_errorstring(__FILE__, __LINE__);
        return;
    }

    $db->exec("delete from " . TABLE_PREFIX . "links where link_id={$link_id}");
    echo sql_errorstring(__FILE__, __LINE__);

    // Keyword relations are spread over 16 tables, suffixed with one hex digit.
    for ($i = 0; $i <= 15; $i++) {
        $char = dechex($i);
        $db->exec("delete from " . TABLE_PREFIX . "link_keyword{$char} where link_id={$link_id}");
        echo sql_errorstring(__FILE__, __LINE__);
    }

    printStandardReport('pageRemoved', $command_line);
}
// read .xls files into an array if (stristr($file, ".xls")) { $error = ''; require_once "" . $converter_dir . "/xls_reader.php"; $data = new Spreadsheet_Excel_Reader(); if ($mb == '1') { // if extention exists, change 'iconv' to mb_convert_encoding: $data->setUTFEncoder('mb'); } // set output encoding. $data->setOutputEncoding('UTF-8'); // read this document $data->read($file); $error = $data->_ole->error; if ($error == '1') { printStandardReport('xlsError', $command_line, $no_log); $result = 'ERROR'; } else { $result = ' '; $boundsheets = array(); $sheets = array(); $boundsheets = $data->boundsheets; // get all tables in this file $sheets = $data->sheets; // get content of all sheets in all tables if ($boundsheets) { foreach ($boundsheets as &$bs) { //$result .= "".$bs['name'].", "; // collect all table names in this file } if ($sheets) { foreach ($sheets as &$sheet) {
/**
 * Index every site that has never been indexed (empty indexdate).
 *
 * Reads all rows of the sites table and calls index_site() for each row whose
 * indexdate is empty, using the spider settings stored with the site. Start
 * and finish reports are printed and the page footer is written at the end.
 *
 * @return void
 */
function index_new()
{
    global $command_line, $use_robot;

    // The former statement "$reindex == 0;" was removed: it only compared an
    // undefined variable with 0 and had no effect. index_site() is called with
    // a literal 1 as its re-index argument below, as before.

    printStandardReport('NewStart', $command_line);

    $result = mysql_query("select url, indexdate, spider_depth, required, disallowed, can_leave_domain from " . TABLE_PREFIX . "sites");
    if (DEBUG > '0') {
        echo mysql_error();
    }

    // mysql_query() returns false on failure; do not fetch from it then.
    while ($result && ($row = mysql_fetch_row($result))) {
        $url = $row[0];
        $indexdate = $row[1];
        $depth = $row[2];
        $include = $row[3];
        $not_include = $row[4];
        $can_leave_domain = $row[5];

        if ($can_leave_domain == '') {
            $can_leave_domain = 0;
        }
        // A depth of -1 means "follow all levels".
        $soption = ($depth == -1) ? 'full' : 'level';

        if ($indexdate == '') {
            index_site($url, 1, $depth, $soption, $include, $not_include, $can_leave_domain, $use_robot);
        }
    }

    clean_resource($result);
    printStandardReport('NewFinish', $command_line);
    create_footer();
}
/**
 * Load the character conversion table(s) for one or two charsets.
 *
 * For every charset argument actually passed, the file
 * CONVERT_TABLES_DIR . "<charset>.txt" is read. If that file is missing, an
 * error is reported and the table of the home charset ($home_charset with any
 * "iso-" removed) is loaded instead; if that is missing too, indexing is
 * reported as aborted and '' is returned.
 *
 * Table file format, as parsed here: lines starting with "#" are comments.
 * Every other line holds at least two values separated by whitespace or
 * commas. The second value (without "0x", upper-cased) becomes the key and
 * the first value (same normalisation) becomes the entry.
 *
 * @param string $FromCharset Name of the source charset.
 * @param string $ToCharset   Optional name of the target charset.
 * @return array|string array(charset => array(key => value)), or '' when no
 *                      table file could be found.
 */
function MakeConvertTable($FromCharset, $ToCharset = '')
{
    global $home_charset, $cl, $command_line, $no_log;

    $ConvertTable = array();
    $argCount = func_num_args();

    for ($i = 0; $i < $argCount; $i++) {
        $charset = func_get_arg($i);
        $FileName = "{$charset}.txt";

        if (!is_file(CONVERT_TABLES_DIR . $FileName)) {
            // No table for the requested charset: report it and fall back to
            // the home charset.
            printConverterError($FileName, $cl);
            printTryHome($home_charset, $cl);
            $homeSet = str_ireplace('iso-', '', $home_charset);
            $FileName = "{$homeSet}.txt";
            if (!is_file(CONVERT_TABLES_DIR . $FileName)) {
                printConverterError($FileName, $cl);
                printStandardReport('abortedIndx', $command_line, $no_log);
                return '';
            }
        }

        $FileWithEncTabe = fopen(CONVERT_TABLES_DIR . $FileName, "r");
        if ($FileWithEncTabe === false) {
            die; // same hard stop as before when the file cannot be opened
        }

        while (!feof($FileWithEncTabe)) {
            $rawLine = fgets($FileWithEncTabe, 1024);
            if ($rawLine === false) {
                break;
            }
            $OneLine = trim($rawLine);
            if (!$OneLine || substr($OneLine, 0, 1) == "#") {
                continue; // blank line or comment
            }

            // Only the first two values are needed; the rest is commentary.
            $HexValue = preg_split("/[\\s,]+/", $OneLine, 3);
            if (!isset($HexValue[1]) || substr($HexValue[1], 0, 1) == "#") {
                continue; // line carries no second value
            }

            $ArrayKey = strtoupper(str_replace("0x", "", $HexValue[1]));
            $ArrayValue = strtoupper(str_replace("0x", "", $HexValue[0]));
            $ConvertTable[$charset][$ArrayKey] = $ArrayValue;
        }

        // The handle used to stay open until the end of the request.
        fclose($FileWithEncTabe);
    }

    if (!isset($ConvertTable[$FromCharset]) || !is_array($ConvertTable[$FromCharset])) {
        $ConvertTable[$FromCharset] = array();
    }

    // The former check for identical source and target tables only printed a
    // debug notice via $this->DebugOutput(). This is a plain function, so that
    // call was a fatal error whenever both tables matched; it also counted
    // $ConvertTable[$ToCharset] without checking it exists. The check has no
    // influence on the result and was removed.

    return $ConvertTable;
}
/**
 * Collect the URLs of a sitemap file that are newer than their index entry.
 *
 * Each entry's <lastmod> date is compared with the indexdate of a link whose
 * URL contains the entry's <loc>. The entry is returned when it is newer or
 * when no such link is indexed. The temp table is emptied before the first
 * entry is processed. A 'validSitemap' or 'invalidSitemap' report is printed
 * depending on whether the file could be parsed.
 *
 * Return values are kept compatible with the previous implementation:
 *  - null      when the sitemap could not be parsed,
 *  - array('') when it was parsed but holds no newer URL,
 *  - otherwise a list of URL strings.
 * NOTE(review): array('') is a legacy artefact of the former implode/explode
 * conversion; check whether callers rely on it before changing it to array().
 *
 * @param string $input_file         Path or URL of the sitemap XML file.
 * @param string $mysql_table_prefix Prefix of the database tables.
 * @return array|null See above.
 */
function get_sitemap($input_file, $mysql_table_prefix)
{
    global $command_line;

    $links = null;
    $s_map = simplexml_load_file($input_file);

    // simplexml_load_file() returns false on failure. The former test
    // ($s_map != '') compared the root element's text with '' and therefore
    // rejected valid sitemaps that have no whitespace between their tags.
    if ($s_map !== false) {
        $links = array();
        $res = null;
        $temp_cleared = false;

        foreach ($s_map as $url) {
            // The former str_replace("&", "&", ...) replaced "&" with itself;
            // the only effect was the string conversion done here.
            $the_url = (string) $url->loc;
            $lastmod = (int) strtotime((string) $url->lastmod); // 0 when missing or unparsable

            // get_sitemap builds a new temp table; emptying it once is enough
            // (it used to be emptied again for every single entry).
            if (!$temp_cleared) {
                mysql_query("delete from " . $mysql_table_prefix . "temp");
                $temp_cleared = true;
            }

            // The URL comes from an external file: escape LIKE wildcards first,
            // then escape the whole value for the SQL string literal.
            $like = mysql_real_escape_string(addcslashes($the_url, '\\%_'));
            $res = mysql_query("select indexdate from " . $mysql_table_prefix . "links where url like '%{$like}%'");

            $indexdate = 0;
            if ($res && mysql_num_rows($res) > 0) { // do we already know this link?
                $indexdate = (int) strtotime(mysql_result($res, 0, 'indexdate'));
            }

            // add the link only if the sitemap date is newer than the last index date
            if ($lastmod > $indexdate) {
                // Casting here replaces the former implode/explode round-trip,
                // which split URLs that contain a comma.
                $links[] = $the_url;
            }
        }

        if ($res !== null) {
            clean_resource($res);
        }

        if (!$links) {
            $links = array(''); // legacy return shape, see function comment
        }
    }

    if ($links) {
        printStandardReport('validSitemap', $command_line);
    } else {
        printStandardReport('invalidSitemap', $command_line);
    }

    return $links;
}