/**
 * Runs a PhpDig keyword search and renders (or returns) the result page.
 *
 * The function validates its inputs, parses the query into keyword criteria
 * (ignoring short and common words, treating "-word" as an exclusion), runs
 * one SQL query per criterion, combines the per-page weights, optionally
 * filters on the exact phrase using the stored text files, builds the result
 * table and navigation strings, logs the search and finally outputs through
 * a template, the built-in HTML page, or returns everything as an array.
 *
 * @param resource $id_connect           MySQL link handed to every mysql_query() call.
 * @param string   $query_string         Raw search string; an empty value yields the "no query" page.
 * @param string   $option               'start', 'any' or 'exact'; anything else becomes SEARCH_DEFAULT_MODE.
 * @param int      $refine               Normalised to 0/1 and then recomputed from $site/$path
 *                                       (1 when either is non-empty), so the passed value is not used.
 * @param string   $refine_url           Overwritten inside the function whenever a query is present.
 * @param int      $lim_start            Offset of the first result; cast to int, clamped to >= 0 and
 *                                       snapped down to a multiple of $limite.
 * @param int      $limite               Page size: 10, 30 or 100; anything else becomes SEARCH_DEFAULT_LIMIT.
 * @param int      $browse               0 or 1; when truthy the search is not logged.
 * @param mixed    $site                 Site id, or an "id,path" string which is split into $site and $path.
 * @param string   $path                 Path restriction; the sentinel "-###-" clears $site, $path and refine.
 * @param string   $relative_script_path Ignored: always replaced by $c['path'] . 'ext/phpdig'.
 * @param string   $template             'array', a template file path, or anything else for the built-in page.
 * @param int      $adlog_flag           Logging only happens when this equals 0.
 * @param string   $rssdf                Target of the RSS link shown when ALLOW_RSS_FEED is enabled.
 * @param string   $template_demo        Passed through into generated links and to phpdigMakeForm().
 *
 * @return mixed 0 when the search mode is still invalid after defaulting; an array of template
 *               strings plus a 'results' entry when $template == 'array'; otherwise nothing
 *               (the page is produced by phpdigParseTemplate() or printed directly).
 */
function phpdigSearch($id_connect, $query_string, $option = 'start', $refine = 0, $refine_url = '', $lim_start = 0, $limite = 10, $browse = 0, $site = 0, $path = '', $relative_script_path = '.', $template = '', $adlog_flag = 0, $rssdf = '', $template_demo = '')
{
    global $c, $phpdig_words_chars;

    // The caller-supplied path is deliberately replaced by the configured one.
    $relative_script_path = $c['path'] . 'ext/phpdig';

    // check input
    // $id_connect set in connect.php file
    // $query_string cleaned in $query_to_parse in search_function.php file
    if ($option != "start" && $option != "any" && $option != "exact") {
        $option = SEARCH_DEFAULT_MODE;
    }
    if ($refine != 0 && $refine != 1) {
        $refine = 0;
    }
    // $refine_url set in search_function.php file
    // $lim_start set in search_function.php file
    settype($limite, 'integer');
    if ($limite != 10 && $limite != 30 && $limite != 100) {
        $limite = SEARCH_DEFAULT_LIMIT;
    }
    if ($browse != 0 && $browse != 1) {
        $browse = 0;
    }
    // $site may arrive as "<site_id>,<path>": split it into its two parts.
    if (eregi("^[0-9]+[,]", $site)) {
        $tempbust = explode(",", $site);
        $site = $tempbust[0];
        $path = $tempbust[1];
    }
    settype($site, 'integer'); // now set to integer
    settype($path, 'string'); // make sure set to string
    // $my_path is the SQL-escaped form of $path; $path itself is kept unescaped.
    if (!get_magic_quotes_gpc()) {
        $my_path = addslashes($path);
    } else {
        $my_path = addslashes(stripslashes($path));
        $path = stripslashes($path);
    }
    // refine is derived from site/path, whatever the caller passed.
    if (empty($site) && empty($path)) {
        $refine = 0;
    } else {
        $refine = 1;
    }
    if ($path == "-###-") {
        $site = 0;
        $path = "";
        $refine = 0;
    }
    // $relative_script_path set in search.php file
    // $template set in config.php file
    // $adlog_flag set in search.php file
    // $rssdf set in search.php file
    // $template_demo set in config.php
    // NOTE(review): $template_demo and $rssdf are written into href attributes below
    // without escaping; confirm that the callers only pass validated values.

    $timer = new phpdigTimer('html');
    $timer->start('All');

    // init variables
    $maxweight = 0;
    $ignore = '';
    $ignore_common = '';
    $ignore_message = '';
    $ignore_commess = '';
    $ignore_short_flag = 0;
    $test_short_counter = 0;
    $wheresite = '';
    $wherepath = '';
    // $table_results, $final_result and $strings start as '' on purpose: the code
    // below uses is_array() on them to detect "nothing was added". Writing
    // $var[$key] on an empty string turns it into an array on the PHP versions
    // this file targets (it uses ereg/mysql_*, which only exist before PHP 7).
    $table_results = '';
    $final_result = '';
    $search_time = 0;
    $strings = '';
    $num_tot = 0;
    $leven_final = "";
    $exclude = array();
    $kconds = array();
    $nav_bar = '';
    $pages_bar = '';
    $previous_link = '';
    $next_link = '';

    $mtime = explode(' ', microtime());
    $start_time = $mtime[0] + $mtime[1];

    $timer->start('All backend');
    $timer->start('parsing strings');

    if (!$option) {
        $option = SEARCH_DEFAULT_MODE;
    }
    if (!in_array($option, array('start', 'any', 'exact'))) {
        return 0;
    }

    // the query was filled
    if ($query_string) {
        $common_words = phpdigComWords("{$relative_script_path}/includes/common_words.txt");

        // All three modes currently use the same "keyword LIKE 'term%'" comparison.
        $like_start = array("start" => "", "any" => "", "exact" => "");
        $like_end = array("start" => "%", "any" => "%", "exact" => "%");
        $like_operator = array("start" => "like", "any" => "like", "exact" => "like");

        if ($refine) {
            $wheresite = "AND spider.site_id = {$site} ";
            if ($path && strlen($path) > 0) {
                $my_path = str_replace(" ", "\\%20", $my_path);
                $wherepath = "AND spider.path = '{$my_path}' ";
            }
            $refine_url = "&refine=1&site={$site}&path=" . urlencode($path);
        } else {
            $refine_url = "";
        }

        settype($lim_start, "integer");
        if ($lim_start < 0) {
            $lim_start = 0;
        }

        $ncrit = 0;

        if (!get_magic_quotes_gpc()) {
            $query_to_parse = addslashes($query_string);
        } else {
            $query_to_parse = $query_string;
        }
        $my_query_string_link = stripslashes($query_to_parse);

        $query_to_parse = str_replace('_', '\\_', $query_to_parse); // avoid '_' in the query
        $query_to_parse = str_replace('%', '\\%', $query_to_parse); // avoid '%' in the query
        $query_to_parse = str_replace('\\"', ' ', $query_to_parse); // avoid '"' in the query
        $query_to_parse = phpdigStripAccents(strtolower($query_to_parse)); //made all lowercase

        // Replace every run of characters that is neither a word character nor one of \'._~@#$:&%/;,=- by a blank
        $what_query_chars = "[^" . $phpdig_words_chars[PHPDIG_ENCODING] . " \\'.\\_~@#\$:&\\%/;,=-]+";
        if (eregi($what_query_chars, $query_to_parse)) {
            $query_to_parse = eregi_replace($what_query_chars, " ", $query_to_parse);
        }
        // Drop punctuation that trails a word (before a blank or at the end of the query).
        $query_to_parse = ereg_replace('([' . $phpdig_words_chars[PHPDIG_ENCODING] . '])[\'.\\_~@#$:&\\%/;,=-]+($|[[:space:]]$|[[:space:]][' . $phpdig_words_chars[PHPDIG_ENCODING] . '])', '\\1 \\2', $query_to_parse);
        $query_to_parse = trim(ereg_replace(" +", " ", $query_to_parse)); // no more than 1 blank

        $query_for_strings = $query_to_parse;
        $query_for_phrase = $query_to_parse;
        $test_short = $query_to_parse;

        // NOTE(review): the length-sorted $query_to_parse is not read again in this function.
        $query_to_parse2 = explode(" ", $query_to_parse);
        usort($query_to_parse2, "phpdigByLength");
        $query_to_parse = implode(" ", $query_to_parse2);
        unset($query_to_parse2);

        // Collect the words that are too short to be searched into $ignore.
        if (SMALL_WORDS_SIZE >= 1) {
            $ignore_short_flag = 0;
            $test_short_counter = 0;

            $test_short2 = array_unique(array_map('trim', explode(" ", $test_short)));
            sort($test_short2);

            $test_short3 = array();
            foreach ($test_short2 as $short_candidate) {
                if (strlen($short_candidate) <= SMALL_WORDS_SIZE && strlen($short_candidate) > 0) {
                    $test_short_counter++;
                    $test_short3[] = $short_candidate . " ";
                }
            }
            $test_short = implode(" ", $test_short3);
            unset($test_short2, $test_short3);

            while (ereg('( [^ ]{1,' . SMALL_WORDS_SIZE . '} )|( [^ ]{1,' . SMALL_WORDS_SIZE . '})$|^([^ ]{1,' . SMALL_WORDS_SIZE . '} )', $test_short, $regs)) {
                for ($n = 1; $n <= 3; $n++) {
                    // Kept as written: the "== 0" comparison lets the literal word "0" through,
                    // and also lets unmatched (empty) groups through; the empty entries they
                    // add to $ignore are stripped again after this block.
                    if ($regs[$n] || $regs[$n] == 0) {
                        $ignore_short_flag++;
                        if (!eregi("\"" . trim(stripslashes($regs[$n])) . "\", ", $ignore)) {
                            $ignore .= "\"" . trim(stripslashes($regs[$n])) . "\", ";
                        }
                        $test_short = trim(str_replace($regs[$n], "", $test_short));
                    }
                }
            }
            if (strlen($test_short) <= SMALL_WORDS_SIZE) {
                if (!eregi("\"" . $test_short . "\", ", $ignore)) {
                    $ignore_short_flag++;
                    $ignore .= "\"" . stripslashes($test_short) . "\", ";
                }
                $test_short = trim(str_replace($test_short, "", $test_short));
            }
        }
        $ignore = str_replace("\"\", ", "", $ignore);

        if ($option != "exact") {
            if ($ignore && $ignore_short_flag > 1 && $test_short_counter > 1) {
                $ignore_message = $ignore . ' ' . phpdigMsg('w_short_plur');
            } elseif ($ignore) {
                $ignore_message = $ignore . ' ' . phpdigMsg('w_short_sing');
            }
        }

        // Turn every remaining long-enough word into a search criterion.
        // A leading "-" marks a word whose pages must be excluded.
        $ignore_common_flag = 0;
        while (ereg("(-)?([^ ]{" . (SMALL_WORDS_SIZE + 1) . ",}).*", $query_for_strings, $regs)) {
            $query_for_strings = trim(str_replace($regs[2], "", $query_for_strings));
            if (!isset($common_words[stripslashes($regs[2])])) {
                if ($regs[1] == '-') {
                    $exclude[$ncrit] = $regs[2];
                    $query_for_phrase = trim(str_replace("-" . $regs[2], "", $query_for_phrase));
                } else {
                    $strings[$ncrit] = $regs[2];
                }
                $kconds[$ncrit] = '';
                if ($option != 'any') {
                    $kconds[$ncrit] .= " AND k.twoletters = '" . addslashes(substr(str_replace('\\', '', $regs[2]), 0, 2)) . "' ";
                }
                $kconds[$ncrit] .= " AND k.keyword " . $like_operator[$option] . " '" . $like_start[$option] . $regs[2] . $like_end[$option] . "' ";
                $ncrit++;
            } else {
                $ignore_common_flag++;
                $ignore_common .= "\"" . stripslashes($regs[2]) . "\", ";
            }
        }

        if ($option != "exact") {
            if ($ignore_common && $ignore_common_flag > 1) {
                $ignore_commess = $ignore_common . ' ' . phpdigMsg('w_common_plur');
            } elseif ($ignore_common) {
                $ignore_commess = $ignore_common . ' ' . phpdigMsg('w_common_sing');
            }
        }

        $timer->stop('parsing strings');

        if ($ncrit && is_array($strings)) {
            // Plain SET is used because the "SET OPTION" form was removed in MySQL 5.6.
            $query = "SET SQL_BIG_SELECTS = 1";
            mysql_query($query, $id_connect);

            $my_spider2site_array = array();
            $my_sitecount_array = array();

            // One query per criterion: pages (spider_id) with their summed keyword weight.
            for ($n = 0; $n < $ncrit; $n++) {
                $timer->start('spider queries');
                $query = "SELECT spider.spider_id,sum(weight) as weight, spider.site_id\r\n FROM " . PHPDIG_DB_PREFIX . "keywords as k," . PHPDIG_DB_PREFIX . "engine as engine, " . PHPDIG_DB_PREFIX . "spider as spider\r\n WHERE engine.key_id = k.key_id\r\n " . $kconds[$n] . "\r\n AND engine.spider_id = spider.spider_id {$wheresite} {$wherepath}\r\n GROUP BY spider.spider_id,spider.site_id ";
                $result = mysql_query($query, $id_connect);
                // A failed query counts as "no rows" instead of passing false on.
                $num_res_temp = $result ? mysql_num_rows($result) : 0;
                $timer->stop('spider queries');

                $timer->start('spider fills');
                if ($num_res_temp > 0) {
                    if (!isset($exclude[$n])) {
                        $num_res[$n] = $num_res_temp;
                        while (list($spider_id, $weight, $site_id) = mysql_fetch_array($result)) {
                            $s_weight[$n][$spider_id] = $weight;
                            $my_spider2site_array[$spider_id] = $site_id;
                            $my_sitecount_array[$site_id] = 0;
                        }
                    } else {
                        $num_exclude[$n] = $num_res_temp;
                        while (list($spider_id, $weight) = mysql_fetch_array($result)) {
                            $s_exclude[$n][$spider_id] = 1;
                        }
                    }
                } elseif (!isset($exclude[$n])) {
                    $num_res[$n] = 0;
                    $s_weight[$n][0] = 0;
                }
                // Release every result set, not only the ones of exclusion terms.
                if ($result) {
                    mysql_free_result($result);
                }
                $timer->stop('spider fills');
            }

            $timer->start('reorder results');
            if ($option != "any") {
                // All terms required: start from the term with the fewest hits and
                // multiply the square roots of the weights, so a page missing any
                // term ends up with weight 0 and is left out.
                if (is_array($num_res)) {
                    asort($num_res);
                    list($id_most) = each($num_res);
                    reset($s_weight[$id_most]);
                    while (list($spider_id, $weight) = each($s_weight[$id_most])) {
                        $weight_tot = 1;
                        reset($num_res);
                        while (list($n) = each($num_res)) {
                            settype($s_weight[$n][$spider_id], 'integer');
                            $weight_tot *= sqrt($s_weight[$n][$spider_id]);
                        }
                        if ($weight_tot > 0) {
                            $final_result[$spider_id] = $weight_tot;
                        }
                    }
                }
            } else {
                // Any term is enough: sum the square roots of the weights over all terms.
                if (is_array($num_res)) {
                    asort($num_res);
                    while (list($spider_id, $site_id) = each($my_spider2site_array)) {
                        $weight_tot = 0;
                        reset($num_res);
                        while (list($n) = each($num_res)) {
                            settype($s_weight[$n][$spider_id], 'integer');
                            $weight_tot += sqrt($s_weight[$n][$spider_id]);
                        }
                        if ($weight_tot > 0) {
                            $final_result[$spider_id] = $weight_tot;
                        }
                    }
                }
            }

            // Remove the pages matched by "-word" exclusions.
            if (isset($num_exclude) && is_array($num_exclude)) {
                while (list($id) = each($num_exclude)) {
                    while (list($spider_id) = each($s_exclude[$id])) {
                        if (isset($final_result[$spider_id])) {
                            unset($final_result[$spider_id]);
                        }
                    }
                }
            }

            // Exact mode: keep a page only if its stored text contains the whole phrase.
            // Pages without a stored text file are kept.
            if ($option == "exact") {
                if (is_array($final_result) && count($final_result) > 0) {
                    arsort($final_result);
                    reset($final_result);
                    $query_for_phrase_array = explode(" ", $query_for_phrase);
                    $reg_strings = str_replace('@#@', ' ', phpdigPregQuotes(str_replace('\\', '', implode('@#@', $query_for_phrase_array))));
                    $stop_regs = "[][(){}[:blank:]=&?!&#%\$£*@+%:;,/\\.'\"]";
                    $reg_strings = "({$stop_regs}{1}|^)({$reg_strings})({$stop_regs}{1}|\$)";
                    while (list($spider_id, $weight) = each($final_result)) {
                        $content_file = $relative_script_path . '/' . TEXT_CONTENT_PATH . $spider_id . '.txt';
                        if (is_file($content_file)) {
                            // An unreadable or empty file is treated as empty text (no match),
                            // without calling fread() on a bad handle or with a zero length.
                            $extract_content = '';
                            $content_size = filesize($content_file);
                            $f_handler = fopen($content_file, 'r');
                            if ($f_handler) {
                                if ($content_size > 0) {
                                    $extract_content = preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", fread($f_handler, $content_size));
                                }
                                fclose($f_handler);
                            }
                            if (!eregi($reg_strings, $extract_content)) {
                                unset($final_result[$spider_id]);
                            }
                        }
                    }
                }
            }

            // Cap the number of results per site (best weights first) unless refining.
            if (!$refine && NUMBER_OF_RESULTS_PER_SITE != -1) {
                if (is_array($final_result) && count($final_result) > 0) {
                    arsort($final_result);
                    reset($final_result);
                    while (list($spider_id, $weight) = each($final_result)) {
                        $site_id = $my_spider2site_array[$spider_id];
                        $current_site_counter = $my_sitecount_array[$site_id];
                        if ($current_site_counter < NUMBER_OF_RESULTS_PER_SITE) {
                            $my_sitecount_array[$site_id]++;
                        } else {
                            if (isset($final_result[$spider_id])) {
                                unset($final_result[$spider_id]);
                            }
                        }
                    }
                }
            }
            $timer->stop('reorder results');
        }

        $timer->stop('All backend');
        $timer->start('All display');

        if (is_array($final_result) && count($final_result) > 0) {
            arsort($final_result);

            // Work out the 1-based range [n_start, n_end] of the requested page.
            $lim_start = max(0, $lim_start - $lim_start % $limite);
            $n_start = $lim_start + 1;
            $num_tot = count($final_result);
            if ($n_start + $limite - 1 < $num_tot) {
                $n_end = $lim_start + $limite;
                $more_results = 1;
            } else {
                $n_end = $num_tot;
                $more_results = 0;
            }
            // Offset beyond the last result: fall back to the first page.
            if ($n_start > $n_end) {
                $n_start = 1;
                $n_end = min($num_tot, $limite);
                $lim_start = 0;
                if ($n_end < $num_tot) {
                    $more_results = 1;
                }
            }

            // ereg for text snippets and highlighting
            if ($option == "exact") {
                $reg_strings = str_replace('@#@', ' ', phpdigPregQuotes(str_replace('\\', '', implode('@#@', $query_for_phrase_array))));
            } else {
                $reg_strings = str_replace('@#@', '|', phpdigPregQuotes(str_replace('\\', '', implode('@#@', $strings))));
            }
            $stop_regs = "[][(){}[:blank:]=&?!&#%\$£*@+%:;,/\\.'\"]";
            // 'start' and 'any' only anchor the beginning of a word; 'exact' anchors both ends.
            if ($option == 'exact') {
                $reg_strings = "({$stop_regs}{1}|^)({$reg_strings})({$stop_regs}{1}|\$)";
            } else {
                $reg_strings = "({$stop_regs}{1}|^)({$reg_strings})()";
            }

            $timer->start('Result table');
            //fill the results table
            reset($final_result);
            for ($n = 1; $n <= $n_end; $n++) {
                // Every entry up to $n_end is consumed so the pointer reaches the requested
                // page; the first (highest) weight becomes the 100 % reference.
                list($spider_id, $s_weight) = each($final_result);
                if (!$maxweight) {
                    $maxweight = $s_weight;
                }
                if ($n >= $n_start) {
                    $timer->start('Display queries');
                    $query = "SELECT sites.site_url, sites.port, spider.path,spider.file,spider.first_words,sites.site_id,spider.spider_id,spider.last_modified,spider.md5 " . "FROM " . PHPDIG_DB_PREFIX . "spider AS spider, " . PHPDIG_DB_PREFIX . "sites AS sites " . "WHERE spider.spider_id={$spider_id} AND sites.site_id = spider.site_id";
                    $result = mysql_query($query, $id_connect);
                    $content = false;
                    if ($result) {
                        $content = mysql_fetch_array($result, MYSQL_ASSOC);
                        mysql_free_result($result);
                    }

                    if ($content['port']) {
                        $content['site_url'] = ereg_replace('/$', ':' . $content['port'] . '/', $content['site_url']);
                    }
                    $weight = sprintf("%01.2f", 100 * $s_weight / $maxweight);
                    $url = eregi_replace("([" . $phpdig_words_chars[PHPDIG_ENCODING] . "])[/]{2,}", "\\1/", urldecode($content['site_url'] . $content['path'] . $content['file']));
                    $js_url = urlencode(eregi_replace("^[a-z]{3,5}://", "", $url));
                    $url = str_replace("\"", "%22", str_replace("'", "%27", str_replace(" ", "%20", trim($url))));

                    $l_site = "<a class='phpdig' href='" . SEARCH_PAGE . "?refine=1&template_demo=" . $template_demo . "&query_string=" . urlencode($my_query_string_link) . "&site=" . $content['site_id'] . "&limite={$limite}&option={$option}'>" . htmlspecialchars(urldecode($content['site_url']), ENT_QUOTES) . "</a>";
                    if ($content['path']) {
                        $content['path'] = urlencode(urldecode($content['path']));
                        $content2['path'] = htmlspecialchars(urldecode($content['path']), ENT_QUOTES);
                        $l_path = ", " . phpdigMsg('this_path') . " : <a class='phpdig' href='" . SEARCH_PAGE . "?refine=1&template_demo=" . $template_demo . "&query_string=" . urlencode($my_query_string_link) . "&site=" . $content['site_id'] . "&path=" . $content['path'] . "&limite={$limite}&option={$option}' >" . $content2['path'] . "</a>";
                    } else {
                        $content2['path'] = "";
                        $l_path = "";
                    }

                    $first_words = $content['first_words'];
                    $timer->stop('Display queries');

                    $timer->start('Extracts');
                    $extract = "";
                    //Try to retrieve matching lines if the content-text is set to 1
                    if (CONTENT_TEXT == 1 && DISPLAY_SNIPPETS) {
                        $content_file = $relative_script_path . '/' . TEXT_CONTENT_PATH . $content['spider_id'] . '.txt';
                        if (is_file($content_file)) {
                            $num_extracts = 0;
                            $my_extract_size = SNIPPET_DISPLAY_LENGTH;
                            $my_filesize_for_while = filesize($content_file);
                            // Read the file in chunks; when no chunk matched, retry with a chunk
                            // 100 times larger (capped at the file size) until one match is found
                            // or the whole file has been read as a single chunk.
                            while ($num_extracts == 0 && $my_extract_size <= $my_filesize_for_while) {
                                $f_handler = fopen($content_file, 'r');
                                if (!$f_handler) {
                                    // Unreadable file: no snippet, same outcome as an unmatched file.
                                    break;
                                }
                                while ($num_extracts < DISPLAY_SNIPPETS_NUM && ($extract_content = preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", fread($f_handler, $my_extract_size)))) {
                                    if (eregi($reg_strings, $extract_content)) {
                                        // Mark the match with "<" to find its position, then cut a window around it.
                                        $match_this_spot = eregi_replace($reg_strings, "\\1<\\2>\\3", $extract_content);
                                        $first_bold_spot = strpos($match_this_spot, "<");
                                        $first_bold_spot = max($first_bold_spot - round(SNIPPET_DISPLAY_LENGTH / 2, 0), 0);
                                        $extract_content = substr($extract_content, $first_bold_spot, max(SNIPPET_DISPLAY_LENGTH, 2 * strlen($query_string)));
                                        $extract .= ' ...' . phpdigHighlight($reg_strings, $extract_content) . '... ';
                                        $num_extracts++;
                                    }
                                }
                                fclose($f_handler);
                                if ($my_extract_size < $my_filesize_for_while) {
                                    $my_extract_size *= 100;
                                    if ($my_extract_size > $my_filesize_for_while) {
                                        $my_extract_size = $my_filesize_for_while;
                                    }
                                } else {
                                    // Whole file already read in one chunk: step past the size to end the loop.
                                    $my_extract_size++;
                                }
                            }
                        }
                    }

                    // first_words holds the title on its first line and the text on its second;
                    // pad so a value without a newline does not raise an undefined-offset notice.
                    list($title, $text) = array_pad(explode("\n", $first_words), 2, '');
                    $title = htmlspecialchars(phpdigHighlight($reg_strings, urldecode($title)), ENT_QUOTES);
                    $title = phpdigSpanReplace($title);
                    $timer->stop('Extracts');

                    $table_results[$n] = array(
                        'weight' => $weight,
                        'img_tag' => '<img border="0" src="' . WEIGHT_IMGSRC . '" width="' . ceil(WEIGHT_WIDTH * $weight / 100) . '" height="' . WEIGHT_HEIGHT . '" alt="" />',
                        'page_link' => "<a class=\"phpdig\" href=\"" . $url . "\" onmousedown=\"return clickit(" . $n . ",'" . $js_url . "')\" target=\"" . LINK_TARGET . "\" >" . $title . "</a>",
                        'limit_links' => phpdigMsg('limit_to') . " " . $l_site . $l_path,
                        'filesize' => sprintf('%.1f', ereg_replace('.*_([0-9]+)$', '\\1', $content['md5']) / 1024),
                        'update_date' => ereg_replace('^([0-9]{4})[-]?([0-9]{2})[-]?([0-9]{2}).*', PHPDIG_DATE_FORMAT, $content['last_modified']),
                        'complete_path' => $url,
                        'link_title' => $title
                    );

                    $table_results[$n]['text'] = '';
                    if (DISPLAY_SUMMARY) {
                        // Summary: the text cut at the first wordwrap break, highlighted and escaped.
                        $table_results[$n]['text'] = htmlspecialchars(phpdigHighlight($reg_strings, preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", ereg_replace('(@@@.*)', '', wordwrap($text, SUMMARY_DISPLAY_LENGTH, '@@@')))), ENT_QUOTES);
                        $table_results[$n]['text'] = phpdigSpanReplace($table_results[$n]['text']);
                    }
                    if (DISPLAY_SUMMARY && DISPLAY_SNIPPETS) {
                        $table_results[$n]['text'] .= "\n<br/><br/>\n";
                    }
                    if (DISPLAY_SNIPPETS) {
                        if ($extract) {
                            $extract = htmlspecialchars($extract, ENT_QUOTES);
                            $extract = phpdigSpanReplace($extract);
                            $table_results[$n]['text'] .= $extract;
                        } else {
                            // No snippet found: fall back to the summary when nothing is shown yet.
                            if (!$table_results[$n]['text']) {
                                $table_results[$n]['text'] = htmlspecialchars(phpdigHighlight($reg_strings, preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", ereg_replace('(@@@.*)', '', wordwrap($text, SUMMARY_DISPLAY_LENGTH, '@@@')))), ENT_QUOTES);
                                $table_results[$n]['text'] = phpdigSpanReplace($table_results[$n]['text']);
                            }
                        }
                    }
                }
            }
            $timer->stop('Result table');

            $timer->start('Final strings');
            $url_bar = SEARCH_PAGE . "?template_demo={$template_demo}&browse=1&query_string=" . urlencode($my_query_string_link) . "{$refine_url}&limite={$limite}&option={$option}&lim_start=";

            if ($lim_start > 0) {
                $previous_link = $url_bar . ($lim_start - $limite);
                $nav_bar .= "<a class=\"phpdig\" href=\"{$previous_link}\" ><<" . phpdigMsg('previous') . "</a> \n";
            }

            // Page links: up to five pages before the current one, and at least up to page 10 after it.
            $tot_pages = ceil($num_tot / $limite);
            $actual_page = $lim_start / $limite + 1;
            $page_inf = max(1, $actual_page - 5);
            $page_sup = min($tot_pages, max($actual_page + 5, 10));
            for ($page = $page_inf; $page <= $page_sup; $page++) {
                if ($page == $actual_page) {
                    $nav_bar .= " <span class=\"phpdigHighlight\">{$page}</span> \n";
                    $pages_bar .= " <span class=\"phpdigHighlight\">{$page}</span> \n";
                } else {
                    $nav_bar .= " <a class=\"phpdig\" href=\"" . $url_bar . (($page - 1) * $limite) . "\" >{$page}</a> \n";
                    $pages_bar .= " <a class=\"phpdig\" href=\"" . $url_bar . (($page - 1) * $limite) . "\" >{$page}</a> \n";
                }
            }

            if ($more_results == 1) {
                $next_link = $url_bar . ($lim_start + $limite);
                $nav_bar .= " <a class=\"phpdig\" href=\"{$next_link}\" >" . phpdigMsg('next') . ">></a>\n";
            }

            $mtime = explode(' ', microtime());
            $search_time = sprintf('%01.2f', $mtime[0] + $mtime[1] - $start_time);
            $result_message = stripslashes(ucfirst(phpdigMsg('results')) . " {$n_start}-{$n_end}, {$num_tot} " . phpdigMsg('total') . ", " . phpdigMsg('on') . " \"" . htmlspecialchars($query_string, ENT_QUOTES) . "\" ({$search_time} " . phpdigMsg('seconds') . ")");
            $timer->stop('Final strings');
        } else {
            // No result: look for similar sounding indexed keywords to suggest another query.
            if (is_array($strings)) {
                $strings = array_values($strings);
                $num_in_strings_arr = count($strings);
            } else {
                $num_in_strings_arr = 0;
            }

            $leven_final = "";
            $leven_sum = 0;
            if ($num_in_strings_arr > 0 && strlen($path) == 0) {
                for ($i = 0; $i < $num_in_strings_arr; $i++) {
                    $soundex_query = "SELECT keyword FROM " . PHPDIG_DB_PREFIX . "keywords WHERE SOUNDEX(CONCAT('Q',keyword)) = SOUNDEX(CONCAT('Q','" . $strings[$i] . "')) LIMIT 500";
                    $soundex_results = mysql_query($soundex_query, $id_connect);
                    if ($soundex_results) {
                        if (mysql_num_rows($soundex_results) > 0) {
                            $leven_ind = 0;
                            $leven_amt1 = 256;
                            $leven_keyword = array();
                            // Track the smallest edit distance seen so far; each keyword that
                            // improves on it (and is within 5 edits) is recorded, so the last
                            // recorded keyword is the closest one.
                            while (list($soundex_keyword) = mysql_fetch_array($soundex_results)) {
                                $leven_amt2 = min(levenshtein(stripslashes($strings[$i]), $soundex_keyword), $leven_amt1);
                                if ($leven_amt2 < $leven_amt1 && $leven_amt2 >= 0 && $leven_amt2 <= 5) {
                                    $leven_keyword[$leven_ind] = stripslashes($soundex_keyword);
                                    $leven_ind++;
                                }
                                $leven_amt1 = $leven_amt2;
                            }
                            $leven_count = count($leven_keyword);
                            $leven_sum = $leven_sum + $leven_amt1;
                            if ($leven_count > 0) {
                                $leven_final .= $leven_keyword[$leven_count - 1] . " ";
                            }
                            unset($leven_keyword);
                        }
                        // The suggestion result set was never released before.
                        mysql_free_result($soundex_results);
                    }
                }
            }

            $num_tot = 0;
            $result_message = phpdigMsg('noresults');
            // A total distance of 0 means the suggestion would equal the query: do not show it.
            if (strlen(trim($leven_final)) > 0 && $leven_sum > 0) {
                $leven_query = trim($leven_final);
                $result_message .= ". " . phpdigMsg('alt_try') . " <a class=\"phpdigMessage\" href=\"" . SEARCH_PAGE . "?template_demo={$template_demo}&query_string=" . urlencode($leven_query) . "\"><i>" . htmlspecialchars($leven_query, ENT_QUOTES) . "</i></a>?";
            }
        }

        $title_message = stripslashes(ucfirst(phpdigMsg('s_results')) . " " . phpdigMsg('on') . " " . htmlspecialchars($query_string, ENT_QUOTES));
    } else {
        $title_message = 'PhpDig ' . PHPDIG_VERSION;
        $result_message = phpdigMsg('no_query') . '.';
    }

    $timer->start('Logs');
    // Only fresh searches are logged: no page browsing, no refinement, no ad-log flag.
    if (PHPDIG_LOGS == true && !$browse && !$refine && $adlog_flag == 0) {
        if (is_array($final_result)) {
            phpdigAddLog($id_connect, $option, $strings, $exclude, count($final_result), $search_time);
        } else {
            phpdigAddLog($id_connect, $option, $strings, $exclude, 0, $search_time);
        }
    }
    $timer->stop('Logs');

    $timer->start('Template parsing');
    $powered_by_link = "<font size=\"1\" face=\"verdana,arial,sans-serif\">";
    if (ALLOW_RSS_FEED == true && !defined('LIST_LINKS') && $num_tot > 0) {
        $powered_by_link .= "<a href=\"" . $rssdf . "\">" . phpdigMsg('viewRSS') . "</a><br>";
    }
    if (LIST_ENABLE == true && !defined('LIST_LINKS')) {
        if (!empty($template_demo)) {
            $listplate = "template_demo={$template_demo}";
        } else {
            $listplate = "";
        }
        $powered_by_link .= "<a href=\"" . LIST_PAGE . "?{$listplate}\">" . phpdigMsg('viewList') . "</a><br>";
    }
    $powered_by_link .= "<a href=\"http://www.phpdig.net/\">" . phpdigMsg('powered_by') . "</a><br></font>";

    if (is_array($strings)) {
        $js_string = implode(" ", $strings);
    } else {
        $js_string = "";
    }
    $js_for_clicks = "\r\n<script language=\"JavaScript\">\r\n<!--\r\nfunction clickit(cn,clink) {\r\n if(document.images) {\r\n (new Image()).src=\"clickstats.php?num=\"+cn+\"&url=\"+clink+\"&val=" . urlencode($js_string) . "\";\r\n }\r\n return true;\r\n}\r\n//-->\r\n</script>\r\n";

    if ($template == 'array' || is_file($template)) {
        $phpdig_version = PHPDIG_VERSION;
        // compact() picks these locals up by name: do not rename them.
        $t_mstrings = compact('js_for_clicks', 'powered_by_link', 'title_message', 'phpdig_version', 'result_message', 'nav_bar', 'ignore_message', 'ignore_commess', 'pages_bar', 'previous_link', 'next_link');
        $t_fstrings = phpdigMakeForm($query_string, $option, $limite, SEARCH_PAGE, $site, $path, 'template', $template_demo, $num_tot, $refine);
        if ($template == 'array') {
            // Early return: the timers still running are not stopped on this path.
            return array_merge($t_mstrings, $t_fstrings, array('results' => $table_results));
        } else {
            $t_strings = array_merge($t_mstrings, $t_fstrings);
            phpdigParseTemplate($template, $t_strings, $table_results);
        }
    } else {
        // No usable template: print the built-in result page.
        include $relative_script_path . '/libs/htmlheader.php';
        ?>
<head>
<title><?php print $title_message; ?></title>
<?php include $relative_script_path . '/libs/htmlmetas.php'; ?>
<style>
.phpdigHighlight {color:<?php print HIGHLIGHT_COLOR; ?>;
                 background-color:<?php print HIGHLIGHT_BACKGROUND; ?>;
                 font-weight:bold;
                 }
.phpdigMessage {padding:1px;background-color:#002288;color:white;}
</style>
<script language="JavaScript">
<!--
function clickit(cn,clink) {
  if(document.images) {
     (new Image()).src="clickstats.php?num="+cn+"&url="+clink+"&val=<?php echo urlencode($js_string); ?>";
  }
  return true;
}
//-->
</script>
</head>
<body bgcolor="white">
<div align="center">
<img src="phpdig_logo_2.png" width="200" height="114" alt="phpdig <?php print PHPDIG_VERSION; ?>" border="0" />
<br />
<?php
        phpdigMakeForm($query_string, $option, $limite, SEARCH_PAGE, $site, $path, 'classic', $template_demo, $num_tot, $refine);
        ?>
<h3><span class="phpdigMsg"><?php print $result_message; ?></span>
<br /><span class="phpdigAlert"><?php print $ignore_message; ?></span>
<br /><span class="phpdigAlert"><?php print $ignore_commess; ?></span>
</h3>
</div>
<?php
        if (is_array($table_results)) {
            while (list($n, $t_result) = each($table_results)) {
                print "<p style='background-color:#CCDDFF;'>\n";
                print "<b>{$n}. <font style='font-size:10;'>[" . $t_result['weight'] . " %]</font> " . $t_result['page_link'] . "</b>\n<br />\n";
                print "<font style='font-size:10;background-color:#BBCCEE;'>" . $t_result['limit_links'] . "</font>\n<br />\n";
                print "</p>\n";
                print "<blockquote style='background-color:#EEEEEE;font-size:10;'>\n";
                print $t_result['text'];
                print "</blockquote>\n";
            }
        }
        print "<p style='text-align:center;background-color:#CCDDFF;font-weight:bold'>\n";
        print $nav_bar;
        print "</p>\n";
        ?>
<hr />
<div align="center">
<?php
        if ($query_string) {
            phpdigMakeForm($query_string, $option, $limite, SEARCH_PAGE, $site, $path, 'classic', $template_demo, $num_tot, $refine);
        }
        ?>
</div>
<div align='center'>
<a href='http://www.phpdig.net/' target='_blank'><img src='phpdig_powered_2.png' width='88' height='28' border='0' alt='Powered by PhpDig' /></a>
</div>
</body>
</html>
<?php
    }

    $timer->stop('Template parsing');
    $timer->stop('All display');
    $timer->stop('All');
    //$timer->display();
}
/**
 * Build the PhpDig search form.
 *
 * In 'classic' mode the form is printed directly and nothing is returned.
 * In any other mode the form pieces are returned as an array with the keys
 * templates_links, form_head, form_dropdown, form_foot, form_title,
 * form_field, form_select, form_button and form_radio.
 *
 * Every caller-supplied or database-supplied value that ends up inside an
 * HTML attribute is escaped with htmlspecialchars(..., ENT_QUOTES); values
 * placed in link query strings are urlencode()d. For well-formed input the
 * generated markup is identical to what this function produced before.
 *
 * @param string $query_string  Current search terms (may carry magic-quote slashes).
 * @param string $option        Search mode: 'start', 'any' or 'exact'. Any other
 *                              value leaves all radio buttons unchecked.
 * @param int    $limite        Results per page; 0 falls back to 10.
 * @param string $result_page   Form target (SEARCH_PAGE from config.php).
 * @param mixed  $site          Site id used to narrow the search.
 * @param string $path          Path used to narrow the search.
 * @param string $mode          'classic' prints the form, anything else returns it.
 * @param string $template_demo Template name carried along in a hidden field.
 * @param int    $num_tot       Number of results of the current search.
 * @param int    $refine        Refine flag carried along in a hidden field.
 *
 * @return array|null Form pieces in non-classic mode, null in classic mode.
 */
function phpdigMakeForm($query_string = "", $option = "start", $limite = 10, $result_page = "search.php", $site = "", $path = "", $mode = 'classic', $template_demo = "", $num_tot = 0, $refine = 0)
{
    // $result_page is SEARCH_PAGE from config.php file
    // $mode is 'template' from search_function.php file
    $result = array();

    if (!isset($option) || !is_scalar($option)) {
        $option = 'start';
    }
    $option = (string) $option;

    settype($limite, 'integer');
    if ($limite == 0) {
        $limite = 10;
    }

    // HTML-attribute-safe copies of the caller-supplied values.
    $safe_page   = htmlspecialchars((string) $result_page, ENT_QUOTES);
    $safe_demo   = htmlspecialchars((string) $template_demo, ENT_QUOTES);
    $safe_refine = htmlspecialchars((string) $refine, ENT_QUOTES);

    // Radio button state: only a known mode gets checked, no undefined index otherwise.
    $checked = array('start' => '', 'any' => '', 'exact' => '');
    if (isset($checked[$option])) {
        $checked[$option] = 'checked="checked"';
    }

    // Result-limit <option> list: 10/30/100 plus SEARCH_DEFAULT_LIMIT when it is
    // not one of those, in ascending order.
    $limits = array(10, 30, 100);
    if (!in_array(SEARCH_DEFAULT_LIMIT, $limits)) {
        $limits[] = SEARCH_DEFAULT_LIMIT;
        sort($limits, SORT_NUMERIC);
    }
    $selectoptlist = "";
    foreach ($limits as $limit_value) {
        $selected = ($limit_value == $limite) ? 'selected="selected"' : '';
        $selectoptlist .= "<option " . $selected . ">" . $limit_value . "</option>";
    }

    // Template chooser links (one per bundled template).
    $template_names = array(
        'phpdig', 'black', 'simple', 'green', 'grey', 'yellow', 'bluegrey',
        'terminal', 'linear', 'lightgreen', 'newspaper', 'corporate', 'gaagle'
    );
    $query_string2 = urlencode(stripslashes($query_string));
    $use_list_links = defined('LIST_LINKS');
    $links = "\r\n<b>" . phpdigMsg('choose_temp') . "</b> : \r\n";
    foreach ($template_names as $template_name) {
        $template_file = $template_name . '.html';
        if ($use_list_links) {
            $href = LIST_PAGE . "?template_demo={$template_file}&" . LIST_ACTION . "&page=" . LIST_PAGES;
        } else {
            $href = SEARCH_PAGE . "?template_demo={$template_file}&result_page=" . SEARCH_PAGE
                . "&browse=1&query_string={$query_string2}&limite={$limite}&option=" . urlencode($option);
        }
        $links .= "<a href='" . $href . "'><u>" . $template_file . "</u></a> \r\n";
    }
    $result['templates_links'] = $links;

    if (DISPLAY_DROPDOWN) {
        $dropdown_flag = 0;
        $relative_script_path = '.';
        // connect.php is expected to provide $id_connect in this scope.
        if (is_file("{$relative_script_path}/includes/connect.php")) {
            include "{$relative_script_path}/includes/connect.php";
        } else {
            die("Unable to find connect.php file for dropdown menu.\n");
        }

        if ($num_tot == 0 || (empty($site) && empty($path))) {
            // Nothing to narrow yet: offer the site list.
            $dropdown_flag = 1;
            $path = "";
        } elseif (isset($site) && is_numeric($site) && $site > 0) {
            $site = (int) $site;
            if (DROPDOWN_URLS) {
                $dd_query = mysql_query('SELECT DISTINCT ' . PHPDIG_DB_PREFIX . 'sites.site_url AS ' . 'site_url,' . PHPDIG_DB_PREFIX . 'spider.path AS path ' . 'FROM ' . PHPDIG_DB_PREFIX . 'sites,' . PHPDIG_DB_PREFIX . 'spider ' . 'WHERE ' . PHPDIG_DB_PREFIX . 'sites.site_id = ' . $site . ' ' . 'AND ' . PHPDIG_DB_PREFIX . 'sites.site_id = ' . PHPDIG_DB_PREFIX . 'spider.site_id ' . 'AND ' . PHPDIG_DB_PREFIX . 'spider.path != ""', $id_connect);
            } else {
                $dd_query = mysql_query('SELECT DISTINCT path FROM ' . PHPDIG_DB_PREFIX . 'spider WHERE site_id = ' . $site . ' AND path != ""', $id_connect);
            }

            if ($dd_query && mysql_num_rows($dd_query) > 0) {
                // A site is selected and has paths: offer the path list.
                $result['form_head'] = "<form action='{$safe_page}' method='post'>\r\n"
                    . " <input type='hidden' name='site' value='{$site}'/>\r\n"
                    . " <input type='hidden' name='refine' value='{$safe_refine}'/>\r\n"
                    . " <input type='hidden' name='template_demo' value='{$safe_demo}'/>\r\n"
                    . " <input type='hidden' name='result_page' value='{$safe_page}'/>";
                $result['form_dropdown'] = phpdigMsg('narrow_path') . ": <select name='path'>";
                // value needs to be something unique, not blank
                $result['form_dropdown'] .= "<option value='-###-'>" . phpdigMsg('searchall') . "</option>";
                while ($dd_data = mysql_fetch_array($dd_query)) {
                    // site_url is only selected when DROPDOWN_URLS is on.
                    $row_path = isset($dd_data['path']) ? (string) $dd_data['path'] : '';
                    $row_url  = isset($dd_data['site_url']) ? (string) $dd_data['site_url'] : '';
                    $safe_row_path = htmlspecialchars($row_path, ENT_QUOTES);
                    $is_selected = ((string) $path === $row_path) ? ' selected' : '';
                    $result['form_dropdown'] .= "<option value='" . $safe_row_path . "'" . $is_selected . ">"
                        . htmlspecialchars($row_url, ENT_QUOTES) . $safe_row_path . "</option>";
                }
                $result['form_dropdown'] .= "</select> <a href=\"{$safe_page}\">" . phpdigMsg('restart') . "</a>";
            } else {
                $dropdown_flag = 1;
            }
        } else {
            $dropdown_flag = 1;
        }

        if ($dropdown_flag == 1) {
            if (DROPDOWN_URLS) {
                $dd_query = mysql_query('SELECT DISTINCT ' . PHPDIG_DB_PREFIX . 'sites.site_id AS ' . 'site_id,' . PHPDIG_DB_PREFIX . 'sites.site_url AS site_url,' . PHPDIG_DB_PREFIX . 'spider.path AS path ' . 'FROM ' . PHPDIG_DB_PREFIX . 'sites,' . PHPDIG_DB_PREFIX . 'spider ' . 'WHERE ' . PHPDIG_DB_PREFIX . 'sites.site_id = ' . PHPDIG_DB_PREFIX . 'spider.site_id', $id_connect);
            } else {
                $dd_query = mysql_query('SELECT site_id,site_url FROM ' . PHPDIG_DB_PREFIX . 'sites', $id_connect);
            }

            $result['form_head'] = "<form action='{$safe_page}' method='post'>\r\n"
                . " <input type='hidden' name='path' value='" . htmlspecialchars((string) $path, ENT_QUOTES) . "'/>\r\n"
                . " <input type='hidden' name='refine' value='{$safe_refine}'/>\r\n"
                . " <input type='hidden' name='template_demo' value='{$safe_demo}'/>\r\n"
                . " <input type='hidden' name='result_page' value='{$safe_page}'/>";
            $result['form_dropdown'] = phpdigMsg('select_site') . ": <select name='site'>";
            $result['form_dropdown'] .= "<option value=''>" . phpdigMsg('searchall') . "</option>";
            // A failed query simply yields an empty site list.
            while ($dd_query && ($dd_data = mysql_fetch_array($dd_query))) {
                // path is only selected when DROPDOWN_URLS is on.
                $row_path = isset($dd_data['path']) ? (string) $dd_data['path'] : '';
                $row_url  = isset($dd_data['site_url']) ? (string) $dd_data['site_url'] : '';
                $row_id   = isset($dd_data['site_id']) ? (string) $dd_data['site_id'] : '';
                $safe_row_path = htmlspecialchars($row_path, ENT_QUOTES);
                $result['form_dropdown'] .= "<option value='" . htmlspecialchars($row_id, ENT_QUOTES) . "," . $safe_row_path . "'>"
                    . htmlspecialchars($row_url, ENT_QUOTES) . $safe_row_path . "</option>";
            }
            $result['form_dropdown'] .= "</select> <a href=\"{$safe_page}\">" . phpdigMsg('restart') . "</a>";
        }
    } else {
        $result['form_dropdown'] = '';
        $result['form_head'] = "<form action='{$safe_page}' method='post'>\r\n"
            . " <input type='hidden' name='site' value='" . htmlspecialchars((string) $site, ENT_QUOTES) . "'/>\r\n"
            . " <input type='hidden' name='path' value='" . htmlspecialchars((string) $path, ENT_QUOTES) . "'/>\r\n"
            . " <input type='hidden' name='template_demo' value='{$safe_demo}'/>\r\n"
            . " <input type='hidden' name='result_page' value='{$safe_page}'/>\r\n ";
    }

    $result['form_foot'] = "</form>";
    $result['form_title'] = phpdigMsg('search');
    $result['form_field'] = "<input type='text' class='phpdiginputtext' size='" . SEARCH_BOX_SIZE . "' maxlength='" . SEARCH_BOX_MAXLENGTH . "' name='query_string' value='" . htmlspecialchars(stripslashes($query_string), ENT_QUOTES) . "'/>";
    $result['form_select'] = phpdigMsg('display') . "\r\n <select name='limite' class='phpdigselect'>\r\n {$selectoptlist}\r\n </select>\r\n " . phpdigMsg('results') . "\r\n ";
    $result['form_button'] = "<input type='submit' class='phpdiginputsubmit' name='search' value='" . phpdigMsg('go') . "'/>";
    $result['form_radio'] = "<input type=\"radio\" class='phpdiginputradio' name=\"option\" value=\"start\" " . $checked['start'] . "/>" . phpdigMsg('w_begin') . " \r\n"
        . " <input type=\"radio\" class='phpdiginputradio' name=\"option\" value=\"exact\" " . $checked['exact'] . "/>" . phpdigMsg('w_whole') . " \r\n"
        . " <input type=\"radio\" class='phpdiginputradio' name=\"option\" value=\"any\" " . $checked['any'] . "/>" . phpdigMsg('w_part') . " \r\n ";

    if ($mode != 'classic') {
        return $result;
    }

    // Classic mode: print the form wrapped in its layout table. The dropdown
    // and the template links are not part of the classic layout.
    echo $result['form_head'],
        ' <table class="borderCollapse"> <tr> <td class="blueForm"> ',
        $result['form_title'],
        ' </td> </tr> <tr> <td class="greyForm"> ',
        $result['form_field'], ' ', $result['form_button'], ' ', $result['form_select'],
        ' </td> </tr> <tr> <td class="greyForm"> ',
        $result['form_radio'],
        ' </td> </tr> </table> </form> ';
}
while (mysql_num_rows($result_id) > 0) { $query = "DELETE FROM " . PHPDIG_DB_PREFIX . "tempspider"; $result_id = mysql_query($query, $id_connect); sleep(5); // do not remove me as i MAY be needed to make a nice stop echo phpdigMsg('wait'); flush(); @ob_flush(); $query = "SELECT * FROM " . PHPDIG_DB_PREFIX . "tempspider"; $result_id = mysql_query($query, $id_connect); } } mysql_query('UPDATE ' . PHPDIG_DB_PREFIX . 'sites SET locked=0 WHERE locked=1', $id_connect); $query = "UPDATE " . PHPDIG_DB_PREFIX . "sites SET stopped=0"; $result_id = mysql_query($query, $id_connect); echo "<strong>" . phpdigMsg('done') . "</strong>"; } ?> <br /><br /> <a href="index.php?sid=<?php echo $sid; ?> " >[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> . <br /><br /> <a href='http://www.phpdig.net/' target='_blank'><img src='../phpdig_powered_2.png' width='88' height='28' border='0' alt='Powered by PhpDig' /></a>
"><?php print phpdigMsg('upd_sites'); ?> </a><br/> <a href="statistics.php?sid=<?php echo $sid; ?> "><?php print phpdigMsg('statistics'); ?> </a><br/> <a href="stop_spider.php?stop=1&sid=<?php echo $sid; ?> "><?php print phpdigMsg('StopSpider'); ?> </a><br/> </p> </td> </tr> <tr> <td colspan="2"> <p class="grey"> <?php phpdigPrnMsg('admin_msg_1'); ?> <br/> <?php phpdigPrnMsg('admin_msg_2'); ?>
phpdigFtpClose($ftp_id); print "Optimizing tables..." . $br; @mysql_query("OPTIMIZE TABLE " . PHPDIG_DB_PREFIX . "spider", $id_connect); @mysql_query("OPTIMIZE TABLE " . PHPDIG_DB_PREFIX . "engine", $id_connect); @mysql_query("OPTIMIZE TABLE " . PHPDIG_DB_PREFIX . "keywords", $id_connect); //display end of indexing phpdigPrnMsg('id_end'); if ($run_mode == 'http') { ?> <hr /> <a href="index.php?sid=<?php echo $sid; ?> " target="contentset">[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> . <?php if (isset($mode) && isset($site_id) && $mode == 'small') { print '<br /><a href="update_frame.php?sid=' . $sid . '&site_id=' . $site_id . '" target="contentset">[' . phpdigMsg('back') . ']</a> ' . phpdigMsg('to_update') . '.'; } ?> </body> </html> <?php } else { print $br; }
</h2> <?php $locks = phpdigMySelect($id_connect, 'SELECT locked FROM ' . PHPDIG_DB_PREFIX . 'sites WHERE locked = 1'); if (is_array($locks)) { phpdigPrnMsg('onelock'); } else { mysql_query('UPDATE ' . PHPDIG_DB_PREFIX . 'sites SET locked=1', $id_connect); $query = mysql_query("SELECT spider_id FROM " . PHPDIG_DB_PREFIX . "spider WHERE file = '';"); while ($row = mysql_fetch_array($query)) { mysql_query("DELETE FROM " . PHPDIG_DB_PREFIX . "engine WHERE spider_id=" . $row['spider_id'] . ";"); mysql_query("DELETE FROM " . PHPDIG_DB_PREFIX . "spider WHERE spider_id=" . $row['spider_id'] . ";"); phpdigDelText($relative_script_path, $row['spider_id']); $count++; echo $count . " "; } echo phpdigMsg('done'); mysql_query('UPDATE ' . PHPDIG_DB_PREFIX . 'sites SET locked=0', $id_connect); } ?> <br /><br /> <a href="index.php?sid=<?php echo $sid; ?> " >[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> . <br /><br />
/**
 * Index one fetched document: split it into text chunks, count its words and
 * store the spider row, the text content and the keyword/engine records.
 *
 * Steps, in order:
 *  1. read $tempfile line by line, honouring the PHPDIG_EXCLUDE_COMMENT /
 *     PHPDIG_INCLUDE_COMMENT markers, collecting alt/title attribute text and
 *     counting non-local, non-banned link domains as keywords;
 *  2. for HTML, clean every chunk with phpdigCleanHtml() and collect the title;
 *  3. build the summary and an md5 fingerprint, and stop if
 *     phpdigTestDouble() reports a duplicate;
 *  4. count words, update the spider row, write the text, replace the
 *     engine records for this page.
 *
 * @param resource $id_connect    MySQL link used for all queries.
 * @param string   $tempfile      Local file holding the fetched document.
 * @param int      $tempfilesize  Size appended to the duplicate fingerprint.
 * @param int      $site_id       Site the document belongs to.
 * @param mixed    $origine       Not used by this function.
 * @param string   $localdomain   Domain of the indexed site; links to it are not counted.
 * @param string   $path          Document path, stored in the spider row.
 * @param string   $file          Document file name; also the fallback title.
 * @param string   $content_type  'HTML' enables exclude markers, tag-aware chunking and cleaning.
 * @param mixed    $upddate       Passed through to phpdigTestDouble()/phpdigUpdSpiderRow().
 * @param mixed    $last_modified Passed through to phpdigTestDouble()/phpdigUpdSpiderRow().
 * @param array    $tags          Meta tags; 'description' and 'keywords' are used
 *                                when APPEND_TITLE_META is on.
 * @param mixed    $ftp_id        FTP handle passed to phpdigWriteText().
 *
 * @return int 0 when $tempfile is missing, -1 when the document is a
 *             duplicate, otherwise the spider id from phpdigUpdSpiderRow().
 */
function phpdigIndexFile($id_connect, $tempfile, $tempfilesize, $site_id, $origine, $localdomain, $path, $file, $content_type, $upddate, $last_modified, $tags, $ftp_id = '')
{
    //globals
    global $allowed_link_chars, $phpdig_words_chars, $common_words, $relative_script_path, $s_yes, $s_no, $br;

    if (!isset($tempfile) || !is_file($tempfile)) {
        return 0;
    }

    // Meta description / keywords; phpdigCleanHtml() results are used below as
    // arrays carrying a 'content' entry.
    $page_desc = '';
    $page_keywords = '';
    if (APPEND_TITLE_META && is_array($tags)) {
        if (isset($tags['description'])) {
            $page_desc = phpdigCleanHtml($tags['description']);
        }
        if (isset($tags['keywords'])) {
            $page_keywords = phpdigCleanHtml($tags['keywords']);
        }
    }

    $file_content = file($tempfile);
    if (!is_array($file_content)) {
        // Unreadable file: continue with no lines instead of iterating over false.
        $file_content = array();
    }

    $textalts = "";
    $nbre_mots = array(); // word => number of occurrences
    $n_chunk = 0;
    $text = array(0 => '');
    $exclude = false;

    foreach ($file_content as $line) {
        $trimmed = trim($line);
        if (!$trimmed) {
            continue;
        }
        if ($content_type == 'HTML' && $trimmed == PHPDIG_EXCLUDE_COMMENT) {
            $exclude = true;
        } elseif ($trimmed == PHPDIG_INCLUDE_COMMENT) {
            $exclude = false;
            continue;
        }
        if ($exclude) {
            continue;
        }

        //extract alt attributes of images (space-separated so words do not fuse)
        if (eregi("(alt=|title=)[[:blank:]]*[\\'\"][[:blank:]]*([ a-z0-9È-Ë]+)[[:blank:]]*[\\'\"]", $line, $regs)) {
            $textalts .= $regs[2] . " ";
        }

        //extract the domains names not local and not banned to add in keywords
        while (eregi("<a([^>]*href[[:blank:]]*=[[:blank:]]*[\\'\"]?((([a-z]{3,5}://)+(([.a-zA-Z0-9-])+(:[0-9]+)*))*({$allowed_link_chars}\\[?{$allowed_link_chars}\\]?{$allowed_link_chars}))(#[.a-zA-Z0-9-]*)?[\\'\" ]?)", $line, $regs)) {
            // Remove the matched link so the loop advances to the next one.
            $line = str_replace($regs[1], "", $line);
            if ($regs[5] && $regs[5] != $localdomain && !eregi(BANNED, $regs[2]) && ereg('[a-z]+', $regs[5])) {
                if (!isset($nbre_mots[$regs[5]])) {
                    $nbre_mots[$regs[5]] = 1;
                } else {
                    $nbre_mots[$regs[5]]++;
                }
            }
        }

        //cut the text after CHUNK_SIZE characters, only before an opening tag for HTML
        if (strlen($text[$n_chunk]) > CHUNK_SIZE) {
            if ($content_type != 'HTML' or eregi("^[[:blank:]]*<[a-z]+[^>]*>", $line)) {
                $n_chunk++;
                $text[$n_chunk] = " ";
            }
        }
        $text[$n_chunk] .= trim($line) . " ";
    }

    //store the number of chunks
    $max_chunk = $n_chunk;
    //free the array containing file content
    unset($file_content);

    //purify from html tags and store the title
    $doc_title = "";
    if ($content_type == 'HTML') {
        foreach ($text as $chunk_index => $chunk) {
            $cleaned = phpdigCleanHtml($chunk);
            $text[$chunk_index] = trim($cleaned['content']) . " ";
            $doc_title .= $cleaned['title'];
        }
    }

    //set the title in order <title>, filename, or unknown
    if ($doc_title) {
        $titre_resume = $doc_title;
    } elseif (isset($file) && $file) {
        $titre_resume = $file;
    } else {
        $titre_resume = "Untitled";
    }

    //title and small description
    if (!is_array($page_desc)) {
        // Build the array explicitly: assigning a string offset on '' is an
        // error on PHP >= 7.1 instead of an implicit array conversion.
        $page_desc = array('content' => '');
    } else {
        $page_desc['content'] = ' ' . (isset($page_desc['content']) ? $page_desc['content'] : '');
    }

    $db_some_text = preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", implode("", $text));
    if (strlen($db_some_text) > SUMMARY_DISPLAY_LENGTH) {
        $db_some_text = substr($db_some_text, 0, SUMMARY_DISPLAY_LENGTH) . "...";
    }
    $first_words = preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", $titre_resume) . "\n" . preg_replace("/([ ]{2,}|\n|\r|\r\n)/", " ", $page_desc['content'] . $db_some_text) . "...";

    //hashed string to detect doubles
    $md5 = md5($titre_resume . $page_desc['content'] . $text[$max_chunk]) . '_' . $tempfilesize;

    //double test : anything but 0 means the page is already known
    if (phpdigTestDouble($id_connect, $site_id, $md5, $upddate, $last_modified) != 0) {
        print $s_no . phpdigMsg('double') . $br;
        return -1;
    }

    //weight of title and description is there
    if (APPEND_TITLE_META) {
        $add_text = "";
        for ($itl = 0; $itl < TITLE_WEIGHT; $itl++) {
            $add_text .= $doc_title . " " . $page_desc['content'] . " ";
        }
        // $textalts is a plain string; the previous is_array() test could
        // never succeed, so alt/title text was silently dropped.
        if ($textalts !== '') {
            $add_text .= " " . $textalts;
        }
        if (is_array($page_keywords) && isset($page_keywords['content'])) {
            $add_text .= " " . $page_keywords['content'];
        }
        array_push($text, $add_text);
    }

    //words list and occurence of each of them
    $separators = " ";
    foreach ($text as $text2) {
        $text2 = phpdigEpureText($text2, SMALL_WORDS_SIZE);
        for ($token = strtok($text2, $separators); $token !== false; $token = strtok($separators)) {
            if (!isset($nbre_mots[$token])) {
                $nbre_mots[$token] = 1;
            } else {
                $nbre_mots[$token]++;
            }
        }
    }
    $distinct_words = count($nbre_mots);

    //modify the spider record
    $spider_id = phpdigUpdSpiderRow($id_connect, $site_id, $path, $file, $first_words, $upddate, $md5, $last_modified, $distinct_words, $tempfilesize);

    //here store extract the textual content (return a new ftp_id in case of reconnection)
    $ftp_id = phpdigWriteText($relative_script_path, $spider_id, $text, $ftp_id);

    //delete old engine records
    $query = "DELETE FROM " . PHPDIG_DB_PREFIX . "engine WHERE spider_id={$spider_id}";
    mysql_query($query, $id_connect);

    //database insert: one "(spider_id,key_id,weight)" tuple per kept keyword
    $value_rows = array();
    foreach ($nbre_mots as $key => $value) {
        $key = trim($key);
        if (!get_magic_quotes_runtime()) {
            $key = addslashes($key);
        }

        //no small words nor stop words
        if (strlen($key) > SMALL_WORDS_SIZE and strlen($key) <= MAX_WORDS_SIZE and !isset($common_words[$key]) and ereg('^[' . $phpdig_words_chars[PHPDIG_ENCODING] . '#$]', $key)) {
            //if keyword exists, retrieve id, else insert it
            $requete = "SELECT key_id FROM " . PHPDIG_DB_PREFIX . "keywords WHERE keyword = '" . $key . "'";
            $result_insert = mysql_query($requete, $id_connect);
            // A failed lookup is treated like "keyword not found".
            $found = $result_insert ? mysql_num_rows($result_insert) : 0;

            if ($found == 0) {
                //inserts new keyword
                $requete = "INSERT INTO " . PHPDIG_DB_PREFIX . "keywords (keyword,twoletters) VALUES ('" . $key . "','" . addslashes(substr(str_replace('\\', '', $key), 0, 2)) . "')";
                mysql_query($requete, $id_connect);
                $key_id = mysql_insert_id($id_connect);
            } else {
                //existing keyword
                $keyid = mysql_fetch_row($result_insert);
                $key_id = $keyid[0];
            }
            if ($result_insert) {
                mysql_free_result($result_insert);
            }

            //New index record
            $value_rows[] = "({$spider_id},{$key_id},{$value})";
        }
    }
    unset($nbre_mots);

    //One query for the entire page; skipped when no keyword qualified, because
    //an INSERT with an empty VALUES list is invalid SQL.
    if (count($value_rows) > 0) {
        $sqlvalues = implode(",\n", $value_rows);
        $requete = "INSERT INTO " . PHPDIG_DB_PREFIX . "engine (spider_id,key_id, weight) VALUES {$sqlvalues}\n";
        mysql_query($requete, $id_connect);
    }

    print $s_yes;

    return $spider_id;
}
$id_key = mysql_query($query, $id_connect); if (mysql_num_rows($id_key) < 1) { //if this key_id is not in engine database, delete it print "X "; $query_delete = "DELETE FROM " . PHPDIG_DB_PREFIX . "keywords WHERE key_id={$key_id}"; $id_del = mysql_query($query_delete, $id_connect); $del++; } else { print ". "; } mysql_free_result($id_key); } if ($del) { print "<br />{$del}" . phpdigMsg('keywordsnotok'); } else { print "<br />" . phpdigMsg('keywordsok'); } mysql_query('UPDATE ' . PHPDIG_DB_PREFIX . 'sites SET locked=0', $id_connect); } ?> <br /> <a href="index.php?sid=<?php echo $sid; ?> ">[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> .
echo $tdhtm, ' '; ?> </td> <td> <?php if (api_is_allowed_to_edit()) { echo '<textarea name="mdsc" id="mdsc" rows="10" cols="60">', htmlspecialchars($pkwc), '</textarea>'; } else { echo '<textarea name="mdsc" id="mdsc" class="dvc"> </textarea>'; } ?> </td> <td> <input type="hidden" id="kwdswere_string" name="kwdswere_string"/> <input type="submit" id="form_submit" onClick="document.getElementById('kwdswere_string').value = document.getElementById('kwds_string').value; return prepSearch(event);" value="<?php echo phpdigMsg('go'); ?> "/> </td> </tr> </table> <div id="popup" noWrap="1" class="pup"> Working... </div> </form> <div> </div>
<input type="text" id="kwds_string" class="kwl" onKeyUp="takeTypeIn(this, 150, -100, '60%'); checkEnter(event); return true;"/> <form action="<?php echo api_get_self()?>" method="post"> <table> <tr> <td><?php echo $tdhtm, ' '; ?> </td> <td> <?php if (api_is_allowed_to_edit()) echo '<textarea name="mdsc" id="mdsc" rows="10" cols="60">', htmlspecialchars($pkwc), '</textarea>'; else echo '<textarea name="mdsc" id="mdsc" class="dvc"> </textarea>'; ?> </td> <td> <input type="hidden" id="kwdswere_string" name="kwdswere_string"/> <input type="submit" id="form_submit" onClick="document.getElementById('kwdswere_string').value = document.getElementById('kwds_string').value; return prepSearch(event);" value="<?php echo phpdigMsg('go')?>"/> </td> </tr> </table> <div id="popup" noWrap="1" class="pup"> Working... </div> </form> <div> </div> <div noWrap="1" id="maindiv"> <?php if ($keywordscache == '') { ?>   <?php } else { ?>
} } } if (!$locked) { if ($path_name) { print "<a href='update.php?path=" . urlencode($path_name) . "&site_id={$site_id}&sid={$sid}&deny=1&sup=1#{$aname2}' target='_self' ><img src='deny.gif' width='10' height='10' border='0' align='middle' alt='" . phpdigMsg('exclude') . "' /></a> \n"; } else { print "<img src='fill.gif' width='10' height='10' border='0' align='middle' alt='' /> \n"; } print "<a href='update.php?site_id={$site_id}&sid=" . $sid . "&path=" . urlencode($path_name) . "&sup=1#{$aname2}' target='_self'><img src='no.gif' width='10' height='10' border='0' align='middle' alt='" . phpdigMsg('delete') . "' /></a> \n"; print "<a href='update.php?path=" . urlencode($path_name) . "&sid=" . $sid . "&site_id={$site_id}&exp=1' target='contentset'><img src='yes.gif' width='10' height='10' border='0' align='middle' alt='" . phpdigMsg('reindex') . "' /></a> \n"; } if ($path_name == "") { $path_name_aff = "<i><b style='color:red;'>" . phpdigMsg('root') . "</b></i>"; } print '<code>' . $path_name_aff . "</code> <a href='files.php?path=" . urlencode($path_name) . "&site_id={$site_id}&sid=" . $sid . "' target='files' ><img src='details.gif' width='10' height='10' border='0' align='middle' alt='" . phpdigMsg('files') . "' /></a><br />\n"; } ?> </p> <a href="index.php?sid=<?php echo $sid; ?> " target='contentset'>[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> . </body> </html>
$id_spider = mysql_query($query, $id_connect); if (mysql_num_rows($id_spider) < 1) { //if no-existent in the spider page, delete from engine $del++; print "X "; $query_delete = "DELETE FROM " . PHPDIG_DB_PREFIX . "engine WHERE spider_id={$spider_id}"; $id_del = mysql_query($query_delete, $id_connect); } else { print "- "; } mysql_free_result($id_spider); } if ($del) { print "<br />{$del}" . phpdigMsg('enginenotok'); } else { print "<br />" . phpdigMsg('engineok'); } mysql_query('UPDATE ' . PHPDIG_DB_PREFIX . 'sites SET locked=0', $id_connect); } ?> <br /> <a href="index.php?sid=<?php echo $sid; ?> " >[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> .
$res = mysql_query($query, $id_connect); if ($res) { while (list($key_id) = mysql_fetch_row($res)) { //delete references to this keyword in the engine table $query = "DELETE FROM " . PHPDIG_DB_PREFIX . "engine WHERE key_id={$key_id}"; mysql_query($query, $id_connect); $numdel = mysql_affected_rows($id_connect); print "{$numdel}" . phpdigMsg('deletedfor') . " " . stripslashes($common) . " ({$key_id})<br />"; $numtot += $numdel; } //delete this common word from the keywords table $query = "DELETE from " . PHPDIG_DB_PREFIX . "keywords where keyword like '{$common}'"; } mysql_query($query, $id_connect); } print "<h3>" . phpdigMsg('cleanuptotal') . "{$numtot}" . phpdigMsg('cleaned') . "</h3>"; mysql_query('UPDATE ' . PHPDIG_DB_PREFIX . 'sites SET locked=0', $id_connect); } ?> <br /><br /> <a href="index.php?sid=<?php echo $sid; ?> ">[<?php phpdigPrnMsg('back'); ?> ]</a> <?php phpdigPrnMsg('to_admin'); ?> . <br /><br />