/**
 * Flattens a nested message tree into a flat list of display items.
 *
 * A node without a 'message' key is treated as a container and is flattened
 * recursively. A node with a 'message' key is a leaf: WARNING and ERROR
 * leaves are always emitted, INFO and NOTICE leaves only when $allMessages
 * is truthy, and leaves of any other level are dropped.
 *
 * @param array $rootNode      Nodes to walk.
 * @param mixed $type          Value copied into the 'type' key of every item.
 * @param bool  $allMessages   Whether INFO/NOTICE leaves are emitted as well.
 * @param array $flattenedTree Output: reset to an empty array, then filled.
 * @return string 'ok', 'warn' or 'error' - the worst status that was emitted.
 */
function flattenTree($rootNode, $type, $allMessages, &$flattenedTree)
{
    $flattenedTree = array();
    $status = 'ok';
    $IDN = new idna_convert();

    foreach ($rootNode as $treeNode) {
        // Container node: splice in the flattened children and merge status.
        if (!isset($treeNode['message'])) {
            $childItems = null;
            $childStatus = flattenTree($treeNode, $type, $allMessages, $childItems);
            foreach ($childItems as $childItem) {
                $flattenedTree[] = $childItem;
            }
            // The child status only counts when the child emitted something.
            if (!empty($childItems)) {
                if ('warn' == $childStatus && 'ok' == $status) {
                    $status = 'warn';
                } elseif ('error' == $childStatus) {
                    $status = 'error';
                }
            }
            continue;
        }

        $level = $treeNode['level'];
        $isProblem = ('WARNING' == $level || 'ERROR' == $level);
        $isInformational = ('INFO' == $level || 'NOTICE' == $level);
        if (!$isProblem && !($isInformational && $allMessages)) {
            continue;
        }

        if (is_null($treeNode['formatstring'])) {
            $caption = "-";
        } else {
            // arg0..arg9 are passed through the IDNA decoder before formatting.
            $sprintfArgs = array($treeNode['formatstring']);
            for ($argNo = 0; $argNo <= 9; $argNo++) {
                $sprintfArgs[] = $IDN->decode($treeNode['arg' . $argNo]);
            }
            $caption = call_user_func_array('sprintf', $sprintfArgs);
        }

        if ($level == 'WARNING') {
            $className = 'warn';
        } elseif ($level == 'ERROR') {
            $className = 'error';
        } elseif ($level == 'NOTICE') {
            $className = 'notice';
        } else {
            $className = '';
        }

        $item = array(
            'type' => $type,
            'class' => $className,
            'caption' => $caption,
            'subtree' => array(),
        );
        if (!is_null($treeNode['description'])) {
            $item['description'] = $treeNode['description'];
        }
        $flattenedTree[] = $item;

        if ('WARNING' == $level && 'ok' == $status) {
            $status = 'warn';
        } elseif ('ERROR' == $level) {
            $status = 'error';
        }
    }

    return $status;
}
/**
 * Decodes a Punycode (xn--) string into its Unicode form.
 *
 * The original implementation ignored $inputtext and always decoded
 * $this->response['inputtext']. The argument is now honoured; the response
 * value is still used as a fallback when the argument is null or empty, so
 * callers that relied on the old behaviour by passing nothing useful keep
 * getting the same result.
 *
 * @param string|null $inputtext Punycode text to decode.
 * @return mixed Whatever idna_convert::decode() returns for the input.
 */
public function punydecode($inputtext)
{
    require_once 'assets/php/vendors/idna_convert_060/idna_convert.class.php';
    require_once 'assets/php/vendors/idna_convert_060/transcode_wrapper.php';

    if ($inputtext === null || $inputtext === '') {
        $inputtext = isset($this->response['inputtext']) ? $this->response['inputtext'] : '';
    }

    $IDN = new idna_convert();
    return $IDN->decode($inputtext);
}
/**
 * Decode IDN Punycode to UTF-8 domain name
 *
 * Loads the bundled idna_convert class from the application's
 * "vendors.punicode" alias and delegates to its decode() method.
 *
 * @param string $value Punycode
 * @return string Domain name in UTF-8 charset
 *
 * @author Igor V Belousov <*****@*****.**>
 * @copyright 2013 Igor V Belousov
 * @license http://opensource.org/licenses/LGPL-2.1 LGPL v2.1
 * @link http://belousovv.ru/myscript/phpIDN
 */
public static function DecodePunycodeIDN($value)
{
    Yii::import('application.vendors.punicode.*');
    require_once Yii::getPathOfAlias('application.vendors.punicode') . '/idna_convert.class.php';

    // Decode the punycode form back to its readable presentation.
    $converter = new idna_convert();
    return $converter->decode($value);
}
/**
 * Builds the HTML snippet that displays a referer URL.
 *
 * An empty referer yields a grey "unknown" label. Otherwise a link is
 * returned whose text is the URL-decoded referer, with Punycode ("xn--")
 * host names converted to their readable form.
 *
 * The referer is supplied by the visitor's browser, so both the href
 * attribute and the link text are HTML-escaped; the original emitted them
 * raw, which allowed markup injection.
 *
 * @param string $ref Raw referer URL.
 * @return string HTML fragment.
 */
public static function checkIdna($ref)
{
    if ($ref == "") {
        return "<font color=grey>неизвестно</font>";
    }

    $label = urldecode($ref);
    if (stristr($label, "xn--")) {
        $IDN = new idna_convert(array('idn_version' => 2008));
        $label = $IDN->decode($label);
    }

    // ENT_SUBSTITUTE keeps the text visible when the decoded referer is not valid UTF-8.
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return "<a target=_blank href=\"" . htmlspecialchars((string) $ref, $flags, 'UTF-8') . "\">"
        . htmlspecialchars((string) $label, $flags, 'UTF-8')
        . "</a>";
}
/**
 * Converts given punycode to the IDN.
 *
 * The idna_convert class is loaded on demand from the framework's
 * "system.vendors.idna_convert" alias.
 *
 * @param string $value punycode to be converted.
 * @return string resulting IDN.
 * @since 1.1.13
 */
private function decodeIDN($value)
{
    $vendorDir = Yii::getPathOfAlias('system.vendors.idna_convert');
    require_once $vendorDir . DIRECTORY_SEPARATOR . 'idna_convert.class.php';

    $converter = new idna_convert();
    $decoded = $converter->decode($value);

    return $decoded;
}
// Demo page for the Punycode converter: reads the request, performs the
// requested encode/decode, then falls through to the HTML form below.
header('Content-Type: text/html; charset=utf-8');
require_once 'idna_convert.class.php';

// Only IDNA 2003 and IDNA 2008 are accepted; anything else falls back to 2008.
$idn_version = isset($_REQUEST['idn_version']) && $_REQUEST['idn_version'] == 2003 ? 2003 : 2008;
$IDN = new idna_convert(array('idn_version' => $idn_version));

$version_select = '<select size="1" name="idn_version"><option value="2003">IDNA 2003</option><option value="2008"';
if ($idn_version == 2008) {
    $version_select .= ' selected="selected"';
}
$version_select .= '>IDNA 2008</option></select>';

if (isset($_REQUEST['encode'])) {
    $decoded = isset($_REQUEST['decoded']) ? stripslashes($_REQUEST['decoded']) : '';
    $encoded = $IDN->encode($decoded);
}
if (isset($_REQUEST['decode'])) {
    $encoded = isset($_REQUEST['encoded']) ? stripslashes($_REQUEST['encoded']) : '';
    $decoded = $IDN->decode($encoded);
}

$lang = 'en';
// $add collects extra hidden form fields. It was appended to without ever
// being initialised, which raised an "undefined variable" notice/warning.
if (!isset($add)) {
    $add = '';
}
if (isset($_REQUEST['lang'])) {
    // The language is whitelisted, so it is safe to echo back into the form.
    if ('de' == $_REQUEST['lang'] || 'en' == $_REQUEST['lang']) {
        $lang = $_REQUEST['lang'];
        $add .= '<input type="hidden" name="lang" value="' . $lang . '" />' . "\n";
    }
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>phlyLabs Punycode Converter</title>
<meta name="author" content="phlyLabs" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
/**
 * Prints the visit log of one day (cms_surf rows with dt = $detail) as an
 * HTML table, either one row per visit or grouped by IP address.
 *
 * Grouping is enabled when the "group" query parameter is "true", or when a
 * "detail.dat" file exists and "group" is not explicitly "false".
 *
 * Fixes compared with the previous version:
 *  - $detail is bound as a query parameter instead of being concatenated
 *    into the SQL text;
 *  - $_GET['group'] and the row-class toggle are initialised before use;
 *  - grouped mode first collects all rows and then renders them once (the
 *    rendering loop used to run inside the fetch loop), and addresses the
 *    columns by name (the positional indexes were shifted by one);
 *  - the "unknown referer" cell is now closed, and the "unknown IP" cell in
 *    grouped mode gets the same rowspan as a known one;
 *  - values that originate from the visitor's request (referer, user agent,
 *    host, request URI, IP) are HTML-escaped on output.
 *
 * @param string $detail Day key matched against cms_surf.dt.
 * @return void Output is echoed directly.
 */
public function actionDetail($detail)
{
    $group = isset($_GET['group']) ? $_GET['group'] : '';
    if (file_exists("detail.dat") and $group != "false") {
        $group = "true";
    }
    $grouped = ($group == "true");

    $sql = "SELECT tm,refer,ip,proxy,host,lang,user,req FROM cms_surf WHERE dt=:dt ORDER BY i DESC";
    $rows = Yii::app()->db->createCommand($sql)->queryAll(true, array(':dt' => $detail));

    // Link that switches to the other display mode.
    $toggle = $this->detailEsc('?detail=' . urlencode($detail) . '&group=' . ($grouped ? 'false' : 'true'));
    $s = '';

    if (!$grouped) {
        echo "<table id=table align=center width=100% cellpadding=5 cellspacing=1 border=0><tr class=h><td width=35>Время</td><td>Referer</td><td width=90>IP-адрес <a class=d href=\"" . $toggle . "\">±</a></td><td>Хост</td><td>User-Agent</td><td>Страница</td></tr>";
        foreach ($rows as $row) {
            $s = ($s == "s2") ? "s1" : "s2";
            echo "<tr class=" . $s . ">";
            echo "<td>" . $this->detailEsc($row['tm']) . "</td>";
            $this->echoDetailRefererCell($row['refer']);
            echo "<td>";
            $this->echoDetailIpLinks($row['ip'], $row['proxy']);
            echo "</td>";
            $this->echoDetailHostCell($row['host'], $row['lang'], $row['proxy'] != "" ? $row['proxy'] : $row['ip'], '/stats/');
            $this->echoDetailAgentCell($row['user'], $row['host'], '/stats/');
            $this->echoDetailPageCell($row['req']);
            echo "</tr>";
        }
    } else {
        echo "<table id=table align=center width=100% cellpadding=5 cellspacing=1 border=0><tr class=h><td width=90>IP-адрес <a class=d href=\"" . $toggle . "\">±</a></td><td>Хост</td><td>User-Agent</td><td width=30%>Referer</td><td width=35>Время</td><td>Страница</td></tr>";

        // Collect every visit per IP first, preserving the query order.
        $visitsByIp = array();
        foreach ($rows as $row) {
            $visitsByIp[$row['ip']][] = $row;
        }

        foreach ($visitsByIp as $ip => $visits) {
            $ip = (string) $ip;
            $s = ($s == "s2") ? "s1" : "s2";
            echo "<tr class=" . $s . ">";
            // One IP cell spans all visits of the group; the proxy of the first visit is shown.
            echo "<td rowspan=" . count($visits) . ">";
            $this->echoDetailIpLinks($ip, $visits[0]['proxy']);
            echo "</td>";

            $first = true;
            foreach ($visits as $visit) {
                if (!$first) {
                    echo "<tr class=" . $s . ">";
                }
                $first = false;
                $this->echoDetailHostCell($visit['host'], $visit['lang'], $visit['proxy'] != "" ? $visit['proxy'] : $ip, '');
                $this->echoDetailAgentCell($visit['user'], $visit['host'], '');
                $this->echoDetailRefererCell($visit['refer']);
                echo "<td>" . $this->detailEsc($visit['tm']) . "</td>";
                $this->echoDetailPageCell($visit['req']);
                echo "</tr>";
            }
        }
    }

    echo "<tr class=h><td></td><td></td><td></td><td></td><td></td><td></td></tr></table>";
}

/** HTML-escapes a value for use in element content or a quoted attribute. */
private function detailEsc($value)
{
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/** Echoes the IP link (or an "unknown" label) followed by the optional proxy link. */
private function echoDetailIpLinks($ip, $proxy)
{
    if ($ip != "unknown") {
        echo "<a target=_blank href=\"?item=ip&amp;qs=" . $this->detailEsc(urlencode($ip)) . "\">" . $this->detailEsc($ip) . "</a>";
    } else {
        echo "<font color=grey>неизвестно</font>";
    }
    if ($proxy != "") {
        echo "<br><a target=_blank href=\"?item=ip&amp;qs=" . $this->detailEsc(urlencode($proxy)) . "\">через proxy</a>";
    }
}

/** Echoes the referer cell: search-engine hit, plain link, or "unknown". */
private function echoDetailRefererCell($referUrl)
{
    echo "<td align=left style='overflow: hidden;text-overflow: ellipsis;'>";
    $refer = $this->Ref($referUrl);
    if (is_array($refer)) {
        list($engine, $query) = $refer;
        if ($engine == "G" and !empty($query) and stristr($referUrl, "/url?")) {
            $referUrl = str_replace("/url?", "/search?", $referUrl);
        }
        echo_se($engine);
        if (empty($query)) {
            $query = "<font color=grey>неизвестно</font>";
        }
        // NOTE(review): $query is produced by Ref() and is printed as-is, as before;
        // confirm that Ref() returns HTML-safe text.
        echo ": <a target=_blank href=\"" . $this->detailEsc($referUrl) . "\">" . $query . "</a>";
    } elseif ($refer == "") {
        echo "<font color=grey>неизвестно</font>";
    } else {
        $shown = urldecode($referUrl);
        if (stristr($shown, "xn--")) {
            $IDN = new idna_convert(array('idn_version' => 2008));
            $shown = $IDN->decode($shown);
        }
        echo "<a target=_blank href=\"" . $this->detailEsc($referUrl) . "\">" . $this->detailEsc($shown) . "</a>";
    }
    echo "</td>";
}

/**
 * Echoes the host cell with the optional language line and flag image.
 * $assetPrefix is prepended to the "flags/" path ('/stats/' in flat mode, '' in grouped mode).
 */
private function echoDetailHostCell($host, $lang, $lookupIp, $assetPrefix)
{
    echo "<td>";
    if ($host == "") {
        echo "<font color=grey>неизвестно</font>";
    } else {
        echo "<a target=_blank href=\"http://www.tcpiputils.com/browse/ip-address/" . $this->detailEsc($lookupIp) . "\">" . $this->detailEsc($host) . "</a>";
    }
    if ($lang != "") {
        $langKey = mb_strtoupper($lang);
        $langName = isset(StatsHelper::$LANG[$langKey]) ? StatsHelper::$LANG[$langKey] : '';
        echo "<br>Язык: " . (!empty($langName) ? $langName : "<font color=grey>неизвестно</font>");
        $flag = $assetPrefix . "flags/" . mb_strtolower($langName) . ".gif";
        if (file_exists($flag)) {
            echo " <img align=absmiddle src=" . $flag . " width=16 height=12>";
        }
    }
    echo "</td>";
}

/** Echoes the user-agent cell, prefixed with a browser icon for non-robot visitors. */
private function echoDetailAgentCell($userAgent, $host, $assetPrefix)
{
    echo "<td align=left style='overflow: hidden;text-overflow: ellipsis;'>";
    if (!$this->is_robot($userAgent, $host)) {
        $brw = StatsHelper::GetBrowser($userAgent);
        if ($brw != "") {
            echo "<img src=" . $assetPrefix . "browsers/{$brw} width=16 height=16 align=absmiddle> ";
        }
    }
    echo $this->detailEsc($userAgent) . "</td>";
}

/** Echoes the cell that links to the requested page. */
private function echoDetailPageCell($req)
{
    $safeReq = $this->detailEsc($req);
    echo "<td align=left style='overflow: hidden;text-overflow: ellipsis;' nowrap><a target=_blank href=\"" . $safeReq . "\">" . $safeReq . "</a></td>";
}
<tr>
<th>Дата</th>
<th>Последние <?php echo $n; ?> других сайта</th>
<th>Время / Страница</th>
</tr>
</thead>
<?php
// One table row per referrer. The referer and request URI come from the
// visitor's request, so they are HTML-escaped; previously they were echoed
// raw and the title attribute was unquoted.
$escFlags = ENT_QUOTES | ENT_SUBSTITUTE;
$idnConverter = null; // created lazily, once, instead of once per row

foreach ($items as $ref) {
    $shownRefer = urldecode($ref['refer']);
    if (stristr($shownRefer, "xn--")) {
        if ($idnConverter === null) {
            $idnConverter = new idna_convert(array('idn_version' => 2008));
        }
        $shownRefer = $idnConverter->decode($shownRefer);
    }
    $safeReq = htmlspecialchars((string) $ref['req'], $escFlags, 'UTF-8');

    echo "<tr>";
    echo "<td title=\"" . htmlspecialchars((string) StatsHelper::$MONTH[substr($ref['dt'], 4, 2)], $escFlags, 'UTF-8') . "\">" . StatsHelper::$DAY[$ref['day']] . StatsHelper::dtconv($ref['dt']) . "</td>";
    echo "<td class='textL'><a target=_blank href=\"" . htmlspecialchars((string) $ref['refer'], $escFlags, 'UTF-8') . "\">";
    echo htmlspecialchars((string) $shownRefer, $escFlags, 'UTF-8');
    echo "</a></td>";
    echo "<td class='textL'>" . htmlspecialchars((string) $ref['tm'], $escFlags, 'UTF-8') . " <a target=_blank href=\"" . $safeReq . "\">" . $safeReq . "</a></td></tr>";
}
?>
</table>
<?php Yii::app()->tpl->closeWidget(); ?>
define('CSP_PO_BASE_URL', WP_PLUGIN_URL . CSP_PO_PLUGINPATH);
//Bugfix: ensure valid JSON requests at IDN locations!
//Attention: Google Chrome and Safari behave in different way (shared WebKit issue or all other are wrong?)!
// The User-Agent header is optional (CLI, cron, some bots), so read it defensively.
$csp_po_user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
if (stripos($csp_po_user_agent, 'chrome') !== false || stripos($csp_po_user_agent, 'safari') !== false || version_compare(phpversion(), '5.2.1', '<')) {
    // These clients get the admin URL as stored (lower-cased, no trailing slash).
    if (function_exists("admin_url")) {
        define('CSP_PO_ADMIN_URL', rtrim(strtolower(admin_url()), '/'));
    } else {
        define('CSP_PO_ADMIN_URL', rtrim(strtolower(get_option('siteurl')) . '/wp-admin/', '/'));
    }
} else {
    // Everyone else gets the admin URL passed through the IDNA decoder.
    if (!class_exists('idna_convert')) {
        require_once 'includes/idna_convert.class.php';
    }
    $idn = new idna_convert();
    if (function_exists("admin_url")) {
        define('CSP_PO_ADMIN_URL', $idn->decode(rtrim(strtolower(admin_url()), '/'), 'utf8'));
    } else {
        define('CSP_PO_ADMIN_URL', $idn->decode(rtrim(strtolower(get_option('siteurl')) . '/wp-admin/', '/'), 'utf8'));
    }
}
define('CSP_PO_BASE_PATH', WP_PLUGIN_DIR . CSP_PO_PLUGINPATH);
define('CSP_PO_MIN_REQUIRED_WP_VERSION', '2.5');
define('CSP_PO_MIN_REQUIRED_PHP_VERSION', '4.4.2');
register_activation_hook(__FILE__, 'csp_po_install_plugin');
}

/**
 * Tells whether the plugin runs in a multisite / WPMU style installation.
 *
 * True when the legacy $wpmu_version global is set, when is_multisite()
 * exists and reports true, or when wp_get_mu_plugins() exists and returns
 * at least one must-use plugin.
 *
 * @return bool
 */
function csp_is_multisite()
{
    return isset($GLOBALS['wpmu_version'])
        || (function_exists('is_multisite') && is_multisite())
        || (function_exists('wp_get_mu_plugins') && count(wp_get_mu_plugins()) > 0);
}

if (function_exists('csp_po_install_plugin')) {
    //rewrite and extend the error messages displayed at failed activation
if ($num & 1) { echo "\t<p class='odrow'>\n"; } else { echo "\t<p class='evrow'>\n"; } echo "\n {$num}. {$url}<br />\n "; } // clean url if ($idna && strstr($url, "xn--")) { require_once "{$include_dir}/idna_converter.php"; // Initialize the converter class $IDN = new idna_convert(array('idn_version' => 2008)); // The input string, if input is not UTF-8 or UCS-4, it must be converted before //$input = utf8_encode($url); // Decode it to its readable presentation $url = $IDN->decode($url); } $url = urldecode($db_con->real_escape_string($url)); $compurl = parse_url("" . $url); if ($compurl['path'] == '') { $url = $url . "/"; } $sql_query = "SELECT site_ID from " . $mysql_table_prefix . "sites where url='{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>");
/**
 * Turns a URL (and optional link text) into either an HTML anchor or a
 * [url] BBCode tag.
 *
 * The target gets a scheme prepended when it has none ("www." and bare
 * hosts get http://, "ftp." gets ftp://). When both FORUM_SUPPORT_PCRE_UNICODE
 * and FORUM_ENABLE_IDNA are defined, the target is IDNA-encoded (memoised
 * per request) and a Punycode link text is decoded for display.
 *
 * Local variable names are kept as they are because hook code is eval()'d
 * inside this function's scope and may reference them.
 *
 * @param string $url    URL as written by the user.
 * @param string $link   Link text; empty means "use the URL".
 * @param bool   $bbcode Return BBCode instead of HTML.
 * @return string
 */
function handle_url_tag($url, $link = '', $bbcode = false)
{
    $return = ($hook = get_hook('ps_handle_url_tag_start')) ? eval($hook) : null;
    if ($return != null) {
        return $return;
    }

    $full_url = str_replace(array(' ', '\'', '`', '"'), array('%20', '', '', ''), $url);

    if (strpos($url, 'www.') === 0) {
        // Starts with www: assume http://
        $full_url = 'http://' . $full_url;
    } elseif (strpos($url, 'ftp.') === 0) {
        // Starts with ftp: assume ftp://
        $full_url = 'ftp://' . $full_url;
    } elseif (!preg_match('#^([a-z0-9]{3,6})://#', $url)) {
        // No abcdef:// style scheme at all: assume http://
        $full_url = 'http://' . $full_url;
    }

    if (defined('FORUM_SUPPORT_PCRE_UNICODE') && defined('FORUM_ENABLE_IDNA')) {
        static $idn;
        static $cached_encoded_urls = null;

        if (is_null($cached_encoded_urls)) {
            $cached_encoded_urls = array();
        }

        // Encoded URLs are memoised by the md5 of the unencoded form.
        $cache_key = md5($full_url);
        if (!isset($cached_encoded_urls[$cache_key])) {
            if (!isset($idn)) {
                $idn = new idna_convert();
                $idn->set_parameter('encoding', 'utf8');
                $idn->set_parameter('strict', false);
            }
            $full_url = $idn->encode($full_url);
            $cached_encoded_urls[$cache_key] = $full_url;
        } else {
            $full_url = $cached_encoded_urls[$cache_key];
        }
    }

    // Ok, not very pretty :-)
    if (!$bbcode) {
        if (defined('FORUM_SUPPORT_PCRE_UNICODE') && defined('FORUM_ENABLE_IDNA')) {
            if ($link == '' || $link == $url) {
                $link_name = $url;
            } else {
                $link_name = $link;
            }
            if (preg_match('!^(https?|ftp|news){1}' . preg_quote('://xn--', '!') . '!', $link_name)) {
                $link = $idn->decode($link_name);
            }
        }

        if ($link == '' || $link == $url) {
            // No separate link text: show the URL, shortened when it is long.
            if (utf8_strlen($url) > 55) {
                $link = utf8_substr($url, 0, 39) . ' … ' . utf8_substr($url, -10);
            } else {
                $link = $url;
            }
        } else {
            $link = stripslashes($link);
        }
    }

    $return = ($hook = get_hook('ps_handle_url_tag_end')) ? eval($hook) : null;
    if ($return != null) {
        return $return;
    }

    if (!$bbcode) {
        return '<a href="' . $full_url . '">' . $link . '</a>';
    }

    if (defined('FORUM_SUPPORT_PCRE_UNICODE') && defined('FORUM_ENABLE_IDNA')) {
        if (preg_match('!^(https?|ftp|news){1}' . preg_quote('://xn--', '!') . '!', $link)) {
            $link = $idn->decode($link);
        }
    }

    if ($full_url == $link) {
        return '[url]' . $link . '[/url]';
    }

    return '[url=' . $full_url . ']' . $link . '[/url]';
}
<?php
// Front controller: picks the article from ./articles/ whose first line
// matches either the requested path (transliterated title) or the decoded
// sub-domain, and prepares $title / $content for the template below.
$rewrite = array();
$ttl = substr($_SERVER['REQUEST_URI'], 1, strpos($_SERVER['REQUEST_URI'], '.') - 1);
// Cyrillic -> Latin transliteration table used to compare titles with the URL.
$table = array('Й' => 'y', 'й' => 'y', 'Ц' => 'c', 'ц' => 'c', 'У' => 'u', 'у' => 'u', 'К' => 'k', 'к' => 'k', 'Е' => 'e', 'е' => 'e', 'Н' => 'n', 'н' => 'n', 'Г' => 'g', 'г' => 'g', 'Ш' => 'sh', 'ш' => 'sh', 'Щ' => 'sch', 'щ' => 'sch', 'З' => 'z', 'з' => 'z', 'Х' => 'h', 'х' => 'h', 'ъ' => '', 'Ъ' => '', 'Ф' => 'f', 'ф' => 'f', 'Ы' => 'y', 'ы' => 'y', 'В' => 'v', 'в' => 'v', 'А' => 'a', 'а' => 'a', 'П' => 'p', 'п' => 'p', 'Р' => 'r', 'р' => 'r', 'О' => 'o', 'о' => 'o', 'Л' => 'l', 'л' => 'l', 'Д' => 'd', 'д' => 'd', 'Ж' => 'zh', 'ж' => 'zh', 'Э' => 'e', 'э' => 'e', 'Я' => 'ya', 'я' => 'ya', 'Ч' => 'ch', 'ч' => 'ch', 'С' => 's', 'с' => 's', 'М' => 'm', 'м' => 'm', 'И' => 'i', 'и' => 'i', 'Т' => 't', 'т' => 't', 'Ь' => '', 'ь' => '', 'Б' => 'b', 'б' => 'b', 'Ю' => 'yu', 'ю' => 'yu', ' ' => '-', '.' => '', ',' => '', '?' => '', '!' => '', 'І' => 'i', 'і' => 'i', 'Ї' => 'yi', 'ї' => 'yi');

include_once 'idna_convert.class.php';
$idn = new idna_convert();
// First label of the (IDNA-decoded) host name.
$h = $idn->decode($_SERVER['HTTP_HOST']);
$h = substr($h, 0, strpos($h, '.'));

$dh = opendir('./articles/');
// opendir() returns false when the directory is missing or unreadable;
// the loop is skipped in that case instead of calling readdir() on false.
if ($dh !== false) {
    while (false !== ($file = readdir($dh))) {
        if ($file == '.' || $file == '..' || substr($file, -4) != '.txt') {
            continue;
        }

        $handle = fopen("./articles/{$file}", "r");
        if ($handle === false) {
            continue; // unreadable article: skip it
        }
        // The first line of an article file is its title.
        $buffer = trim((string) fgets($handle));
        fclose($handle);

        // Host-name form of the title: the title is converted from cp1251 to
        // UTF-8, runs of other characters become '-', and the result is lower-cased.
        $b2 = trim(mb_strtolower(preg_replace('/[^\\w\\d\\p{Cyrillic}]+/u', '-', iconv('cp1251', 'utf-8', $buffer)), 'UTF-8'), '-');

        if ($ttl == strtr($buffer, $table) || $b2 == $h) {
            if (!in_array($_SERVER['REQUEST_URI'], $rewrite) && preg_match('#^(www\\.)?mcdvo\\.com\\.ua$#', $_SERVER['HTTP_HOST'])) {
                // Requests on the main domain are redirected to the article's sub-domain.
                closedir($dh);
                header("HTTP/1.1 301 Moved Permanently");
                header("Location: http://" . $idn->encode($b2) . '.mcdvo.com.ua/');
                exit;
            }
            $content = file_get_contents('articles/' . $file);
            $title = substr($content, 0, strpos($content, "\n"));
            $content = '<p>' . str_replace("\n", "\n<br />", substr($content, strpos($content, "\n") + 1)) . "</p>";
            break;
        }
    }
    // The directory handle used to be left open.
    closedir($dh);
}
?>
<!DOCTYPE html>
/**
 * Loads the server settings row for the given server type and wraps it in
 * the settings model registered for $componentName.
 *
 * If the component class cannot be resolved, this class itself is used.
 * The fallback used to be written as the bare word `self`, which PHP reads
 * as an undefined constant (a fatal Error on PHP 8); it is now __CLASS__.
 *
 * The stored user name and "from" e-mail are passed through the IDNA
 * decoder before the instance is returned.
 *
 * @param string $type          Value matched against the server_type column.
 * @param string $componentName Component whose model class is instantiated.
 * @return object Model instance; its data is only filled when a row exists.
 */
public static function getInstanceFromServerType($type, $componentName)
{
    // SalesPlatform.ru begin
    require_once 'includes/SalesPlatform/NetIDNA/idna_convert.class.php';
    // SalesPlatform.ru end
    $db = PearDatabase::getInstance();
    $query = 'SELECT * FROM ' . self::tableName . ' WHERE server_type=?';
    $params = array($type);
    $result = $db->pquery($query, $params);

    try {
        $modelClassName = Vtiger_Loader::getComponentClassName('Model', $componentName, 'Settings:Vtiger');
    } catch (Exception $e) {
        $modelClassName = __CLASS__;
    }

    $instance = new $modelClassName();
    if ($db->num_rows($result) > 0) {
        $rowData = $db->query_result_rowdata($result, 0);
        $instance->setData($rowData);
    }

    // SalesPlatform.ru begin
    $idn = new idna_convert();
    $mail_server_username = $idn->decode($instance->get('server_username'));
    $from_email_field = $idn->decode($instance->get('from_email_field'));
    $instance->set('server_username', $mail_server_username);
    $instance->set('from_email_field', $from_email_field);
    // SalesPlatform.ru end

    return $instance;
}
} if ($pdf_orientation == 'L' or $pdf_orientation == 'l') { $pdf_width = 285; } else { $pdf_width = 195; } //Titel $this->SetFont('Arial', '', 12); $this->Cell($pdf_width - 15, 2, utf8_decode($fromname), 0, 1, 'C'); $this->SetFont('Arial', 'B', 8); // Include the class //include_once('idna_convert.class.php'); if (!class_exists('idna_convert')) { $path = clm_core::$path . DS . "includes" . DS . "idna_convert.class" . '.php'; require_once $path; } // Instantiate it (depending on the version you are using) with $IDN = new idna_convert(); // The input string $input = $_SERVER['HTTP_HOST']; // Encode it to its punycode presentation $output = $IDN->decode($input); $this->Cell($pdf_width - 15, 5, utf8_decode($output), 0, 1, 'C'); //Logo der Organisation (Landesverband, Verein, ...; über Einstellungen vorgegeben) rechts $file_headers = @get_headers($org_logo); if ($org_logo != '' and $file_headers[0] != 'HTTP/1.1 404 Not Found') { $this->Image($org_logo, $pdf_width - 20, 6, 15); } //Linie mit Zeilenumbruch $this->Line(15, 20, $pdf_width, 20); $this->Ln(5);
/**
 * Fills the 'user_entry' template block with one entry per user created by
 * the given reseller, showing each user's domain name in readable form.
 *
 * When the reseller has no users, a page message is set and the request is
 * redirected to domain_alias.php (the script terminates there; the
 * statements that used to follow the die were unreachable and are removed).
 *
 * The entry matching the global $cr_user_id is marked as selected, or the
 * first entry when $cr_user_id is empty.
 *
 * @param object $tpl         Template engine instance (assign()/parse()).
 * @param mixed  $reseller_id Reseller whose users are listed.
 * @return bool Always true when the function returns.
 */
function gen_users_list(&$tpl, $reseller_id)
{
    global $sql;
    global $cr_user_id;

    $query = "select admin_id from admin where admin_type = 'user' and created_by = ? order by admin_id";
    $ar = exec_query($sql, $query, array($reseller_id));

    if ($ar->RowCount() == 0) {
        set_page_message(tr('You have no user records.'));
        header("Location: domain_alias.php");
        die;
    }

    // One converter is enough for the whole list.
    $IDN = new idna_convert();
    $domain_query = "select domain_id, IFNULL(domain_name, '') as domain_name from domain where domain_admin_id = ?";

    $i = 1;
    while ($ad = $ar->FetchRow()) {
        // Process every user that was found
        $admin_id = $ad['admin_id'];
        $selected = '';

        // Get domain data; a user without a domain row yields empty values.
        $dr = exec_query($sql, $domain_query, array($admin_id));
        $dd = $dr->FetchRow();
        $domain_id = $dd ? $dd['domain_id'] : null;
        $domain_name = $dd ? $dd['domain_name'] : '';

        if ('' == $cr_user_id && $i == 1) {
            $selected = 'selected';
        } elseif ($cr_user_id == $domain_id) {
            $selected = 'selected';
        }

        $domain_name = $IDN->decode($domain_name);
        $domain_name = utf8_decode($domain_name);

        // 'USER_DOMAIN_ACCOUN' is the placeholder name the template uses; keep as is.
        $tpl->assign(array(
            'USER' => $domain_id,
            'USER_DOMAIN_ACCOUN' => $domain_name,
            'SELECTED' => $selected,
        ));
        $i++;
        $tpl->parse('USER_ENTRY', '.user_entry');
    }

    return true;
}
/**
 * Returns the readable form of a domain name.
 *
 * Names that do not contain the Punycode marker "xn--" are returned
 * untouched, without loading the converter library.
 *
 * @param string $domain Domain name, possibly Punycode-encoded.
 * @return string Decoded domain name (IDNA 2008 rules).
 */
function decodeIdna($domain)
{
    if (strpos($domain, 'xn--') === FALSE) {
        return $domain;
    }

    require_once _XE_PATH_ . 'libs/idna_convert/idna_convert.class.php';
    $converter = new idna_convert(array('idn_version' => 2008));

    return $converter->decode($domain);
}
/**
 * Returns the host name the current request (or CLI run) is for.
 *
 * On the command line the host is expected as the first script argument,
 * e.g. for a cron job:
 *     php -f /path_to_site/cron.php site.ru
 * An empty string is returned when that argument is missing.
 *
 * For web requests HTTP_HOST is returned; an internationalised
 * (Punycode, "xn--") host is decoded first.
 *
 * @return string
 */
public static function getHost()
{
    // Called from the command line: the domain comes from the first argument.
    if (PHP_SAPI == 'cli') {
        global $argv;
        if (isset($argv[1])) {
            return $argv[1];
        }
        return '';
    }

    $isPunycode = mb_strpos($_SERVER['HTTP_HOST'], 'xn--') !== false;
    if (!$isPunycode) {
        return $_SERVER['HTTP_HOST'];
    }

    // Internationalised domain: decode it.
    self::loadClass('idna_convert');
    $IDN = new idna_convert();

    return $IDN->decode($_SERVER['HTTP_HOST']);
}
// Style classes for this view belong in frontend/web/css/our-cars.css
// $accountModel->name    - account name
// $accountModel->phone   - phone number
// $accountModel->url     - link to the web site
// $accountModel->address - showroom address
// $accountModel->balance - balance
use yii\helpers\Html;
use yii\helpers\Url;
use yii\widgets\ListView;
use yii\web\View;

$this->registerCssFile('/css/ourCars.css', ['depends' => [\yii\bootstrap\BootstrapAsset::className()], 'position' => View::POS_END]);

// Build the displayed site URL as "scheme://host" with a readable
// (Punycode-decoded) host name.
$Punycode = new \idna_convert(array('idn_version' => 2008));
$siteUrl = parse_url($accountModel->url);
// Both parts are required: a URL such as "//host" has a host but no scheme,
// which used to raise an undefined-index notice and print "://host".
if (isset($siteUrl['host'], $siteUrl['scheme'])) {
    $siteUrl['host'] = $Punycode->decode($siteUrl['host']);
    $siteUrl = Html::encode($siteUrl['scheme'] . '://' . $siteUrl['host']);
} else {
    // Not parseable as scheme://host: show the stored value as it is.
    $siteUrl = Html::encode($accountModel->url);
}
?>
<div class="container system-our-cars">
<div class="row" style="margin-bottom:20px;">
<div class="col-xs-12">
<?php echo $this->render('_breadscrumb2', ['model' => $accountModel]); ?>
</div>
</div>
/**
 * Prints the standard SQL failure report for addsite() and terminates the script.
 *
 * @param mysqli $db_con    Connection whose last error is reported.
 * @param string $sql_query Query text that failed.
 * @param int    $err_row   Source line of the failing query.
 * @param string $function  Name of the function the query was issued from.
 * @param bool   $show_hint Whether to print the "Invalid query string" heading.
 * @return void Never returns.
 */
function addsite_abort_on_sql_error($db_con, $sql_query, $err_row, $function, $show_hint = true)
{
    printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error);
    if ($function) {
        echo "<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . $function . " <br /></span></p>";
    } else {
        echo "<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>";
    }
    echo "<p><span class='red'> Script execution aborted. <br /></span>";
    if ($show_hint) {
        echo "<p><strong>Invalid query string, which caused the SQL error:</strong></p>";
    }
    echo "<p> {$sql_query} </p>";
    exit;
}

/**
 * Adds a new site to the 'sites' table and links it to the selected categories.
 *
 * The URL is normalised first (optional Punycode decoding, URL-decoding,
 * optional UTF-8 conversion). If a site whose URL contains the same
 * host + path (ignoring scheme and "www.") already exists, a warning and the
 * add-site form are printed and the script exits.
 *
 * Changes compared with the previous version:
 *  - $message is initialised, so the function never returns an undefined variable;
 *  - missing scheme/host/path parts of the URL no longer raise notices;
 *  - the include/exclude defaults are escaped with the connection's
 *    real_escape_string() instead of addslashes();
 *  - the dead branch on the undefined $disallowed variable is removed;
 *  - unchecked category boxes no longer raise undefined-index notices;
 *  - the URL is HTML-escaped when echoed in the "already in database" message;
 *  - the SQL error report, previously pasted after every query, is one helper.
 *
 * @param string $url         Site URL as entered by the admin.
 * @param string $title       Site title.
 * @param string $short_desc  Short description.
 * @param array  $cat         Map category_id => 'on' for the ticked categories.
 * @param string $def_include '1' to load the default must/must-not include lists.
 * @param string $smap_url    Sitemap URL; replaced by 'NULL' unless it contains "http://".
 * @param string $authent     Authentication string stored with the site.
 * @param mixed  $prior_level Priority level stored with the site.
 * @return string HTML status message (may be empty).
 */
function addsite($url, $title, $short_desc, $cat, $def_include, $smap_url, $authent, $prior_level)
{
    global $db_con, $mysql_table_prefix, $debug, $dba_act, $common_dir, $add_auth, $home_charset, $curl;
    global $depth, $domaincb, $use_prefcharset, $include_dir, $idna, $conv_puny, $cyrillic;

    $message = '';

    if ($conv_puny && strstr($url, "xn--")) {
        require_once "{$include_dir}/idna_converter.php";
        // Initialize the converter class
        $IDN = new idna_convert(array('idn_version' => 2008));
        // Decode it to its readable presentation
        $url = $IDN->decode($url);
    }
    $url = urldecode($url); // get it readable
    if ($cyrillic) {
        $url = to_utf8($url); // because of the bug in PHP function urldecode() we need special processing for CP1252 charset
    }

    // We will need all details of the URL; parse_url() may return false or omit parts.
    $compurl = parse_url("" . $url);
    if (!is_array($compurl)) {
        $compurl = array();
    }
    $scheme = isset($compurl['scheme']) ? $compurl['scheme'] : '';
    $host = isset($compurl['host']) ? $compurl['host'] : '';
    $path = isset($compurl['path']) ? $compurl['path'] : '';

    // https scheme requires cURL extension
    if (!$curl && $scheme == "https") {
        $message = "<p class='msg cntr'><br /><br /><span class='warnadmin'>Sorry, but in order to index URLs containing the https scheme,<br />you need to install the cURL extension on your server.</span><br /><br /><br /></p>";
        echo "{$message}";
        addsiteform();
        exit;
    }

    // Find out whether the URL contains www. or only the basic domain, and
    // drop the scheme (http <-> https): only one variant is accepted as new URL.
    $url1 = $host . "" . $path;
    $url1 = str_replace("www.", "", $url1);
    if ($path == '') {
        $url1 = $url1 . "/";
    }
    if (strpos($url1, "?")) {
        $url1 = substr($url1, 0, strpos($url1, "?")); // remove arguments
    }
    $url1 = $db_con->real_escape_string($url1);

    // now check against already existing site URLs
    $sql_query = "SELECT site_ID from " . $mysql_table_prefix . "sites where url like'%{$url1}%'";
    $result = $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__);
    }
    $rows = $result->num_rows;

    if ($rows != 0) {
        $shown_url = htmlspecialchars((string) $url, ENT_QUOTES, 'UTF-8');
        echo "<br />\n <p class='msg cntr'><span class='warnadmin'> {$shown_url} </span></p>\n <br />\n <p class='msg cntr'><span class='warnadmin'> Site already in database </span></p>\n <br />\n ";
        addsiteform();
        exit;
    }

    $must_include = '';
    $must_not_include = '';
    if ($def_include == '1') {
        // get default values for URL 'must_include' and 'must_not_include'
        foreach (array('must_include', 'must_not_include') as $list_name) {
            $list_file = "{$common_dir}/{$list_name}.txt";
            $list_text = is_readable($list_file) ? file_get_contents($list_file) : false;
            ${$list_name} = $db_con->real_escape_string($list_text === false ? '' : $list_text);
        }
    }

    // valid sitemap url?
    if (!preg_match("/http:\\/\\//", $smap_url)) {
        $smap_url = 'NULL';
    }

    $sql_query = "SELECT * from " . $mysql_table_prefix . "sites";
    $result = $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__);
    }
    $count = $result->num_rows; // current count of URLs in table 'sites'

    // clean admin's input
    $url = $db_con->real_escape_string(substr(trim($url), 0, 1024));
    if ($title) {
        $title = $db_con->real_escape_string(cleaninput(substr(trim($title), 0, 255)));
    }
    if ($short_desc) {
        $short_desc = $db_con->real_escape_string(cleaninput(trim($short_desc)));
    }
    if ($smap_url) {
        $smap_url = $db_con->real_escape_string(substr(trim($smap_url), 0, 1024));
    }
    if ($authent) {
        $authent = $db_con->real_escape_string(cleaninput(substr(trim($authent), 0, 255)));
    }

    // insert new URL into sites table
    $sql_query = "INSERT INTO " . $mysql_table_prefix . "sites (url, title, short_desc, spider_depth, required, disallowed, can_leave_domain, db, smap_url, authent, use_prefcharset, prior_level)\n VALUES ('{$url}', '{$title}', '{$short_desc}', '{$depth}', '{$must_include}', '{$must_not_include}', '{$domaincb}', '{$dba_act}', '{$smap_url}', '{$authent}', '{$use_prefcharset}', '{$prior_level}')";
    $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__);
    }

    // Read back the id of the row that was just inserted. This check is made
    // regardless of $debug and omits the "Invalid query string" heading, as before.
    $sql_query = "SELECT site_ID from " . $mysql_table_prefix . "sites where url='{$url}'";
    $result = $db_con->query($sql_query);
    if ($db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__, false);
    }
    $row = $result->fetch_array(MYSQLI_NUM);
    $site_id = is_array($row) ? $row[0] : '';

    $sql_query = "SELECT category_id from " . $mysql_table_prefix . "categories";
    $result = $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__);
    }
    // Link the site to every category whose checkbox was ticked.
    while ($row = $result->fetch_array(MYSQLI_NUM)) {
        $cat_id = $row[0];
        if (is_array($cat) && isset($cat[$cat_id]) && $cat[$cat_id] == 'on') {
            $db_con->query("INSERT INTO " . $mysql_table_prefix . "site_category (site_id, category_id) values ('{$site_id}', '{$cat_id}')");
        }
    }
    if (!$db_con->errno) {
        $message = "<p class='msg'> New Site added to database {$dba_act} ...</p>";
    }

    // delete all invalid URLs from table 'sites'
    $sql_query = "DELETE from " . $mysql_table_prefix . "sites where site_id='0' OR site_id=''";
    $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__);
    }

    $sql_query = "SELECT* from " . $mysql_table_prefix . "sites";
    $result = $db_con->query($sql_query);
    if ($debug && $db_con->errno) {
        addsite_abort_on_sql_error($db_con, $sql_query, __LINE__ - 2, __FUNCTION__);
    }
    $new_count = $result->num_rows; // count of URLs after adding new site

    // Unchanged row count means the insert did not stick.
    if ($count == $new_count) {
        $message = "<p class='msg'> New Site not added to database {$dba_act}, because invlid</p>";
    }

    return $message;
}
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex, $use_nofollow, $cl, $use_robot, $use_pref, $url_inc, $url_not_inc, $num) { global $db_con, $entities, $min_delay, $link_check, $command_line, $min_words_per_page, $dup_content, $dup_url, $quotes, $plus_nr, $use_prefcharset; global $min_words_per_page, $supdomain, $smp, $follow_sitemap, $max_links, $realnum, $local, $tmp_dir, $auto_add, $admin_email, $idna, $conv_puny; global $mysql_table_prefix, $user_agent, $tmp_urls, $delay_time, $domain_arr, $home_charset, $charSet, $url_status, $redir_count; global $debug, $common, $use_white1, $use_white2, $use_black, $whitelist, $blacklist, $clear, $abslinks, $utf8_verify, $webshot; global $index_media, $index_image, $suppress_suffix, $imagelist, $min_image_x, $min_image_y, $dup_media, $index_alt, $no_log, $index_rss; global $index_audio, $audiolist, $index_video, $videolist, $index_embeded, $rss_template, $index_csv, $delim, $ext, $index_id3, $dba_act; global $converter_dir, $dict_dir, $cn_seg, $jp_seg, $index_framesets, $index_iframes, $cdata, $dc, $preferred, $index_rar, $index_zip, $curl; global $docs, $only_docs, $only_links, $case_sensitive, $vowels, $noacc_el, $include_dir, $thumb_folder, $js_reloc, $server_char; global $latin_ligatures, $phon_trans, $liga; // Currently (2013.01.11) the variable $use_prefcharset as defined in Admin Settings 'Obligatory use preferred charset' is used. 
// and not the variable $use_pref as defined in Admin Settings as a varaiable used for addsite() in .../admin/admin.php error_reporting(E_ALL & ~E_DEPRECATED & ~E_WARNING & ~E_NOTICE & ~E_STRICT); $data = array(); $cn_data = array(); $url_parts = array(); $url_status = array(); $url_status['black'] = ''; $contents = array(); $links = array(); $wordarray = array(); $topic = ''; $url_reloc = ''; $js_link = ''; $document = ''; $file = ''; $file0 = ''; $raw_file = ''; $seg_data = ''; $index_url = $url; $comment = $db_con->real_escape_string("Automatically added during index procedure, as this domain is not yet available in 'Sites' menu."); $admin_email = $db_con->real_escape_string($admin_email); if ($debug == '0') { if (function_exists("ini_set")) { ini_set("display_errors", "0"); } error_reporting(0); } else { error_reporting(E_ERROR); // otherwise a non existing siemap.xml would always cause a warning message } $needsReindex = 1; $deletable = 0; $nohost = 1; $i = 0; $nohost_count = 5; // defines count of attempts to get in contact with the server // check URL status while ($i < $nohost_count && $nohost) { $url_status = url_status($url, $site_id, $sessid); if (!stristr($url_status['state'], "NOHOST")) { $nohost = ''; // reset for successfull attempt } $i++; } // check for emergency exit if ($url_status['aborted'] == '1' || stristr($url_status['state'], "NOHOST")) { return $url_status; } // check for UFO file or invalid suffix if (stristr($url_status['state'], "ufo")) { return $url_status; } // JFIELD here is right before we try to retrieve the URL and get the error // echo "<h3>F****E: $url</h3>\n"; // check for 'unreachable' links and if it is a known URL, delete all keyword relationships, former indexed from the meanwhile unreachable link if (stristr($url_status['state'], "unreachable")) { printStandardReport('unreachable', $command_line, $no_log); $sql_query = "SELECT link_id from " . $mysql_table_prefix . 
"links where url='{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $row = $result->fetch_array(MYSQLI_NUM); $link_id = $row[0]; if ($link_id) { $sql_query = "DELETE from " . $mysql_table_prefix . "link_keyword where link_id={$link_id}"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } // here we should delete the keywords associated only to the unreachable link // but this takes too much time during index procedure // the admin is asked toc do it manually by using the regarding option in 'Clean' menue // // delete the meanwhile unreachable link from db $sql_query = "DELETE from " . $mysql_table_prefix . 
"links where link_id = {$link_id}"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } } return $url_status; } // check for overwritten URL, forced by the header, sending content PLUS any redirected URL if ($url_status['url_over'] && !$url_status['relocate']) { $url = $url_status['url_over']; } $url_parts = parse_all_url($url); $thislevel = $level - 1; // redirected URL ? if ($url_status['relocate']) { // if relocated, print message, verify the new URL, and redirect to new URL // check for redirection on an already indexed link $known_link = ''; $sql_query = "SELECT * from " . $mysql_table_prefix . "links where url='{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $known_link = $result->num_rows; if ($known_link) { $urlo_status['state'] = "URL was redirected to an already indexed page.<br />In order to prevent infinite indexation, this is not supported by Sphider-plus.<br />Indexation aborted for this URL"; $url_status['aborted'] = 1; return $url_status; } // remove the original URL from temp table. The relocated URL will be added later on. mysqltest(); $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$url}' AND id = '{$sessid}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $new_url = $url_status['path']; // URL of first redirection // remove the redirected URL, which eventually is already stored in db // before finally storing in db, we need to check for correct redirection. $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$new_url}' AND id = '{$sessid}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . 
" row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } // now special processing for relative links if (!strpos(substr($new_url, 0, 5), "ttp")) { $new_url = make_abs($new_url, $index_url); } if ($url == $new_url && $url_status['file']) { $url_status['relocate'] = ''; // remove this redirection, as it is 'in it selves' $url_status['state'] = "ok"; // try to index the conteent } $care_excl = '1'; // care file suffixed to be excluded $relocated = '1'; // URL is relocated if ($debug) { printRedirected($url_status['relocate'], $url_status['path'], $cl); } $count = "1"; while ($count <= $redir_count && $url_status['relocate'] && !$url_status['aborted']) { // check this redirection $url_status = url_status($new_url, $site_id, $sessid); if ($url_status['path']) { $new_url = $url_status['path']; // URL of another redirections // now special processing for relative links if (!strpos(substr($new_url, 0, 5), "ttp")) { $new_url = make_abs($new_url, $index_url); } } if ($debug) { printRedirected($url_status['relocate'], $url_status['path'], $cl); } $count++; } if ($url_status['relocate']) { $url_status['aborted'] = 1; $url_status['state'] = "<br />Indexation aborted because of too many redirections.<br />"; return $url_status; } if ($url_status['state'] != "ok") { $code = $url_status['state']; // check for most common client errors if (!preg_match("/401|402|403|404/", $code)) { $url_status['aborted'] = 1; // end indexing for cmplete site } else { $url_status['aborted'] = ''; // abort only for this page } if (strstr($code, "401")) { $code = "401 (Authentication required)"; } if (strstr($code, "403")) { $code = "403 (Forbidden)"; } if (strstr($code, "404")) { $code = "404 (Not found)"; } $url_status['state'] = "<br />Indexation aborted because of code: {$code}.<br />"; } // check final URL (which might 
be the 3. redirection) // and puriify final redirected URL $url = $db_con->real_escape_string(url_purify($new_url, $index_url, $can_leave_domain, $care_excl, $relocated, $local_redir)); // valid file suffix for the redirection?? if ($url) { if ($care_excl == '1') { // care about non-accepted suffixes reset($ext); while (list($id, $excl) = each($ext)) { if (preg_match("/\\.{$excl}(\$|\\?)/i", $url)) { // if suffix is at the end of the link, or followd by a question mark $url_status['state'] = 'Found: Not supported suffix'; // error message return $url_status; } } } } if (!$url) { $link_parts = parse_all_url($url); $host = $link_parts['host']; $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$index_url}' AND id = '{$sessid}' OR relo_link like '{$url}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $url_status['aborted'] = 1; $url_status['state'] = "<br />Indexation aborted because of undefined redirection error.<br />"; return $url_status; } // abort indexation, if the redirected URL is equal to calling URL if ($url == 'self') { $link_parts = parse_all_url($url); $host = $link_parts['host']; $sql_query = "DELETE from " . $mysql_table_prefix . 
"temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $url_status['aborted'] = 1; $url_status['state'] = "<br />Indexation aborted for this page, because the redirection was a link in it selves.<br />Blocked by Sphider-plus, because this could end in an infinite indexation loop.<br />"; return $url_status; } // abort indexation, if the redirected URL contains invalid file suffix if ($url == 'excl') { $link_parts = parse_all_url($url); $host = $link_parts['host']; $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $url_status['aborted'] = 1; $url_status['state'] = "<br />Indexation aborted because the redirected link does not meet the URL suffix conditions.<br />"; return $url_status; } // abort indexation, because purifing the redirected URL failed if (!strstr($url, "//")) { $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $url_status['aborted'] = 1; $url_status['state'] = "<br />Indexation aborted because {$url} is not supported.<br />"; return $url_status; } // abort indexation, if redirected URL met 'must/must not include' string rule if (!check_include($url, $url_inc, $url_not_inc)) { $link_parts = parse_all_url($url); $host = $link_parts['host']; $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link like '{$url}' AND id = '{$sessid}' OR relo_link like '{$url}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . 
" <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $url_status['aborted'] = 1; $url_status['state'] = "<br />Indexation aborted because the redirected link does not meet<br />the URL 'must include' or 'must not include' conditions.<br />"; return $url_status; } // if redirected URL is already known and in database: abort $rows0 = ''; $rows1 = ''; mysqltest(); $sql_query = "SELECT url from " . $mysql_table_prefix . "sites where url like '{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $rows0 = $result->num_rows; $sql_query = "SELECT * from " . $mysql_table_prefix . "links where url='{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $known_link = $result->fetch_array(MYSQLI_NUM); $md5 = $known_link[8]; if ($clear == 1) { clean_resource($result, '02'); } if ($rows0) { $url_status['state'] = "<br />URL already in database (as a site URL). Index aborted.<br />"; $url_status['aborted'] = 1; return $url_status; } // if known link, which is already indexed (because containing the md5 checksum), enter here if ($known_link[8]) { $count = $known_link[15]; $count++; if ($count > $redir_count) { // abort indexation $url_status['state'] = "<br />{$count}. attempt to redirect in the same (already indexed) URL, <br />which is no longer accepted by Sphider-plus. Indexation aborted for this site.<br />"; $url_status['aborted'] = 1; return $url_status; } else { $sql_query = "UPDATE " . $mysql_table_prefix . "links set relo_count='{$count}' where url='{$url}'"; $db_con->query($sql_query); } } // add redirected URL to temp table, if not yet known $sql_query = "SELECT link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $rows = $result->num_rows; if ($rows == 0) { $sql_query = "INSERT into " . $mysql_table_prefix . 
"temp (link, level, id, relo_count) values ('{$url}', '{$level}', '{$sessid}', '1')"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } } if ($clear == 1) { clean_resource($result, '02'); } // at the end of redirect, rebuild the url parts from the redirected URL. // This is the final URL, which will be indexed $url_parts = parse_all_url($url); } // end check any redirection/relocation // if a JavaScript file is currently indexed? $suffix = substr($url, strrpos($url, ".") + 1); $suffix = str_replace("/", "", $suffix); if (strlen($suffix) < "5") { if (preg_match("/js\$/", $suffix)) { $js_link = 1; // activate JS switch } } if ($smp != 1 && $follow_sitemap == 1) { // enter here if we don't already know a valid sitemap and if admin settings allowed us to do so $tmp_urls = get_temp_urls($sessid); // reload previous temp $url2 = remove_sessid(convert_url($url)); // get folder where sitemap should be and if exists, cut existing filename, suffix and subfolder $host = parse_addr($url2); $hostname = $host[host]; $more_sitemaps = array(); if ($hostname == 'localhost') { $host1 = str_replace($local, '', $url2); } $pos = strpos($host1, "/"); // on local server delete all behind the / if ($pos) { $host1 = substr($host1, 0, $pos); } // build full adress again, now only the host if ($hostname == 'localhost') { $url2 = "" . $local . "" . $host1 . 
""; } else { $url2 = "{$host['scheme']}://{$hostname}"; } $sitemap_name = "sitemap"; // standard name for sitemap file $input_file = "{$url2}/{$sitemap_name}"; // create path to sitemap $log_file = './sitemaps/current_sitemap.xml'; // destination for sitemap log-file $smap_found = ''; $indexed_map = ''; $map_cont = ''; // try to fetch individual sitemap url from database mysqltest(); $sql_query = "SELECT smap_url from " . $mysql_table_prefix . "sites where site_id='{$site_id}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $row = $result->fetch_array(MYSQLI_NUM); if (preg_match("/http:\\/\\//", $row[0])) { // use the individual sitemap $input_file = preg_replace("/.xml.gz|.xml/i", "", $row[0]); } $file = "" . $input_file . ".xml"; if ($fd = @fopen($file, "r")) { // uncompressed ? //if ($zd = @gzopen("".$input_file.".xml", "r")) { // uncompressed ? $map_cont = @stream_get_contents($fd); if ($map_cont && strpos($map_cont, "schemas/sitemap")) { // if we were able to read it $smap_found = '1'; } fclose($fd); } $gz_file = "" . $input_file . ".xml.gz"; if (!$smap_found && ($zd = @fopen("compress.zlib://{$gz_file}", "r"))) { // compressed ? //if (!$smap_found && $zd = @gzopen("".$input_file.".xml.gz", "r")) { // compressed ? $map_cont = @gzread($zd, 10485760); // max. 
10 MB (might be too large for some server) gzclose($zd); if ($map_cont && strpos($map_cont, "schemas/sitemap")) { $smap_found = '1'; } } //echo "\r\n\r\n<br>map_cont Array:<br><pre>";print_r($map_cont);echo "</pre>\r\n"; if ($smap_found) { if ($debug != '0') { // create a log-file of current sitemap.xml file_put_contents($log_file, $map_cont); } //$del = $db_con->query("DELETE from ".$mysql_table_prefix."temp"); // function get_sitemap and store_links will build a new temp table if (stristr($map_cont, "<sitemapindex")) { // if current sitemap file is an index file printStandardReport('validSitemapInd', $command_line, $no_log); $get_maps = simplexml_load_string($map_cont); if ($get_maps) { reset($get_maps); foreach ($get_maps as $map_x) { $new_links[] = $map_x->loc; // get all links to further sitemap files } if (is_array($new_links)) { // if we found more sitemap files $new_links = explode(",", implode(",", $new_links)); // destroy SimpleXMLElement Object and get the link array $new_links = array_slice($new_links, 0, $max_links); $indexed_map = '1'; $i = '0'; //echo "\r\n\r\n<br>new_links Array:<br><pre>";print_r($new_links);echo "</pre>\r\n"; foreach ($new_links as $input_file) { $these_links = get_sitemap($input_file, $indexed_map, $mysql_table_prefix); // now extract page links from this sitemap file //echo "\r\n\r\n<br>these_links Array:<br><pre>";print_r($these_links);echo "</pre>\r\n"; if ($these_links) { reset($these_links); store_newLinks($these_links, $level, $sessid); $smp = '1'; // there were valid sitemap files and we stored the new links $i++; } else { printStandardReport('invalidSecSitemap', $command_line, $no_log); // unable to extract links from secondary sitemap file } } printValidSecSmap($i, $cl); unset($input_file, $map_cont, $new_links); } else { printStandardReport('invalidSecSitemap', $command_line, $no_log); // unable to extract links from secondary sitemap file } } else { printStandardReport('invalidSitemapInd', $command_line, $no_log); // 
unable to extract links from sitemap INDEX file } } else { $links = get_sitemap($map_cont, $indexed_map, $mysql_table_prefix); // extract links from sitemap.xml (there was only one sitemap file) if ($links != '') { reset($links); //echo "\r\n\r\n<br>sitemmap links Array:<br><pre>";print_r($links);echo "</pre>\r\n"; store_newLinks($links, $level, $sessid); $smp = '1'; // there was one valid sitemap and we stored the new links printStandardReport('validSitemap', $command_line, $no_log); } else { printStandardReport('invalidSitemap', $command_line, $no_log); } unset($links); } } } if ($debug == '0') { if (function_exists("ini_set")) { ini_set("display_errors", "0"); } error_reporting(0); } else { error_reporting(E_ALL & ~E_DEPRECATED & ~E_WARNING & ~E_NOTICE & ~E_STRICT); } if ($url_status['state'] == 'ok') { $OKtoIndex = 1; $file_read_error = 0; if (time() - $delay_time < $min_delay) { sleep($min_delay - (time() - $delay_time)); } if ($url_status['file']) { $file = $url_status['file']; } else { $url_status['state'] = "Unable to read the content of the file.<br />{$url} does not deliver any content."; $realnum--; } } if ($url_status['state'] == 'ok') { // first attempt to define a charset $chrSet = ''; if ($use_prefcharset == '1') { // use preferred charset as defined in Admin settings $chrSet = $home_charset; //echo "<h1>USING PREFERRED CHARSET</h1>"; } else { if ($server_char && $url_status['charset']) { //echo "<h1>USING SERVER CHARSET</h1>"; $chrSet = $url_status['charset']; // use charset as supplied by the remote server } else { // try to extract the charset of this file //echo "<h1>USING CONTENT CHARSET</h1>"; //echo "<h1>" . substr($file, 0, 500) . 
"</h1>"; if (preg_match("'encoding=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) { //echo "<h1>1</h1>"; $chrSet = trim(strtoupper($regs[1])); // get encoding of current XML or XHTML file and use it furtheron } if (!$chrSet) { //echo "<h1>2</h1>"; if (preg_match("'charset=(.*?)[ \\/\\;\\'\"]'si", substr($file, 0, 3000), $regs)) { //echo "<h1>3</h1>"; $chrSet = trim(strtoupper($regs[1])); // get charset of current HTML file and use it furtheron } } if (!$chrSet) { //echo "<h1>4</h1>"; if (preg_match("'charset=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) { //echo "<h1>5</h1>"; $chrSet = trim(strtoupper($regs[1])); // get charset of current HTML file and use it furtheron } } // in assistance for all lazy webmasters $chrSet = preg_replace("/win-/si", "windows-", $chrSet); if ($chrSet == "1251") { //echo "<h1>6</h1>"; $chrSet = "windows-1251"; } if ($chrSet == '') { //echo "<h1>7</h1>"; $chrSet = $home_charset; // no charset found, we need to use default charset like for DOCs, PDFs, etc } } } //echo "<h1>CHRSET: $chrSet</h1>"; // if required, uncompress ZIP archives and make content of each file => text if ($url_status['content'] == 'zip' && $index_zip == '1' && $file) { file_put_contents("" . $tmp_dir . "/archiv.temp", $file); $zip = zip_open("" . $tmp_dir . "/archiv.temp"); if ($zip) { $url_status['content'] = "text"; // preventiv, if not another status will be detected for individual archiv files $file = ''; // starting with a blank file for all archive files $topic = 'zip'; if ($debug == '2') { printStandardReport('archivFiles', $command_line, $no_log); } while ($zip_entry = zip_read($zip)) { if (zip_entry_open($zip, $zip_entry, "r")) { $buf = zip_entry_read($zip_entry, zip_entry_filesize($zip_entry)); //uncompress the content of recent archiv file $name = zip_entry_name($zip_entry); // get filename of recent archive file if ($debug == '2') { // $report = "<strong> " . $name . 
"</strong>"; printThis($report, $cl); $size = (int) (zip_entry_filesize($zip_entry) / 1024); if ($size == 0) { $size = '1'; } $report = " - Unpacked size: " . $size . " kByte<br />"; printThis($report, $cl); } $buf = get_arch_content($buf, $name, $url, $chrSet); // if necessary, convert PDF, extract feed etc. for the recent file zip_entry_close($zip_entry); // done for this file in archiv $file .= "" . $buf . "<br /><br />"; // add all uncompressed and converted files together } } zip_close($zip); } unlink("" . $tmp_dir . "/archiv.temp"); } // if required, uncompress RAR archives and make content of each file => text if ($url_status['content'] == 'rar' && $index_rar == '1') { file_put_contents("" . $tmp_dir . "/archiv.temp", $file); $rar = rar_open("" . $tmp_dir . "/archiv.temp"); if ($rar) { $url_status['content'] = "text"; // preventiv, all individual archiv files willl be converted to 'text' $file = ''; // starting with a blank file for all archive files $topic = 'rar'; $entries = rar_list($rar); if ($rar) { if ($debug == '2') { printStandardReport('archivFiles', $command_line, $no_log); } foreach ($entries as $entry) { $name = $entry->getName(); if ($debug == '2') { $report = "<strong> " . $name . "</strong>"; printThis($report, $cl); $size = (int) ($entry->getPackedSize() / 1024); if ($size == 0) { $size = '1'; } $report = " - Packed size: " . $size . " kByte"; printThis($report, $cl); $size = (int) ($entry->getUnpackedSize() / 1024); if ($size == 0) { $size = '1'; } $report = " - Unpacked size: " . $size . " kByte<br />"; printThis($report, $cl); } $entry->extract('', "./" . $tmp_dir . "/" . $name . ""); // extract single file of archiv into temporary folder $buf = file_get_contents("./" . $tmp_dir . "/" . $name . ""); // read content of this intermediate file unlink("./" . $tmp_dir . "/" . $name . ""); // destroy this file if ($buf) { $buf = get_arch_content($buf, $name, $url, $chrSet); // if necessary, convert PDF, extract feed etc. 
for the recent file $file .= "" . $buf . "<br /><br />"; // add all uncompressed and converted files together } } } rar_close($rar); } unlink("" . $tmp_dir . "/archiv.temp"); } $file0 = $file; // rememberr the original (e.g. for doc2txt converter) // remove useless part of the content $file = purify_content($file); $valid_utf8 = '1'; $raw_file = $file; // kill eventually duplicate coding info in dynamic links if (stristr(substr($file, '0', '4000'), "encoding") && strstr(substr($file, '0', '4000'), "charset")) { $file = substr($file, strrpos($file, "<!DOCTYPE")); // subsstring starting at last found <!DOCTYPE } // we need to do it again for eventually new charset in archive $chrSet = ''; if ($use_prefcharset == '1') { // use preferred charset as defined in Admin settings $chrSet = $home_charset; } else { if ($server_char && $url_status['charset']) { $chrSet = $url_status['charset']; // use charset as supplied by the remote server } else { // try to extract the charset of this file if (preg_match("'encoding=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) { $chrSet = trim(strtoupper($regs[1])); // get encoding of current XML or XHTML file and use it furtheron } if (!$chrSet) { if (preg_match("'charset=(.*?)[ \\/\\;\\'\"]'si", substr($file, 0, 3000), $regs)) { $chrSet = trim(strtoupper($regs[1])); // get charset of current HTML file and use it furtheron } } if (!$chrSet) { if (preg_match("'charset=[\\'\"](.*?)[\\'\"]'si", substr($file, 0, 3000), $regs)) { $chrSet = trim(strtoupper($regs[1])); // get charset of current HTML file and use it furtheron } } // in assistance for all lazy webmasters $chrSet = preg_replace("/win-/si", "windows-", $chrSet); if ($chrSet == "1251") { $chrSet = "windows-1251"; } if ($chrSet == '') { $chrSet = $home_charset; // no charset found, we need to use default charset like for DOCs, PDFs, etc } } } if (strpos($chrSet, " ")) { // in the wild we have aloready seen a lot of variants $chrSet = substr($chrSet, 0, strpos($chrSet, " ")); 
} // some webmaster still use 'UNICODE' as name if (stristr($chrSet, "UNICODE")) { $chrSet = "UTF-8"; } // obsolete since 1990, but some (Italian) server still send it as charset . . . . if (stristr($chrSet, "8858")) { $chrSet = str_replace("8858", "8859", $chrSet); } // required coaching for some webmasters if (stristr($chrSet, "cp-")) { $chrSet = str_ireplace("CP-", "CP", $chrSet); } $contents['charset'] = $chrSet; if ($index_framesets == '1') { if (preg_match("@<frameset[^>]*>(.*?)<\\/frameset>@si", $file, $regs)) { printStandardReport('newFrameset', $command_line, $no_log); // separate the <frameset> ....</frameset> part of this file $frame = $regs[1]; $replace = get_frames($frame, $url, $can_leave_domain); $replace = "<body>" . $replace . "</body>"; // create the body tags for $file $contents['charset'] = $chrSet; // rebuild charset // include all replacements instead of the frameset tag into the actual file. This will become the body $file = preg_replace("@<frameset.*?</frameset>@si", "{$replace}", $file); } } if ($index_iframes == '1') { $links = array(); $regs = array(); $replace = ''; $get_charset = ''; $real_url = $url; if (preg_match_all("/(iframe[^>]*src[[:blank:]]*)=[[:blank:]]*[\\'\"]?(([[a-z]{3,5}:\\/\\/(([.a-zA-Z0-9-])+(:[0-9]+)*))*([+:%\\/?=&;\\\\(\\),._ a-zA-Z0-9-]*))(#[.a-zA-Z0-9-]*)?[\\'\" ]?/i", $file, $regs, PREG_SET_ORDER)) { printStandardReport('newIframe', $command_line, $no_log); // find all frames of the iframe; $care_excl = ''; // don't care file suffixed to be excluded $relocated = ''; // URL is not relocated foreach ($regs as $val) { if (($a = url_purify($val[2], $url, $can_leave_domain, $care_exel, $relocated, $local_redir)) != '') { $links[] = $a; // collect all iframe links } } if ($links) { foreach ($links as $url) { printNewLinks($url, $cl); if (preg_match("/.html|.htm|.xhtml|.xml|.php/i", $url)) { $frame = file_get_contents($url); // get content of this frame // separate the body part of this frame 
preg_match("@<body[^>]*>(.*?)<\\/body>@si", $frame, $regs); $body = $regs[1]; if ($abslinks == '1') { $body = make_abslinks($body, $url); // if required, correct links relative to found iframe } $replace = "" . $replace . "<br />" . $body . ""; } else { // might be an image $replace = "" . $replace . "<br /><img src=\"" . $url . "\">"; } } } // include all replacements instead of the iframe tag into the actual file $file = preg_replace("@<iframe.*?</iframe>@si", "{$replace}", $file); $contents['charset'] = $chrSet; // rebuild charset } $url = $real_url; } // in order to index RDF, RSD, RSS and ATOM feeds enter here if ($url_status['content'] == 'xml' && $index_rss == '1') { if (!preg_match("/<rss|atom|<feed|<rdf|<rsd/si", substr($file, 0, 400))) { printStandardReport('notRSS', $command_line, $no_log); // no valid feed detected $OKtoIndex = 0; $file_read_error = 1; $realnum--; } else { $html = ''; $xml = XML_IsWellFormed($file); // check for well-formed XML if ($xml != '1') { if ($debug > 0) { printNotWellFormedXML($xml, $cl); } $OKtoIndex = 0; $file_read_error = 1; $realnum--; } else { $rss = new feedParser(); // define options for feed parser $rss->limit = $max_links; // save time by limiting the items/entries to be processed $rss->in_cp = strtoupper($contents['charset']); // charset of actual file $rss->out_cp = 'UTF-8'; // convert all into this charset $rss->cache_dir = ''; // currently unused $rss->dc = $dc; // treat Dublin Core tags in RDF feeds $rss->pro = $preferred; // obey the PREFERRED directive in RSD feeds $rss->file = '1'; // use $file as feed (as a string, not URL) if ($cdata != 1) { $rss->CDATA = 'content'; // get it all (naughty) } else { $rss->CDATA = 'nochange'; // well educated crawler } // get feed as array if ($feed = $rss->get($url, $file)) { // if you want to see the feed during index procedure, uncomment the following row // echo "<br>FEED array:<br><pre>";print_r($feed);echo "</pre>"; $link = ''; $textinput_link = ''; $image_url = ''; 
$image_link = ''; $docs = ''; $subjects = ''; $count = ''; $type = $feed[type]; $count = $feed[sub_count]; $cached = $feed[cached]; // kill all no longer required values $feed[type] = ''; $feed[sub_count] = ''; $feed[encoding_in] = ''; $feed[encoding_out] = ''; $feed[items_count] = ''; $feed[cached] = ''; if (!$count) { $count = '0'; } if ($type == 'RSD') { // prepare all RSD APIs for ($i = 0; $i < $count; $i++) { $subjects .= '' . $feed['api'][$i]['name'] . '<br /> ' . $feed['api'][$i]['apiLink'] . '<br /> ' . $feed['api'][$i]['blogID'] . '<br /> ' . $feed['api'][$i]['settings_docs'] . '<br /> ' . $feed['api'][$i]['settings_notes'] . '<br />'; } } if ($type == 'Atom') { // prepare all Atom entries for ($i = 0; $i < $count; $i++) { $subjects .= '' . $feed['entries'][$i]['link'] . '<br /> ' . $feed['entries'][$i]['title'] . '<br /> ' . $feed['entries'][$i]['id'] . '<br /> ' . $feed['entries'][$i]['published'] . '<br /> ' . $feed['entries'][$i]['updated'] . '<br /> ' . $feed['entries'][$i]['summary'] . '<br /> ' . $feed['entries'][$i]['rights'] . '<br /> ' . $feed['entries'][$i]['author_name'] . ' ' . $feed['entries'][$i]['author_email'] . ' ' . $feed['entries'][$i]['author_uri'] . '<br /> ' . $feed['entries'][$i]['category_term'] . ' ' . $feed['entries'][$i]['category_label'] . ' ' . $feed['entries'][$i]['category_scheme'] . '<br /> ' . $feed['entries'][$i]['contributor_name'] . ' ' . $feed['entries'][$i]['contributor_email'] . ' ' . $feed['entries'][$i]['contributor_uri'] . '<br /> '; } } if ($type == 'RDF' | $type == 'RSS v.0.91/0.92' | $type == 'RSS v.2.0') { // For RDF and RSS feeds enter here // prepare channel image $image_url = $feed[image_url]; if ($image_url) { $width = $feed[image_width]; if (!$width || $width > '144') { $width = '88'; //set to default value } $height = $feed[image_height]; if (!$height || $height > '400') { $height = '31'; //set to default value } $feed[image_url] = "<img id=\"rss_007\" src=\"" . $image_url . "\" alt=\"" . 
$feed[image_title] . "\" width=\"" . $width . "\" height=\"" . $height . "\">"; } $image_link = $feed[image_link]; if ($image_link) { $feed[image_link] = "<a href=\"" . $image_link . "\">" . $image_link . "</a>"; } // prepare all RDF or RSS items for ($i = 0; $i < $count; $i++) { $subjects .= '' . $feed['items'][$i]['link'] . '<br /> ' . $feed['items'][$i]['title'] . '<br /> ' . $feed['items'][$i]['description'] . '<br /> ' . $feed['items'][$i]['author'] . '<br /> ' . $feed['items'][$i]['category'] . '<br /> ' . $feed['items'][$i]['guid'] . '<br /> ' . $feed['items'][$i]['comments'] . '<br /> ' . $feed['items'][$i]['pubDate'] . '<br /> ' . $feed['items'][$i]['source'] . '<br /> ' . $feed['items'][$i]['enclosure'] . '<br /> ' . $feed['items'][$i]['country'] . '<br /> ' . $feed['items'][$i]['coverage'] . '<br /> ' . $feed['items'][$i]['contributor'] . '<br /> ' . $feed['items'][$i]['date'] . '<br /> ' . $feed['items'][$i]['industry'] . '<br /> ' . $feed['items'][$i]['language'] . '<br /> ' . $feed['items'][$i]['publisher'] . '<br /> ' . $feed['items'][$i]['state'] . '<br /> ' . $feed['items'][$i]['subject'] . '<br /> '; } } // convert the channel/feed part into a string $feed_common = implode(" ", $feed); // build something that could be indexed $html .= "<html>\r\n<head>\r\n<title>" . $feed['title'] . "</title>\r\n<meta name=\"description\" content=\"" . $feed['description'] . " \">\r\n</head>\r\n"; $html .= "<body>\r\n" . $feed_common . "\r\n" . $subjects . 
"\r\n</body>\r\n</html>\r\n"; } if (strlen($html) < "130") { // can't be a valid feed if ($type == "unknown") { printInvalidFeedType($type, $cl); } else { printStandardReport('invalidRSS', $command_line, $no_log); } $OKtoIndex = 0; $file_read_error = 1; $realnum--; } else { $contents['charset'] = 'UTF-8'; // the feed reader converts all to utf-8 $file = $html; // use feed reader output if ($debug > 0) { printValidFeed($type, $count, $cl); } } } } } // duplicate here, but frames, iframes, or RSS might have added nonsense content $file = purify_content($file); // prepare CVS files if ($url_status['content'] == 'csv' && $index_csv == '1') { $file = str_replace(",", " ", $file); $file = str_replace(";", " ", $file); } //echo "\r\n\r\n<br>url_status Array:<br><pre>";print_r($url_status);echo "</pre>\r\n"; // for DOCs, PDFs, etc we need special text converter if ($url_status['content'] != 'text' && $url_status['content'] != 'xml' && $url_status['content'] != 'xhtml' && $url_status['content'] != 'csv') { $document = 1; $file = extract_text($file, $file0, $url_status['content'], $url, $chrSet); // because the converter already transferred the documents to UTF-8, we need to adjust it here $contents['charset'] = 'UTF-8'; $charSet = 'UTF-8'; if ($file == 'ERROR') { // if error, suppress further indexing $OKtoIndex = 0; $file_read_error = 1; $realnum--; } // reduce Pashtu and Urdu to the main Farsi letters if (strtolower($charSet) == 'windows-1256' && $url_status['content'] == 'pdf') { $f_letter0 = array("ﺎ", "�"); $f_letter1 = array("�", "�", "ﺑ", "ﺒ"); $f_letter2 = array("ï–", "ïÂÂÂâ€â€Â", "ïÂÂÂËœ", "ïÂÂÂâ„¢"); $f_letter3 = array("ﺕ", "ﺖ", "ïºâ€â€Â", "ﺘ"); $f_letter4 = array("ﺙ", "ﺚ", "ﺛ", "ﺜ"); $f_letter5 = array("�", "ﺞ", "ﺟ", "ﺠ"); $f_letter6 = array("ïº", "ï»", "ï¼", "ï½"); $f_letter7 = array("ﺡ", "ﺢ", "ﺣ", "ﺤ"); $f_letter8 = array("ﮋ", "ﮊ"); $f_letter9 = array("ﺥ", "ﺦ", "ﺧ", "ﺨ"); $f_letter10 = array("ﺩ", "ﺪ"); $f_letter11 = array("ﺫ", "ﺬ"); $f_letter12 = array("ïºÂÂÂ", 
"ﺮ"); $f_letter13 = array("ﺯ", "ﺰ"); $f_letter14 = array("ﺱ", "ﺲ", "ﺳ", "ﺴ"); $f_letter15 = array("ﺵ", "ﺶ", "ﺷ", "ﺸ"); $f_letter16 = array("ﺹ", "ﺺ", "ﺻ", "ﺼ"); $f_letter17 = array("ﺽ", "ﺾ", "ﺿ", "ﻀ"); $f_letter18 = array("�", "ﻂ", "ﻃ", "ﻄ"); $f_letter19 = array("ï»…", "ﻆ", "ﻇ", "ﻈ"); $f_letter20 = array("ﻉ", "ﻊ", "ﻋ", "ﻌ"); $f_letter21 = array("�", "ﻎ", "�", "�"); $f_letter22 = array("ﻑ", "ï»’", "ﻓ", "ï»â€ÂÂ"); $f_letter23 = array("ﻕ", "ï»–", "ï»â€â€Â", "ﻘ"); $f_letter24 = array("ï»™", "ﻚ", "ï»›", "ﻜ", "ﮎ", "�", "�", "ﮑ"); $f_letter25 = array("ï®’", "ﮓ", "ï®â€ÂÂ", "ﮕ"); $f_letter26 = array("�", "ﻞ", "ﻟ", "ï» "); $f_letter27 = array("ﻡ", "ﻢ", "ﻣ", "ﻤ"); $f_letter28 = array("ﻧ", "ﻨ", "ﻦ", "ﻥ"); $f_letter29 = array("ï»ÂÂÂ", "ï»®"); $f_letter30 = array("ﻩ", "ﻪ", "ﻫ", "ﻬ"); $f_letter31 = array("ﻯ", "ï»°", "ï»±", "ﻲ", "ﻳ", "ï»´"); $file = str_replace($f_letter0, "ا", $file); $file = str_replace($f_letter1, "ب", $file); $file = str_replace($f_letter2, "Ù¾", $file); $file = str_replace($f_letter3, "ت", $file); $file = str_replace($f_letter4, "Ø«", $file); $file = str_replace($f_letter5, "ج", $file); $file = str_replace($f_letter6, "Ú†", $file); $file = str_replace($f_letter7, "ØÂÂÂ", $file); $file = str_replace($f_letter8, "Ú˜", $file); $file = str_replace($f_letter9, "Ø®", $file); $file = str_replace($f_letter10, "د", $file); $file = str_replace($f_letter11, "Ø°", $file); $file = str_replace($f_letter12, "ر", $file); $file = str_replace($f_letter13, "ز", $file); $file = str_replace($f_letter14, "س", $file); $file = str_replace($f_letter15, "Ø´", $file); $file = str_replace($f_letter16, "ص", $file); $file = str_replace($f_letter17, "ض", $file); $file = str_replace($f_letter18, "Ø·", $file); $file = str_replace($f_letter19, "ظ", $file); $file = str_replace($f_letter20, "ع", $file); $file = str_replace($f_letter21, "غ", $file); $file = str_replace($f_letter22, "Ù�", $file); $file = str_replace($f_letter23, "Ù‚", $file); $file = str_replace($f_letter24, "Ú©", $file); $file = 
str_replace($f_letter25, "Ú¯", $file); $file = str_replace($f_letter26, "Ù„", $file); $file = str_replace($f_letter27, "Ù…", $file); $file = str_replace($f_letter28, "Ù†", $file); $file = str_replace($f_letter29, "Ùˆ", $file); $file = str_replace($f_letter30, "Ù‡", $file); $file = str_replace($f_letter31, "ÙŠ", $file); } } if ($OKtoIndex == 1) { $pageSize = number_format(strlen($file) / 1024, 2, ".", ""); printPageSizeReport($pageSize, $topic); } $charSet = strtoupper(trim($contents['charset'])); // final charset for UTF-8 converter if (stristr($charSet, "encoding") || strlen($charSet) < '3') { // must be invalid encountered charset $charSet = 'UTF-8'; } //echo "\r\n\r\n<br /> final charSet: '$charSet'<br />\r\n"; if ($charSet == "UTF-16") { $charSet = "UTF-8"; // content will be converted in function clean_file() } $dic = ''; // if Chinese or Korean text should be segmented enter here if ($cn_seg == '1' && $file && !$js_link && !stristr($charSet, "8859")) { if ($charSet == 'GB2312' || $charSet == 'GB18030' || $charSet == 'GBK') { $dic = "" . $dict_dir . "/cn_gb18030.dic"; // simplified Chinese } if ($charSet == 'BIG5') { $dic = "" . $dict_dir . "/cn_big5.dic"; // traditional Chinese } if ($charSet == 'ISO10646-1933') { $dic = "" . $dict_dir . "/kr_iso10646-1933.dic"; // Korean } if ($charSet == 'EUC-KR') { $dic = "" . $dict_dir . "/kr_euc-kr.dic"; // Korean } if ($charSet == 'UTF-8') { $dic = "" . $dict_dir . "/cn_utf-8.dic"; // Unicode } if ($dic) { // if dictionary is available for page charset, perform a segmentation $Segmentation = new Segmentation(); $Segmentation->load($dic); $Segmentation->setLowercase(FALSE); $cn_result = $Segmentation->segmentString($file); if ($cn_result && $charSet != 'UTF-8') { $iconv_file = @iconv($charSet, "UTF-8//IGNORE", $cn_result); if (trim($iconv_file) == "") { // iconv is not installed or input charSet not available. 
We need to use class ConvertCharset $NewEncoding = new ConvertCharset($charSet, "utf-8"); $NewFileOutput = $NewEncoding->Convert($cn_result); $cn_result = $NewFileOutput; } else { $cn_result = $iconv_file; } unset($iconv_file, $NewEncoding, $NewFileOutput); } $seg_data = clean_file($cn_result, $url, $url_status['content'], $charSet, $use_nofollow, $use_robot, $can_leave_domain); } else { printNoDictionary($charSet, $cl); // no dictionary found for this charset } } // if Japanese text should be segmented enter here. But not if a Chinese dictonary was already found if ($jp_seg == '1' && $file && !$js_link && !stristr($charSet, "ISO") && !$dic) { $dic = ''; if ($charSet == 'UTF-8' || $charSet == 'EUC-JP') { $file = @iconv($charSet, "SHIFT_JIS//IGNORE", $file); $charSet = "SHIFT_JIS"; } if ($charSet == 'SHIFT_JIS') { $dic = "" . $dict_dir . "/jp_shiftJIS.dic"; } if ($dic) { // if dictionary is available for page charset, perform a segmentation $Segmentation = new Segmentation(); $Segmentation->load($dic); $Segmentation->setLowercase(FALSE); $jp_result = $Segmentation->segmentString($file); //echo "\r\n\r\n<br /> jp_result: $jp_result<br />\r\n"; if ($jp_result && $charSet != 'UTF-8') { $iconv_file = @iconv($charSet, "UTF-8//IGNORE", $jp_result); if (trim($iconv_file) == "") { // iconv is not installed or input charSet not available. 
We need to use class ConvertCharset $NewEncoding = new ConvertCharset($charSet, "utf-8"); $NewFileOutput = $NewEncoding->Convert($jp_result); $jp_result = $NewFileOutput; } else { $jp_result = $iconv_file; } unset($iconv_file, $NewEncoding, $NewFileOutput); } $seg_data = clean_file($jp_result, $url, $url_status['content'], $charSet, $use_nofollow, $use_robot, $can_leave_domain); } else { printNoDictionary($charSet, $cl); // no dictionary found for this charset } } // enter here only, if site / file is not yet UTF-8 coded or had already been converted to UTF-8 if ($charSet != "UTF-8" && $file) { $file = convertToUTF8($file, $charSet, $char_Set, $converter_dir); } // if activated in Admin backend, check for correct converting of $file into UTF-8 if ($utf8_verify) { $valid_utf8 = @iconv('UTF-8', 'UTF-8', $file) === $file; } if (!$valid_utf8) { $url_status['state'] = "<br />Invalid charset definition placed in meta tags of HTML header. Unable to convert the text into UTF-8<br />Indexing aborted for {$url}"; if ($server_char) { $url_status['state'] = "<br />Invalid charset definition supplied via HTTP by the client server. 
Unable to convert the text into UTF-8<br />Indexing aborted for {$url}"; } if ($use_prefcharset) { $url_status['state'] = "<br />Invalid charset definition placed Admin Settings.<br />Site was created with another charset<br />Indexing aborted for {$url}"; } printUrlStatus($url_status['state'], $command_line, $no_log); $file = ''; $deletable = 1; } else { if ($index_media == '1') { $newmd5sum = md5($file); // get md5 including links and title of media files } $data = clean_file($file, $url, $url_status['content'], $charSet, $use_nofollow, $use_robot, $can_leave_domain); //echo "\r\n\r\n<br>data Array:<br><pre>";print_r($data);echo "</pre>\r\n"; // index only links and their titles if ($only_links) { $media_links = '0'; $my_links = get_link_details($file, $url, $can_leave_domain, $data['base'], $media_links, $use_nofollow, $local_redir); $data['content'] = $my_links[0][0]; // define new content $data['fulltext'] = $my_links[0][0]; // define new content also for 'full text'; } // combine raw words plus segmented words if ($cn_seg == 1 || $jp_seg == 1 && $dic && !$js_link) { if ($debug != '0') { $seg_add = $seg_data[count] - $data[count]; // calculate segmentation result if ($seg_add > '0') { if ($charSet == 'EUC-KR' || $charSet == 'ISO10646-1933') { printSegKR($seg_add, $cl); } if ($charSet == 'SHIFT_JIS') { printSegJA($seg_add, $cl); } else { printSegCN($seg_add, $cl); } } /* echo "<br /><pre>Results of word segmentation:</pre>"; echo "<br />Unsegmented title :<br><pre>";print_r($data[title]);echo "</pre>"; echo "<br />Segmented title :<br><pre>";print_r($seg_data[title]);echo "</pre>"; echo "<br />Unsegmented full text:<br />$data[fulltext]<br />"; echo "<br />Segmented full text:<br />$seg_data[fulltext]"; */ } $data[content] = "" . $data[content] . "" . $seg_data[content] . ""; //$data[title] ="".$data[title]."".$seg_data[title].""; $data[description] = "" . $data[description] . "" . $seg_data[description] . ""; $data[keywords] = "" . $data[keywords] . "" . 
$seg_data[keywords] . ""; } // check if canonical redirection was found in page head $cano_link = '0'; if ($data['cano_link']) { //echo "\r\n\r\n<br /> url: '$url'<br />\r\n"; $cano_link = $db_con->real_escape_string($data['cano_link']); //echo "\r\n\r\n<br /> cano_link: '$cano_link'<br />\r\n"; if ($url != $cano_link) { // only new cano links are accepted $OKtoIndex = 0; $deletable = 1; $realnum--; if ($cano_link == "1") { printNoCanonical($cano_link, $cl); // if unable to extract redirection link } else { if ($data['refresh'] == '1') { printRefreshed($cano_link, $data['wait'], $cl); // if refresh meta tag was found in HTML head } else { printCanonical($cano_link, $cl); // if canonical link was found in HTML head } // do we already know this link in link-table $sql_query = "SELECT /* jfield 2 */ url from " . $mysql_table_prefix . "links where url like '{$cano_link}'"; $res = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $rows = $res->num_rows; if ($rows == 0) { // if not known in link-table, check if already known in temp-table $sql_query = "SELECT /* jfield 1 */ link from " . $mysql_table_prefix . "temp where link like '{$cano_link}'"; $res = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . 
__FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $rows = $res->num_rows; if ($rows == 0) { // not known in link-table, add new link if ($numoflinks <= $max_links) { $sql_query = "INSERT into " . $mysql_table_prefix . "temp (link, level, id) values ('{$cano_link}', '{$level}', '{$sessid}')"; $db_con->query($sql_query); } if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } } } } } $cano_link = '0'; // reset the cano flag } else { if ($index_media == '0') { $newmd5sum = md5($data['content']); // get md5 from cleaned full text only } if ($md5sum == $newmd5sum) { printStandardReport('md5notChanged', $command_line, $no_log); $OKtoIndex = 0; $realnum--; } else { mysqltest(); // check for duplicate page content $sql_query = "SELECT * from " . $mysql_table_prefix . "links where md5sum='{$newmd5sum}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . 
" row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } if ($num_rows = $result->num_rows) { // display warning message and urls with duplicate content printStandardReport('duplicate', $command_line, $no_log); while ($row = $result->fetch_array(MYSQLI_ASSOC)) { $dups[] = $row['link_id']; } for ($i = 0; $i < $num_rows; $i++) { $link_id = $dups[$i]; //$num = $i+1; $sql_query = "SELECT * from " . $mysql_table_prefix . "links where link_id like '{$link_id}'"; $res1 = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $row = $res1->fetch_array(MYSQLI_NUM); $dup_url = urldecode($row[2]); $dup_url = $dup_url; $dup_url = @iconv($charSet, "UTF-8//IGNORE", $dup_url); if ($idna) { // Initialize the converter class $IDN = new idna_convert(array('idn_version' => 2008)); if ($conv_puny && strstr($dup_url, "xn--") && $idna) { $dup_url = $IDN->decode($dup_url); } } if ($clear == 1) { clean_resource($res, '03'); } printDupReport($dup_url, $command_line); } if ($dup_content == '0') { // enter here, if pages with duplicate content should not be indexed/re-indexed $OKtoIndex = 0; $realnum--; } else { $OKtoIndex = 1; } } } } //echo "\r\n\r\n<br>data array1:<br><pre>";print_r($data);echo "</pre>\r\n"; if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) { $urlparts = parse_addr($url); $newdomain = $urlparts['host']; $type = 0; if ($data['noindex'] == 1) { // remember this URlL, so it might not become another time a new link // check without scheme and www. $check_link = substr($check_link, stripos($url, "//") + 2); if (stristr($check_link, "www.")) { $check_link = substr($check_link, stripos($check_link, "www") + 4); } $sql_query = "SELECT url from " . $mysql_table_prefix . "links where url like '%{$check_link}'"; $res = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $known_link = $res->num_rows; if ($known_link != '1') { $sql_query = "INSERT into " . $mysql_table_prefix . "links (site_id, url, indexdate, size, md5sum, level) values ('{$site_id}', '{$url}', curdate(), '{$pageSize}', '{$newmd5sum}', '{$thislevel}')"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } } $OKtoIndex = 0; $deletable = 1; $realnum--; printStandardReport('metaNoindex', $command_line, $no_log); } if (!$js_link) { // JavaScript will not deliver keywords, only links are parsed $content = explode(" ", addslashes($data['content'])); //echo "\r\n\r\n<br>content array0:<br><pre>";print_r($content);echo "</pre>\r\n"; $acc_words[] = array(); $type = ''; // if Greek accents should be removed from Greek vowels if ($noacc_el) { foreach ($content as &$thisword) { $no_acc = remove_acc_el($thisword); if ($no_acc != $thisword) { $acc_words[] = $no_acc; } } } // if the other (Latin) accents should be removed from their vowels if ($vowels) { foreach ($content as $thisword) { $no_acc = remove_acc($thisword, ''); if ($no_acc != $thisword) { $acc_words[] = $no_acc; } } } // now add the words without accents to the total text content $content = array_merge($content, $acc_words); //echo "\r\n\r\n<br>content array0:<br><pre>";print_r($content);echo "</pre>\r\n"; 
// if ligatures should be equalized if ($liga) { $liga_words = array(); // will contain converted ligatures $phon_words = array(); // will contain converted phonetics // first: convert letters into latin ligatures foreach ($content as $thisword) { if ($thisword) { $liga_words[] = html_entity_decode($thisword, ENT_QUOTES, "UTF-8"); $thisword1 = $thisword; reset($latin_ligatures); while ($char = each($latin_ligatures)) { $thisword2 = preg_replace("/" . $char[0] . "/s", $char[1], $thisword1); // convert ligatures if ($thisword1 != $thisword2) { // break on first ligature $liga_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8"); // collect new words with ligatures $thisword1 = $thisword2; // continue with the word, containing the ligatures //break; } } } } // second: convert all letters into phonetic transcriptions reset($liga_words); foreach ($liga_words as $thisword) { $thisword1 = $thisword; reset($phon_trans); while ($char = each($phon_trans)) { $thisword2 = preg_replace("/" . $char[0] . "/s", $char[1], $thisword1); // convert into phonetics if ($thisword1 != $thisword2) { // break on first ligature $phon_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8"); // collect new words with phonetics $thisword1 = $thisword2; // continue with the word, containing the ligatures //break; } } } $liga_words = array_merge($liga_words, $phon_words); // add all phoneticss to the liga array // now vice versa: convert latin ligatures and phonetic transcriptions into standard letters reset($content); $not_liga_words = array(); foreach ($content as $thisword) { if ($thisword) { // first: convert latin ligatures into standard letters $thisword1 = superentities($thisword, ENT_QUOTES, "UTF-8"); reset($latin_ligatures); while ($char = each($latin_ligatures)) { $thisword2 = preg_replace("/" . $char[1] . 
"/s", $char[0], $thisword1); // re-convert ligatures if ($thisword1 != $thisword2) { $not_liga_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8"); // collect new words without ligatures $thisword1 = $thisword2; // continue with the word, containing the ligature } } } //echo "\r\n\r\n<br>not_liga_words Array:<br><pre>";print_r($not_liga_words);echo "</pre>\r\n"; // second: convert phonetic transcriptions into standard letters reset($not_liga_words); $not_phon_words = array(); foreach ($not_liga_words as $thisword) { $thisword1 = superentities($thisword, ENT_QUOTES, "UTF-8"); reset($phon_trans); while ($char = each($phon_trans)) { $thisword2 = preg_replace("/" . $char[1] . "/s", $char[0], $thisword1); // re-convert sphonetic if ($thisword1 != $thisword2) { $not_phon_words[] = html_entity_decode($thisword2, ENT_QUOTES, "UTF-8"); // collect new words without phonetics $thisword1 = $thisword2; // continue with the word, containing the phonetic trans. } } } } $not_words = array_merge($not_liga_words, $not_phon_words); // add all together $content = array_merge($liga_words, $not_words); // add all ligatures and re-converted letters to the content array } $wordarray = unique_array($content); } //echo "\r\n\r\n<br>wordarray0:<br><pre>";print_r($wordarray);echo "</pre>\r\n"; if ($smp != 1) { if ($data['nofollow'] != 1 && $cano_link == '0') { $media_links = '0'; $links = array(); if (!$document) { // don't try to find links in PDFs and other pure documents $links = get_links($file, $url, $can_leave_domain, $data['base'], $media_links, $use_nofollow, $local_redir, $url_reloc, $charSet); } if ($links[0]) { $links = distinct_array($links); $all_links = count($links); if ($all_links > $max_links) { $all_links = $max_links; } $links = array_slice($links, 0, $max_links); if ($realnum < $max_links) { $numoflinks = 0; //if there are any new links, add to the temp table, but only if there isn't such url already if ($links[0]) { reset($links); $tmp_urls = 
get_temp_urls($sessid); // reload previous temp // echo "\r\n\r\n<br>tmp_urls array:<br><pre>";print_r($tmp_urls);echo "</pre>\r\n"; if ($debug == '2') { // if debug mode, show details printStandardReport('newLinks', $command_line, $no_log); } while ($thislink = each($links)) { // echo "\r\n\r\n<br>thislink array:<br><pre>";print_r($thislink);echo "</pre>\r\n"; // ignore error (message) links and self linking if (strstr($thislink[1], "//") && $thislink[1] != $url) { // find new domains for _addurl table if ($auto_add && $can_leave_domain) { $all_link = parse_all_url($thislink[1]); // only the domain will be stored as new URL into addurl table $dom_link = $all_link['host']; // reduce to domain name and tld $new_link = str_replace("www.", "", $dom_link); // use the complete URL //$dom_link = $thislink[1]; // use only the domain $dom_link = $all_link['scheme'] . "://" . $dom_link; $banned = ''; mysqltest(); // check whether URL is already known in sites table $sql_query = "SELECT url from " . $mysql_table_prefix . "sites where url like '%{$new_link}%'"; $res1 = $db_con->query($sql_query); // check whether URL is already known in addurl table $sql_query = "SELECT url from " . $mysql_table_prefix . "addurl where url like '%{$new_link}%'"; $res2 = $db_con->query($sql_query); // check whether URL is banned $sql_query = "SELECT domain from " . $mysql_table_prefix . "banned where domain like '%{$new_link}%'"; $res3 = $db_con->query($sql_query); if ($res3->num_rows) { $banned = "1"; } if ($res1->num_rows == 0 && $res2->num_rows == 0 && $res3->num_rows == 0) { // add new domain into _addurl table $sql_query = "INSERT into " . $mysql_table_prefix . 
"addurl (url, description, account) values ('{$dom_link}', '{$comment}', '{$admin_email}')"; $db_con->query($sql_query); } } // check whether thislink is already known as a link ( might happen by means of relocated URLs) $res4 = ''; $res5 = ''; $known_link = ''; $known_temp = ''; $check_link = $thislink[1]; // i don't believe the "like" is necessary here and it slows down indexing // // check without scheme and www. // $check_link = substr($check_link, stripos($check_link, "//")+2); // if (stristr($check_link, "www.")) { // $check_link = substr($check_link, stripos($check_link, "www")+4); // } // // $sql_query = "SELECT /* jfield 3 */ url from ".$mysql_table_prefix."links where url like '%$check_link'"; // $res4 = $db_con->query($sql_query); // // $known_link = $res4->num_rows;; // // $sql_query = "SELECT /* jfield 4 */ link from ".$mysql_table_prefix."temp where link like '%$check_link'"; // $res5 = $db_con->query($sql_query); // if ($debug > 0 && $db_con->errno) { // printf("MySQL failure: %s\n", $db_con->error); // echo "<br />Script aborted."; // exit; // } // $known_temp = $res5->num_rows;; $sql_query = "SELECT /* jfield 3 */ url from " . $mysql_table_prefix . "links where url = '{$check_link}'"; $res4 = $db_con->query($sql_query); $known_link = $res4->num_rows; $sql_query = "SELECT /* jfield 4 */ link from " . $mysql_table_prefix . "temp where link = '{$check_link}'"; $res5 = $db_con->query($sql_query); if ($debug > 0 && $db_con->errno) { printf("MySQL failure: %s\n", $db_con->error); echo "<br />Script aborted."; exit; } $known_temp = $res5->num_rows; // if this is a new link not yet known or banned, add this new link to the temp table if ($tmp_urls[$thislink[1]] != 1 && !$res1 && !$known_link && !$known_temp && !$banned) { $tmp_urls[$thislink[1]] = 1; $numoflinks++; if ($debug == '2') { $act_link = rawurldecode($thislink[1]); // make it readable $act_link = stripslashes($act_link); printNewLinks($act_link, $cl); } mysqltest(); $sql_query = "INSERT into " . 
$mysql_table_prefix . "temp (link, level, id) values ('{$thislink['1']}', '{$level}', '{$sessid}')"; if ($numoflinks <= $max_links) { $db_con->query($sql_query); } } } } } } } } else { printStandardReport('noFollow', $command_line, $no_log); } unset($file); } // JFIELD at this point, the URL in the DB is good // echo "<h1>DONE</h1>"; // exit; // if we should index only the files as defined in docs list if ($only_docs) { $OKtoIndex = ''; foreach ($docs as $thisdoc) { if (strstr($urlparts['path'], $thisdoc)) { $OKtoIndex = "1"; } } if (!$OKtoIndex) { printStandardReport('noDoclist', $command_line, $no_log); } } if ($OKtoIndex == 1) { if ($link_check == 0) { $title = $data['title']; $host = $data['host']; $path = $data['path']; $fulltxt = $data['fulltext']; $desc = substr($data['description'], 0, 1024); // extract domain $url_parts = parse_all_url($url); $hostname = $url_parts[host]; // rebuild domain for localhost applications if ($hostname == 'localhost') { $host1 = str_replace($local, '', $url); } $pos = strpos($host1, "/"); // on local server delete all behind the / // will work for localhost URLs like http://localhost/publizieren/japan1/index.htm // will fail for localhost URLs like http://localhost/publizieren/externe/japan2/index.htm if ($pos) { $host1 = substr($host1, 0, $pos); // build full adress again, now only local domain } if ($hostname == 'localhost') { $domain_for_db = "" . $local . "" . $host1 . "/"; // complete URL $domain_for_db = str_replace("http://", "", $domain_for_db); //$domain_for_db = $host1; } else { //$domain_for_db = ("$url_parts[scheme]://".$hostname."/"); // complete URL $domain_for_db = $hostname; } if (isset($domain_arr[$domain_for_db])) { $dom_id = $domain_arr[$domain_for_db]; } else { mysqltest(); $sql_query = "INSERT into " . $mysql_table_prefix . 
"domains (domain) values ('{$domain_for_db}')"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $dom_id = $db_con->insert_id; $domain_arr[$domain_for_db] = $dom_id; } if (!$js_link) { // JavaScript will not deliver keywords, only links are parsed reset($wordarray); if ($case_sensitive == '0') { foreach ($wordarray as &$value) { $value[1] = lower_ent($value[1]); $value[1] = lower_case($value[1]); // convert keywords to lower case } } $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords'], $url_parts); } else { $wordarray = ''; } //if there are words to index, add the link to the database, get its id, and add the word + their relation if (is_array($wordarray) && count($wordarray) >= $min_words_per_page) { $OKtoSave = 1; if ($use_white1 == '1') { // check if content of page matches ANY word in whitelist $found = '0'; foreach ($whitelist as $key => $val1) { reset($wordarray); while ($thisword = each($wordarray)) { $word = trim($thisword[1][1]); if (strcasecmp($val1, $word) == 0) { $found = '1'; } } } if ($found == '0') { printStandardReport('noWhitelist', $command_line, $no_log); $OKtoSave = 0; $realnum--; } } if ($use_white2 == '1') { // check if content of page matches ALL words in whitelist $all = count($whitelist); $found = '0'; $found_this = '0'; foreach ($whitelist as $key => $val2) { reset($wordarray); while ($thisword = each($wordarray)) { $word = 
trim($thisword[1][1]); if (strcasecmp($val2, $word) == 0) { $found_this = '1'; } } if ($found_this != '0') { $found++; $found_this = '0'; } } if ($found != $all) { printStandardReport('noWhitelist', $command_line, $no_log); $OKtoSave = 0; $realnum--; } } if ($use_black == '1') { $found = '0'; // check if content of page matches ANY string in blacklist foreach ($blacklist as $key => $val3) { $met = stripos($data[fulltext], $val3); if ($met) { $found = '1'; } } if ($found == '1') { printStandardReport('matchBlacklist', $command_line, $no_log); $OKtoSave = 0; $realnum--; $url_status['black'] = 1; return $url_status; } } // if activated in Admin backend, create a thumbnail of this URL if ($OKtoSave && $hostname != 'localhost' && $webshot) { $shot = ''; // will contain the png webshot $img = new webshots(); $shot = $img->url_to_image($url); if ($debug && stristr($shot, "error: #")) { $shot_warn = "<br />Unable to create the webshot because of " . $shot; printWarning($shot_warn, $command_line, $no_log); } else { $shot = $db_con->real_escape_string($shot); } } if ($md5sum == '' || $md5sum == '' && $url_status['relocate']) { // enter here for new page (unknown link) OR for new relocated URL(so it will become a new link) // title, description and fulltxt are already escaped in function clean_file(); $url = $db_con->real_escape_string($url); // jfield says: messy char decoding earlier // leaves crap here that fudges up the works $title_enc = mb_detect_encoding($title); if (mb_detect_encoding($title) != "UTF-8") { $title = iconv($title_enc, "UTF-8", $title); } $fulltxt = substr($fulltxt, 0, 100000); // we've got to stop somewhere $fulltxt_enc = mb_detect_encoding($fulltxt); if (mb_detect_encoding($title) != "UTF-8") { $fulltxt = iconv($fulltxt_enc, "UTF-8", $fulltxt); } mysqltest(); $sql_query = "INSERT into " . $mysql_table_prefix . 
"links (site_id, url, title, description, fulltxt, indexdate, size, md5sum, level, webshot) values ('{$site_id}', '{$url}', '{$title}', left('{$desc}', 255), '{$fulltxt}', curdate(), '{$pageSize}', '{$newmd5sum}', '{$thislevel}', '{$shot}')"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; //exit; // jfield: let's keep going return; } $sql_query = "SELECT link_id from " . $mysql_table_prefix . "links where url='{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $row = $result->fetch_array(MYSQLI_NUM); $link_id = $row[0]; if ($OKtoSave) { // store link details, if not yet known (during reindex) if ($only_links) { // extract domain of current page delivering the new links $url_parts = parse_all_url($url); $hostname = $url_parts[host]; if ($hostname == 'localhost') { // rebuild domain for localhost applications $host1 = str_replace($local, '', $url); } $pos = strpos($host1, "/"); // on local server delete all behind the / // will work for localhost URLs like http://localhost/publizieren/japan1/index.htm // will fail for localhost URLs like http://localhost/publizieren/externe/japan2/index.htm if ($pos) { $host1 = substr($host1, 0, $pos); // build full adress again, now only local domain } if ($hostname == 'localhost') { $domain_db = "" . $local . "" . $host1 . "/"; // complete URL $domain_db = str_replace("http://", "", $domain_db); //$domain_db = $host1; } else { //$domain_db = ("$url_parts[scheme]://".$hostname."/"); // complete URL $domain_db = $hostname; } // now store all link details into db foreach ($my_links as $found_link) { // but only if we have found a title if ($found_link[3]) { mysqltest(); // check whether URL is already known in sites table $sql_query = "SELECT title from " . $mysql_table_prefix . "link_details where link_id like '{$link_id}' and url like '%{$found_link['2']}%'"; $res1 = $db_con->query($sql_query); if ($res1->num_rows == 0) { // must be new link $sql_query = "INSERT into " . $mysql_table_prefix . 
"link_details (link_id, url, title, indexdate, domain) values ('{$link_id}', '{$found_link['2']}', '{$found_link['3']}', now(), '{$domain_db}')"; $db_con->query($sql_query); } } } } if ($debug == '2') { // if debug mode, show details printStandardReport('newKeywords', $command_line, $no_log); } save_keywords($wordarray, $link_id, $dom_id); } mysqltest(); if ($index_media == '1' && $OKtoSave) { // find media content only if there was no conflict with text (white and/or blacklist) include "index_media.php"; // try to find media files } mysqltest(); if ($debug == '2') { printStandardReport('indexed1', $command_line, $no_log); } else { printStandardReport('indexed', $command_line, $no_log); } } else { if ($md5sum != '' && $md5sum != $newmd5sum && $OKtoSave) { //if page has changed, start updating mysqltest(); $sql_query = "SELECT link_id from " . $mysql_table_prefix . "links where url='{$url}'"; $result = $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } $row = $result->fetch_array(MYSQLI_NUM); $link_id = $row[0]; $sql_query = "DELETE from " . $mysql_table_prefix . "link_keyword where link_id={$link_id}"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . 
__FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } if ($debug == '2') { // if debug mode, show details printStandardReport('newKeywords', $command_line, $no_log); } save_keywords($wordarray, $link_id, $dom_id); $sql_query = "UPDATE " . $mysql_table_prefix . "links set title='{$title}', description ='{$desc}', fulltxt = '{$fulltxt}', indexdate=now(), size = '{$pageSize}', md5sum='{$newmd5sum}', level='{$thislevel}', webshot='{$shot}' where link_id='{$link_id}'"; mysqltest(); $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } if ($index_media == '1') { include "index_media.php"; // try to find media files } if ($debug == '2') { printStandardReport('re-indexed1', $command_line, $no_log); } } } } else { if ($js_link) { printStandardReport('js_content', $command_line, $no_log); } else { printStandardReport('minWords', $command_line, $no_log); } $realnum--; } } else { printStandardReport('link_okay', $command_line, $no_log); } unset($file, $title, $fulltxt, $desc); $wordarray = array(); $data = array(); $seg_data = array(); } } } } else { $deletable = 1; //printUrlStatus($url_status['state'], $command_line, $no_log); } mysqltest(); if ($url_status['relocate']) { // remove this relocated URL from temp table, because it is indexed now $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$url}' AND id = '{$sessid}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. 
<br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } } if ($reindex == 1 && $deletable == 1) { check_for_removal($url); } else { if ($reindex == 1) { } } if (!isset($all_links)) { $all_links = 0; } if (!isset($numoflinks)) { $numoflinks = 0; } // if valid sitemap found, or canonical link, or something else, no LinkReport if ($smp != 1 && $OKtoIndex == 1 && $url_status['state'] == 'ok') { printLinksReport($numoflinks, $all_links, $command_line); } // remove the URL, which haas been idexed now from temp table. mysqltest(); $sql_query = "DELETE from " . $mysql_table_prefix . "temp where link = '{$url}' AND id = '{$sessid}'"; $db_con->query($sql_query); if ($debug && $db_con->errno) { $err_row = __LINE__ - 2; printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error); if (__FUNCTION__) { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>"); } else { printf("<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} <br /></span></p>"); } printf("<p><span class='red'> Script execution aborted. <br /></span>"); printf("<p><strong>Invalid query string, which caused the SQL error:</strong></p>"); echo "<p> {$sql_query} </p>"; exit; } return $url_status; }
/**
 * Decode an internationalized domain name.
 *
 * Delegates to a freshly created idna_convert instance.
 *
 * @param string $strDomain The domain name to decode
 *
 * @return string Whatever idna_convert::decode() yields for the given domain
 */
public static function decode($strDomain)
{
    $objConverter = new \idna_convert();
    $strDecoded = $objConverter->decode($strDomain);

    return $strDecoded;
}
/**
 * Fetch one search-result page and look for $site among the result links.
 *
 * Flow:
 *  1. If the global request counter has reached $maxCountOfRequest, either reset it
 *     (more than $delta seconds have passed since the stored access date) or store the
 *     counter, print the remaining wait time as "h:m:s" and terminate the script.
 *  2. Download the page with curl() and apply the patterns from google_regexp.php.
 *  3. If no result link matched, the response is parsed as a captcha page: the counter
 *     is stored, the captcha image is saved, an HTML form is printed and the script
 *     terminates. When the captcha fields are missing as well, "regexp error" is
 *     printed and 0 is returned.
 *  4. Otherwise every result host is converted between Punycode and UTF-8, lower-cased
 *     and compared with the lower-cased $site.
 *
 * NOTE(review): google_regexp.php is not visible here; it is assumed to fill $matches,
 * $matches_nav and $page_nav from $response - confirm against that file.
 * NOTE(review): the SQL statements interpolate the globals $user_id / $seacher directly;
 * make sure they are escaped by the caller.
 *
 * @param string $url       Host part used for the request and to build absolute
 *                          "http://www.{$url}..." links
 * @param string $url_str   Request string handed to curl(); also embedded in the
 *                          captcha form's JavaScript handlers
 * @param string $site      Site to look for (compared case-insensitively)
 * @param string $str_query Not referenced directly in this function (it is still
 *                          visible to google_regexp.php, which is required in this scope)
 *
 * @return array|int array("statistic", 1-based position, lower-cased $site, rebuilt URL)
 *                   when the site is found; otherwise the array of navigation links
 *                   (keyed by $page_nav[1][$i]) when at least one was matched; 0 otherwise
 */
function parser($url, $url_str, $site, $str_query)
{
    // The complete list is kept on purpose: google_regexp.php is required inside this
    // function's scope and may read any of these names.
    global $maxCountOfRequest, $countOfRequest, $count_tablename, $captcha_tablename, $activity_tablename, $time_start, $endUrl, $user_id, $delta, $countRequest, $seacher, $img_captcha;

    //------------------- make sure the number of requests does not exceed the allowed maximum -------------------
    if ($countOfRequest >= $maxCountOfRequest) {
        $query = "SELECT accessdate FROM {$count_tablename} WHERE user_id = '{$user_id}' AND seacher = '{$seacher}'";
        $result = mysql_query($query);
        if (!$result) {
            error_message(mysql_error());
        }
        $query_data = mysql_fetch_array($result);
        // mysql_fetch_array() returns false when there is no row; treat that as
        // "never accessed" instead of indexing a boolean.
        $accessdate = (is_array($query_data) && isset($query_data['accessdate'])) ? $query_data['accessdate'] : '';

        $todayU = intval(date("U"));
        $accessU = intval(strtotime($accessdate));
        // There is no winter time in Russia, so no daylight-saving correction is applied.
        $todayDate = mktime(date("H"), date("i"), date("s"), date("m"), date("d"), date("Y"));
        $accessdate = date("Y-m-d H:i:s", $todayDate);
        $diff = $todayU - $accessU;

        /*
         * If the pause is longer than $delta, reset the counter and store the new time.
         * On later updates of count in $count_tablename the time is not touched until
         * $delta has elapsed.
         */
        if ($diff > $delta) {
            $countOfRequest = 0;
            $query = "UPDATE {$count_tablename} SET count = '0', accessdate = '{$accessdate}' WHERE user_id = '{$user_id}' AND seacher = '{$seacher}'";
            $result = mysql_query($query);
            if (!$result) {
                error_message(mysql_error());
            }
        } else {
            $query = "UPDATE {$count_tablename} SET count = '{$countOfRequest}' WHERE user_id = '{$user_id}' AND seacher = '{$seacher}'";
            $result = mysql_query($query);
            if (!$result) {
                error_message(mysql_error());
            }

            // Print the remaining wait time as h:m:s (not zero-padded) and stop.
            $diff = $delta - $diff;
            $houer = floor($diff / (60 * 60));
            $min = floor($diff / 60) - $houer * 60;
            $sec = $diff - $houer * 60 * 60 - $min * 60;
            $time = $houer . ":" . $min . ":" . $sec;
            echo $time;
            // (Disabled in the original: flagging "parsing finished" in $activity_tablename.)
            exit;
        }
    }
    //------------------------------------------------------------------------------------------------------------

    $response = curl($url_str, $url);

    // Load the regular expressions that find result sites and navigation links.
    require 'google_regexp.php';

    if (empty($matches[0])) {
        // No result links: parse the response as a captcha page.
        // URL of the captcha image
        $imgSrc = 'http://www.' . $url . parser_first_capture('/<img src="([^>"]+?)"[^>]+?>/i', $response);
        // Form action
        $action = parser_first_capture('/<form action="([^>"]+?)"[^>]+?>/i', $response);
        // Value of input "continue"
        $continue = urlencode(parser_first_capture('/<input type="hidden" name="continue" value="([^>]+?)">/i', $response));
        // Value of input "id"
        $id = parser_first_capture('/<input type="hidden" name="id" value="([^>]+?)">/i', $response);
        // Value of input "submit"
        $submit = parser_first_capture('/<input type="submit" name="submit" value="([^>"]+?)"[^>]+?>/i', $response);

        if ($id && $continue) {
            // This is a captcha page: store $countOfRequest in the database.
            $query = "UPDATE {$count_tablename} SET count = '{$countOfRequest}' WHERE user_id = '{$user_id}' AND seacher = '{$seacher}'";
            $result = mysql_query($query);
            if (!$result) {
                error_message(mysql_error());
            }
            // (Disabled in the original: flagging "parsing finished" in $activity_tablename
            // and "captcha served" in $captcha_tablename.)

            save_image($imgSrc, $url, $img_captcha);
            $img_captcha = dirname($_SERVER['PHP_SELF']) . '/' . $img_captcha;
            // The timestamp is appended to the image URL as a ?date= query parameter.
            $todayU = intval(date("U"));

            // NOTE(review): $action, $id and $submit come from the remote page and are
            // emitted unescaped, exactly as before - consider escaping them.
            $htmlCaptcha = "<form action='sendCaptcha.php' method='Get'>\n"
                . " <img src='{$img_captcha}?date={$todayU}'/>\n"
                . " <div id='reload' onClick='reloadGoogle(\"" . $url_str . "\");'>reload</div><br/>\n"
                . " <input type = 'text' id = 'captcha' name = 'captcha' value = '' size = '12'/><br/>\n"
                . " <input type = 'hidden' id = 'action' name = 'action' value = '{$action}'/>\n"
                . " <input type = 'hidden' id = 'continue' name = 'continue' value = '{$continue}'/>\n"
                . " <input type = 'hidden' id = 'id' name = 'id' value = '{$id}'/>\n"
                . " <input type = 'hidden' id = 'user_id' name = 'user_id' value = '{$user_id}'/>\n"
                . " <input type = 'hidden' id = 'submitG' name = 'submitG' value = '{$submit}'/>\n"
                . " <input value = 'Отправить' id = 'submit2' type = 'submit' onClick = 'SubmitCaptchaGoogle(\"" . $url_str . "\");return false;'/><br/>\n\n"
                . " </form>";
            echo $htmlCaptcha;
            exit;
        }

        echo 'regexp error';
        return 0;
    }

    $countOfRequest++;
    $countRequest++;

    // Navigation links to further result pages, keyed by $page_nav[1][$i].
    $href_nav = array();
    if (isset($matches_nav[1]) && is_array($matches_nav[1])) {
        foreach ($matches_nav[1] as $i => $v) {
            $href_nav[$page_nav[1][$i]] = "http://www." . $url . $v;
        }
    }

    // $href - list of result URLs, re-indexed from 0 so the key is the result position.
    $href = (isset($matches[1]) && is_array($matches[1])) ? array_values($matches[1]) : array();

    // Loop-invariant work: one converter and one lower-cased needle for all results.
    $idn = new idna_convert(array('idn_version' => 2008));
    $site = mb_strtolower($site, "UTF-8");

    // Search for the site.
    foreach ($href as $i => $value) {
        // The dot is escaped so only a literal "www." prefix is stripped
        // (the unescaped form also swallowed prefixes such as "www-").
        preg_match('/^(www\.)?([^\/]+)/i', $value, $matchesSite);
        preg_match('/^(www\.)?(.+)/i', $value, $matchesSiteFull);
        $position = $i + 1;

        // Punycode: bring internationalized (e.g. Russian-language) domains to the other
        // representation - decode when an "xn--" marker is present, encode otherwise.
        $punycode = parser_toggle_idn($idn, isset($matchesSite[2]) ? $matchesSite[2] : '');
        $punycodeFull = parser_toggle_idn($idn, isset($matchesSiteFull[2]) ? $matchesSiteFull[2] : '');

        // Rebuild the URL segment by segment; every kept segment gets a trailing "/".
        $rebuilt = '';
        foreach (explode('/', $value) as $val) {
            // Only empty segments are skipped; a segment "0" is a valid path part.
            if ($val !== '') {
                $rebuilt .= parser_toggle_idn($idn, $val) . '/';
            }
        }
        $value = $rebuilt;

        $punycode = mb_strtolower($punycode, "UTF-8");
        $punycodeFull = mb_strtolower($punycodeFull, "UTF-8");

        if ($punycode == $site || $punycodeFull == $site) {
            return array("statistic", $position, $site, $value);
        }
    }

    // Site not on this page: hand back the navigation links, or 0 when there are none.
    return $href_nav ? $href_nav : 0;
}

/**
 * Return the first capture group of the first match of $pattern in $subject.
 *
 * @param string $pattern PCRE pattern containing at least one capture group
 * @param string $subject Text to search
 *
 * @return string The captured text, or '' when the pattern does not match
 */
function parser_first_capture($pattern, $subject)
{
    if (preg_match($pattern, $subject, $found) && isset($found[1])) {
        return $found[1];
    }

    return '';
}

/**
 * Decode $text when it contains an "xn--" marker, otherwise encode it.
 *
 * @param idna_convert $idn  Converter instance to use
 * @param string       $text Host name or path segment
 *
 * @return mixed Result of $idn->decode() or $idn->encode()
 */
function parser_toggle_idn($idn, $text)
{
    return stripos($text, 'xn--') !== false ? $idn->decode($text) : $idn->encode($text);
}
/**
 * Transforms a Punycode string to a UTF-8 string
 *
 * Delegates to a freshly created idna_convert instance.
 *
 * @param   string  $punycodeString  The Punycode string to transform
 *
 * @return  string  The UTF-8 URL
 *
 * @since   3.1.2
 */
public static function fromPunycode($punycodeString)
{
    $converter = new idna_convert();
    $utf8String = $converter->decode($punycodeString);

    return $utf8String;
}
/*
//else if ($forum_user['id'] == $id || ($forum_user['is_admmod'] && $user['email_setting'] == '2'))
//	$forum_page['user_private']['forum-mail'] = '<li><span>'.$lang_profile['E-mail'].': <a href="'.forum_link($forum_url['email'], $id).'">'.$lang_profile['Send forum e-mail'].'</a></span></li>';
*/

// Website: build the "website" contact entry when the user has set a URL.
if ('' != $user['url']) {
    // $url_source ends up in the href attribute, $user['url'] is the visible link text.
    $url_source = $user['url'];

    // IDNA url handling - only when both feature constants are defined
    if (defined('FORUM_SUPPORT_PCRE_UNICODE') && defined('FORUM_ENABLE_IDNA')) {
        // Load the IDNA class for international url handling
        require_once FORUM_ROOT . 'include/idna/idna_convert.class.php';

        $idn = new idna_convert();
        $idn->set_parameter('encoding', 'utf8');
        $idn->set_parameter('strict', false);

        if (!preg_match('!^(https?|ftp|news){1}' . preg_quote('://xn--', '!') . '!', $url_source)) {
            // No "xn--" host right after the scheme: encode the link target, keep the text.
            $url_source = $idn->encode($url_source);
        } else {
            // Host already starts with "xn--": show the decoded form, keep the target.
            $user['url'] = $idn->decode($url_source);
        }
    }

    // Only the visible text is run through the censor.
    if ('1' == $forum_config['o_censoring']) {
        $user['url'] = censor_words($user['url']);
    }

    $url_source = forum_htmlencode($url_source);
    $user['url'] = forum_htmlencode($user['url']);

    $forum_page['url'] = '<a href="' . $url_source . '" class="external url" rel="me">' . $user['url'] . '</a>';
    $forum_page['user_contact']['website'] = '<li><span>' . $lang_profile['Website'] . ': ' . $forum_page['url'] . '</span></li>';
}

// Administrators/moderators additionally get the registration IP, linked via the get_host URL.
if ($forum_user['is_admmod']) {
    $forum_page['user_private']['ip'] = '<li><span>' . $lang_profile['IP'] . ': <a href="' . forum_link($forum_url['get_host'], forum_htmlencode($user['registration_ip'])) . '">' . forum_htmlencode($user['registration_ip']) . '</a></span></li>';
}
/**
 * convert addresses into array with name/address
 *
 * The address list is parsed with Tinebase_Mail::parseAdresslist(). When a punycode
 * converter is given, addresses whose domain contains an ACE ("xn--") label are passed
 * through its decode() method; all other addresses are taken over unchanged.
 *
 * @param string $_addresses address list as a single string
 * @param idna_convert $_punycodeConverter optional converter; NULL disables decoding
 * @return array list of array('email' => trimmed address, 'name' => name); empty when
 *               $_addresses is empty or could not be parsed into an array
 */
public static function convertAddresses($_addresses, $_punycodeConverter = NULL)
{
    $result = array();

    if (empty($_addresses)) {
        return $result;
    }

    $addresses = Tinebase_Mail::parseAdresslist($_addresses);
    if (!is_array($addresses)) {
        return $result;
    }

    foreach ($addresses as $address) {
        $email = $address['address'];

        // The ACE prefix may sit on any label of the domain (user@mail.xn--...), not only
        // directly after the "@", so look for it at the start of every domain label.
        if ($_punycodeConverter !== NULL && preg_match('/@(?:[^@]*\.)?xn--/', $address['address'])) {
            $email = $_punycodeConverter->decode($address['address']);
            if (Tinebase_Core::isLogLevel(Zend_Log::DEBUG)) {
                Tinebase_Core::getLogger()->debug(__METHOD__ . '::' . __LINE__ . ' Converted email from punycode ' . $address['address'] . ' to ' . $email);
            }
        }

        $result[] = array('email' => trim($email), 'name' => $address['name']);
    }

    return $result;
}
// Plugin path relative to ABSPATH, kept for backward compatibility.
define("CSP_PO_PLUGINPATH", "/" . dirname(plugin_basename(__FILE__)));
define('CSP_PO_TEXTDOMAIN', 'codestyling-localization');
define('CSP_PO_BASE_URL', plugins_url(CSP_PO_PLUGINPATH));

// Bugfix: ensure valid JSON requests at IDN locations!
// Attention: Google Chrome and Safari behave differently (shared WebKit issue, or are all the others wrong?)
// The admin URL (admin_url() if available, otherwise get_site_url() . '/wp-admin/') is split
// into a domain part and a target part, with the trailing slash removed first.
list($csp_domain, $csp_target) = csp_split_url(function_exists("admin_url") ? rtrim(admin_url(), '/') : rtrim(get_site_url() . '/wp-admin/', '/'));
define('CSP_SELF_DOMAIN', $csp_domain);

// Chrome/Safari user agents and PHP older than 5.2.1 get the lower-cased domain as-is;
// every other client gets the domain run through idna_convert::decode().
// NOTE(review): $_SERVER['HTTP_USER_AGENT'] is read without an isset() check - confirm this
// code never runs for requests that carry no user agent (e.g. CLI or cron).
if (stripos($_SERVER['HTTP_USER_AGENT'], 'chrome') !== false || stripos($_SERVER['HTTP_USER_AGENT'], 'safari') !== false || version_compare(phpversion(), '5.2.1', '<')) {
    define('CSP_PO_ADMIN_URL', strtolower($csp_domain) . $csp_target);
} else {
    // Only load the bundled converter if no other code has declared the class yet.
    if (!class_exists('idna_convert')) {
        require_once 'includes/idna_convert.class.php';
    }
    $idn = new idna_convert();
    define('CSP_PO_ADMIN_URL', $idn->decode(strtolower($csp_domain), 'utf8') . $csp_target);
}

define('CSP_PO_BASE_PATH', WP_PLUGIN_DIR . CSP_PO_PLUGINPATH);
define('CSP_PO_MIN_REQUIRED_WP_VERSION', '2.5');
define('CSP_PO_MIN_REQUIRED_PHP_VERSION', '4.4.2');

register_activation_hook(__FILE__, 'csp_po_install_plugin');
add_action('plugins_loaded', 'csp_trace_php_errors', 0);
// The brace below closes a block that was opened before this excerpt.
}

/**
 * Tell whether this installation counts as a multisite setup.
 *
 * True when $GLOBALS['wpmu_version'] is set, or is_multisite() exists and
 * returns true, or wp_get_mu_plugins() exists and returns a non-empty list.
 *
 * @return bool
 */
function csp_is_multisite() {
    return isset($GLOBALS['wpmu_version']) || function_exists('is_multisite') && is_multisite() || function_exists('wp_get_mu_plugins') && count(wp_get_mu_plugins()) > 0;
}

if (function_exists('csp_po_install_plugin')) {
    // Rewrite and extend the error messages displayed at failed activation.
    // Fall through if it's a real code bug forcing the activation error, to get the appropriate message instead.
    // The inner condition matches only the 'error_scrape' request for this very plugin file.
    if (isset($_GET['action']) && isset($_GET['plugin']) && $_GET['action'] == 'error_scrape' && $_GET['plugin'] == plugin_basename(__FILE__)) {
/**
 * Turn an internationalized domain name back into its decoded form
 *
 * Thin wrapper around idna_convert::decode().
 *
 * @param string $strDomain The domain name to decode
 * @return string The value returned by the converter
 */
protected function idnaDecode($strDomain)
{
    // Pull in the bundled converter only if the class has not been declared yet
    // (autoloading is deliberately not triggered by this check).
    if (class_exists('idna_convert', false) === false) {
        require_once TL_ROOT . '/plugins/idna/idna_convert.class.php';
    }

    $objConverter = new idna_convert();
    $strDecoded = $objConverter->decode($strDomain);

    return $strDecoded;
}
/**
 * Decode an IDNA name and convert the result with utf8_decode().
 *
 * @param string $input name to decode (taken by reference, not modified here)
 * @return string the converted name, or $input unchanged when the converted
 *                result is empty or otherwise loosely equal to false
 */
function decode_idna(&$input)
{
    $converter = new idna_convert();
    $decoded = utf8_decode($converter->decode($input));

    // Loose comparison on purpose: any falsy result falls back to the caller's value.
    return ($decoded == false) ? $input : $decoded;
}
/**
 * Fill the template with the domain aliases of the user's domain.
 *
 * Resolves the user's domain id, reads the matching rows from domain_aliasses
 * ordered by alias name and parses one 'als_item' block per row. When there
 * are no rows, the 'als_message' block is parsed with an "empty list" message
 * and ALS_LIST is cleared instead.
 *
 * @param object $tpl     template object (by reference); assign() and parse() are called on it
 * @param object $sql     database handle (by reference), passed on to get_user_domain_id() and exec_query()
 * @param mixed  $user_id id of the user whose aliases are listed
 * @return void
 */
function gen_user_als_list(&$tpl, &$sql, $user_id)
{
    $domain_id = get_user_domain_id($sql, $user_id);

    $query = <<<SQL_QUERY
        select
            alias_id,
            alias_name,
            alias_status,
            alias_mount,
            alias_ip_id,
            url_forward
        from
            domain_aliasses
        where
            domain_id = ?
        order by
            alias_name
SQL_QUERY;

    $rs = exec_query($sql, $query, array($domain_id));

    if ($rs->RecordCount() == 0) {
        $tpl->assign(array('ALS_MSG' => tr('Alias list is empty!'), 'ALS_LIST' => ''));
        $tpl->parse('ALS_MESSAGE', 'als_message');
        return;
    }

    // One converter serves every row; it does not need to be rebuilt per alias.
    $IDN = new idna_convert();
    $counter = 0;

    while (!$rs->EOF) {
        // Alternate the row style between even and odd rows.
        $tpl->assign('ITEM_CLASS', ($counter % 2 == 0) ? 'content' : 'content2');

        list($als_action, $als_action_script) = gen_user_als_action($rs->fields['alias_id'], $rs->fields['alias_status']);
        list($als_forward, $als_forward_script) = gen_user_als_forward($rs->fields['alias_id'], $rs->fields['alias_status'], $rs->fields['url_forward']);

        // Alias names are decoded from their IDNA form, then passed through utf8_decode().
        $alias_name = utf8_decode($IDN->decode($rs->fields['alias_name']));

        $tpl->assign(array(
            'ALS_NAME' => $alias_name,
            'ALS_MOUNT' => $rs->fields['alias_mount'],
            'ALS_STATUS' => translate_dmn_status($rs->fields['alias_status']),
            'ALS_FORWARD' => $als_forward,
            'ALS_FWD_SCRIPT' => $als_forward_script,
            'ALS_ACTION' => $als_action,
            'ALS_ACTION_SCRIPT' => $als_action_script,
        ));
        $tpl->parse('ALS_ITEM', '.als_item');

        $rs->MoveNext();
        $counter++;
    }

    $tpl->parse('ALS_LIST', 'als_list');
    $tpl->assign('ALS_MESSAGE', '');
}
// Tail of a check that started before this excerpt: report the security error.
$admin->print_error($MESSAGE['GENERIC_SECURITY_ACCESS'], $backlink);
}

$admin->print_header();

// Update id, anchor and target
if (isset($_POST['menu_link'])) {
    // Numeric fields are forced to integers before they reach the SQL below.
    $iTargetPageId = intval($admin->get_post('menu_link'));
    $iRedirectType = intval($admin->get_post('r_type'));
    $anchor = $admin->get_post('page_target');
    $sTarget = $admin->get_post('target');
    $extern = '';

    if (isset($_POST['extern'])) {
        include_once WB_PATH . '/include/idna_convert/idna_convert.class.php';
        $oIdn = new idna_convert();
        // Encode first so filter_var() validates the ASCII form of the URL;
        // a URL that fails validation is replaced by an empty string.
        $extern = $oIdn->encode($_POST['extern']);
        $extern = filter_var($extern, FILTER_VALIDATE_URL) === false ? '' : $extern;
        // Decode again so the stored value is the decoded form.
        $extern = $oIdn->decode($extern);
        unset($oIdn);
    } else {
        $extern = '';
    }

    // NOTE(review): $table_pages is not used in the visible part of this script.
    $table_pages = TABLE_PREFIX . 'pages';

    // Store the link target on the page record.
    // NOTE(review): $page_id is concatenated without escaping - confirm it is
    // validated as an integer before this point.
    $sql = 'UPDATE `' . TABLE_PREFIX . 'pages` SET ' . '`target` = \'' . $database->escapeString($sTarget) . '\' ' . 'WHERE `page_id` = ' . $page_id;
    $database->query($sql);

    // Store the menu-link settings for the same page.
    $sql = 'UPDATE `' . TABLE_PREFIX . 'mod_menu_link` SET ' . '`target_page_id` = ' . $iTargetPageId . ', ' . '`redirect_type` = ' . $iRedirectType . ', ' . '`anchor` = \'' . $database->escapeString($anchor) . '\', ' . '`extern` = \'' . $database->escapeString($extern) . '\' ' . 'WHERE `page_id` = ' . $page_id;
    $database->query($sql);
}

// Check if there is a database error, otherwise say successful
if ($database->is_error()) {
    $admin->print_error($database->get_error(), $js_back);
} else {
    $admin->print_success($MESSAGE['PAGES_SAVED'], $backlink);