/**
 * Reduce an HTML page to its body text, returned as a run of indented paragraphs.
 *
 * The markup is first stripped of scripts, styles, comments, document-level
 * tags and elements carrying "display:none", then cut into cells at every
 * <table>/<div> open or close tag. Each cell that still has text is handed to
 * getpr(); cells that come back with 'title' == 'title' or a negative 'pr' are
 * dropped. What remains is flattened to plain text and every non-blank line is
 * wrapped in its own paragraph element.
 *
 * @param string $referurl   URL of the page; only forwarded to getimageurl().
 * @param string $sourcehtml Raw HTML of the page.
 * @return string Concatenated <p style="text-indent:2em">...</p> elements,
 *                or '' when no text survives the filtering.
 */
function getrobotmeg($referurl, $sourcehtml)
{
    // Stand-in written in place of every <table>/<div> tag so the page can be
    // split into cells afterwards.
    $cellmarker = "\n\n###table div explode###\n\n";

    // Coarse pass: remove non-content markup and plant the cell markers.
    $searchcursory = array(
        "/\\<(script|style|textarea)[^\\>]*?\\>.*?\\<\\/(\\1)\\>/si",
        "#<!--([\\s\\S]*?)-->#si",
        "/\\<!*(--|doctype|html|head|meta|link|body)[^\\>]*?\\>/si",
        "/<\\/(html|head|meta|link|body)\\>/si",
        "/([\r\n])\\s+/",
        '#<(table|div|p|span)[^>]*?display:\\s*none[^>]*?>.*?</(\\1)>#si',
        "/\\<(table|div)[^\\>]*?\\>/si",
        "/\\<\\/(table|div)\\>/si",
    );
    $replacecursory = array("", "", "", "", "\\1", '', $cellmarker, $cellmarker);

    // Fine pass: strip every remaining tag and decode the named entities.
    // The list used to end with "/&#(\d+);/e" => "chr(\1)". The /e modifier was
    // removed in PHP 7.0 (preg_replace() then warns and returns NULL for the
    // whole pattern array), so numeric entities are decoded by a callback below.
    $searchaborative = array(
        "/\\<(iframe)[^\\>]*?\\>.*?\\<\\/(\\1)\\>/si",
        "/\\<[\\/\\!]*?[^\\<\\>]*?\\>/si",
        "/\t/",
        "/[\r\n]+/",
        "/(^[\r\n]|[\r\n]\$)+/",
        "/&(quot|#34);/i",
        "/&(amp|#38);/i",
        "/&(lt|#60);/i",
        "/&(gt|#62);/i",
        "/&(nbsp|#160|\t);/i",
        "/&(iexcl|#161);/i",
        "/&(cent|#162);/i",
        "/&(pound|#163);/i",
        "/&(copy|#169);/i",
    );
    $replaceaborative = array(
        "", "", "", "\n", "", "\"", "&", "<", ">", " ",
        chr(161), chr(162), chr(163), chr(169),
    );

    // Runs the fine pass and then decodes &#NNN; exactly where the old /e
    // pattern sat (last), so "&amp;#65;" still ends up as "A". The digits are
    // read as decimal; chr() keeps its usual modulo-256 behaviour.
    $totext = function ($html) use ($searchaborative, $replaceaborative) {
        return preg_replace_callback(
            '/&#(\d+);/',
            function ($match) {
                return chr((int) $match[1]);
            },
            (string) preg_replace($searchaborative, $replaceaborative, $html)
        );
    };

    $cleaned = getimageurl($referurl, preg_replace($searchcursory, $replacecursory, $sourcehtml));

    // Collect the markup of every cell that belongs to the message body.
    $bodycells = array();
    foreach (explode($cellmarker, $cleaned) as $fragment) {
        $cell = array(
            'code' => $fragment,
            // Plain text of the cell with whitespace removed; only used here to
            // detect empty cells and as input for getpr().
            'text' => preg_replace("/[\n\r\\s]*?/is", "", $totext($fragment)),
            'pr' => 0,
            'title' => '',
            'process' => '',
        );
        if ($cell['text'] == '') {
            continue;
        }

        // NOTE(review): getpr() is defined elsewhere. It now receives the pattern
        // lists without the /e entry, which assumes it only feeds them to
        // preg_replace() - confirm against its implementation.
        $cell = getpr($cell, $searchaborative, $replaceaborative);

        if ($cell['title'] == 'title') {
            // Title cells never become part of the message.
            continue;
        }
        if ($cell['pr'] >= 0) {
            $bodycells[] = $cell['code'];
        }
    }

    // Turn <p>/<br> into line breaks before the remaining tags are stripped, so
    // paragraph boundaries survive as newlines.
    $flattened = preg_replace("/\\<(p|br)[^\\>]*?\\>/si", "\n", implode("\n", $bodycells));

    $leachmessage = '';
    foreach (explode("\n", $totext($flattened)) as $line) {
        // Drop leading whitespace, including the ideographic space U+3000.
        $line = trim((string) preg_replace('/^(\\s| |\\x{3000})+/u', '', $line));
        if ($line != '') {
            $leachmessage .= "<p style=\"text-indent:2em\">" . $line . "</p>";
        }
    }

    return $leachmessage;
}
/**
 * Fetch a page and extract a subject line and message body from its HTML.
 *
 * The page is read with sreadfile(), converted with encodeconvert() using the
 * charset from postget('charset'), the page's own <meta ... charset=...> tag
 * or $_SCONFIG['charset'] (in that order), and then cleaned of scripts, styles
 * and document-level tags.
 *
 * - $robotlevel == 1: the subject is the <title> content and the message is
 *   the whole page as plain text with line breaks turned into <br />.
 * - $robotlevel == 2: the page is cut into cells at every <table>/<div> tag
 *   and each non-empty cell is passed to getpr(). Cells returned with
 *   'title' == 'title' are subject candidates; the other cells with
 *   'pr' >= 0 form the message, one <p> per non-blank line.
 * - any other level: subject and message are both ''.
 *
 * @param string $referurl   URL of the page to read.
 * @param int    $robotlevel Extraction mode, see above.
 * @return array Keys 'leachsubject', 'leachmessage' and 'charset'.
 */
function getrobotmeg($referurl, $robotlevel = 2)
{
    global $_SCONFIG;

    // Stand-in written in place of every <table>/<div> tag so the page can be
    // split into cells afterwards.
    $cellmarker = "\n\n###table div explode###\n\n";

    // Coarse pass: remove non-content markup and plant the cell markers.
    $searchcursory = array(
        "/\\<(script|style|textarea)[^\\>]*?\\>.*?\\<\\/(\\1)\\>/si",
        "/\\<!*(--|doctype|html|head|meta|link|body)[^\\>]*?\\>/si",
        "/<\\/(html|head|meta|link|body)\\>/si",
        "/([\r\n])\\s+/",
        "/\\<(table|div)[^\\>]*?\\>/si",
        "/\\<\\/(table|div)\\>/si",
    );
    $replacecursory = array("", "", "", "\\1", $cellmarker, $cellmarker);

    // Fine pass: strip every remaining tag and decode the named entities.
    // The list used to end with "/&#(\d+);/e" => "chr(\1)". The /e modifier was
    // removed in PHP 7.0 (preg_replace() then warns and returns NULL for the
    // whole pattern array), so numeric entities are decoded by a callback below.
    $searchaborative = array(
        "/\\<(iframe)[^\\>]*?\\>.*?\\<\\/(\\1)\\>/si",
        "/\\<[\\/\\!]*?[^\\<\\>]*?\\>/si",
        "/\t/",
        "/[\r\n]+/",
        "/(^[\r\n]|[\r\n]\$)+/",
        "/&(quot|#34);/i",
        "/&(amp|#38);/i",
        "/&(lt|#60);/i",
        "/&(gt|#62);/i",
        "/&(nbsp|#160|\t);/i",
        "/&(iexcl|#161);/i",
        "/&(cent|#162);/i",
        "/&(pound|#163);/i",
        "/&(copy|#169);/i",
    );
    $replaceaborative = array(
        "", "", "", "\n", "", "\"", "&", "<", ">", " ",
        chr(161), chr(162), chr(163), chr(169),
    );

    // Runs the fine pass and then decodes &#NNN; exactly where the old /e
    // pattern sat (last), so "&amp;#65;" still ends up as "A". The digits are
    // read as decimal; chr() keeps its usual modulo-256 behaviour.
    $totext = function ($html) use ($searchaborative, $replaceaborative) {
        return preg_replace_callback(
            '/&#(\d+);/',
            function ($match) {
                return chr((int) $match[1]);
            },
            (string) preg_replace($searchaborative, $replaceaborative, $html)
        );
    };

    // Read the web page.
    $sourcehtml = sreadfile($referurl, 'r', 1);

    // Charset: explicit request value first, then the page's meta tag, then
    // the site default.
    $sourcecharset = postget('charset');
    if (empty($sourcecharset) && $sourcecharset == '') {
        preg_match_all("/\\<meta[^\\<\\>]+charset=([^\\<\\>\"\\'\\s]+)[^\\<\\>]*\\>/i", $sourcehtml, $metamatches, PREG_SET_ORDER);
        if (!empty($metamatches)) {
            $sourcecharset = trim(strtoupper($metamatches[0][1]));
        } else {
            $sourcecharset = $_SCONFIG['charset'];
        }
    }
    $sourcehtml = encodeconvert($sourcecharset, $sourcehtml);

    $sourcetext = getimageurl($referurl, preg_replace($searchcursory, $replacecursory, $sourcehtml));

    // Defined up front so an unsupported $robotlevel returns empty strings
    // instead of reading undefined variables.
    $leachsubject = '';
    $leachmessage = '';

    if ($robotlevel == 1) {
        // Subject is the page title; a page without <title> yields ''.
        preg_match_all("/\\<title[^\\>]*?\\>(.*)\\<\\/title\\>/is", $sourcetext, $titlematches, PREG_SET_ORDER);
        if (isset($titlematches[0][1])) {
            $leachsubject = $titlematches[0][1];
        }

        // The cell markers are irrelevant here; the page is taken as one piece.
        $sourcetext = preg_replace("/\n\n###table div explode###\n\n/", '', $sourcetext);
        $leachmessage = preg_replace("/[\r\n]+/", '<br />', $totext($sourcetext));
    } elseif ($robotlevel == 2) {
        $titlecells = array();
        $bodycells = array();
        foreach (explode($cellmarker, $sourcetext) as $fragment) {
            $cell = array(
                'code' => $fragment,
                // Plain text of the cell with whitespace removed.
                'text' => preg_replace("/[\n\r\\s]*?/is", "", $totext($fragment)),
                'pr' => 0,
                'title' => '',
                'process' => '',
            );
            if ($cell['text'] == '') {
                continue;
            }

            // NOTE(review): getpr() is defined elsewhere. It now receives the
            // pattern lists without the /e entry, which assumes it only feeds
            // them to preg_replace() - confirm against its implementation.
            $cell = getpr($cell, $searchaborative, $replaceaborative);

            if ($cell['title'] == 'title') {
                $titlecells[] = $cell;
            } elseif ($cell['pr'] >= 0) {
                $bodycells[] = $cell['code'];
            }
        }

        // NOTE(review): each candidate is compared with the 'pr' of the
        // candidate just before it, not with the best seen so far - kept as is.
        $previouspr = '';
        foreach ($titlecells as $candidate) {
            if ($previouspr < $candidate['pr'] || empty($previouspr)) {
                $leachsubject = $candidate['text'];
            }
            $previouspr = $candidate['pr'];
        }

        // Turn <p>/<br> into line breaks before the remaining tags are
        // stripped, so paragraph boundaries survive as newlines.
        $flattened = preg_replace("/\\<(p|br)[^\\>]*?\\>/si", "\n", implode("\n", $bodycells));
        foreach (explode("\n", $totext($flattened)) as $line) {
            $line = trim($line);
            if ($line != '') {
                $leachmessage .= "<p>\t" . $line . "</p>";
            }
        }
    }

    return array(
        'leachsubject' => $leachsubject,
        'leachmessage' => $leachmessage,
        'charset' => $sourcecharset,
    );
}
/**
 * Render a forecast as HTML list items: one item with the issue time followed
 * by one item per forecast day (date, two icons, weather, temperature, wind).
 *
 * Reads $forecast['date_y'], $forecast['fchh'] and, for each day i,
 * $forecast["weather{i}"], $forecast["temp{i}"] and $forecast["wind{i}"].
 * Icon URLs come from getimageurl($forecast), indexed 2i-1 and 2i for day i.
 *
 * NOTE(review): the returned markup opens a <ul> but never closes it;
 * presumably the caller appends the closing tag - confirm.
 *
 * @param array $forecast Forecast record as described above.
 * @return string HTML fragment.
 */
function printbody($forecast)
{
    // printdate() is a named function declared from inside this body, so it
    // lands in the global function table on first use. Without the guard a
    // second call to printbody() fails with "Cannot redeclare printdate()".
    if (!function_exists('printdate')) {
        function printdate($datetime)
        {
            return date('n月j日', $datetime) . ' ' . getday($datetime);
        }
    }

    $date = getdatetime($forecast['date_y'], $forecast['fchh']);
    $images = getimageurl($forecast);

    // The loop below runs for i = 1 .. $length - 1.
    $length = 7;
    $body = "<ul>\n<li>" . date('Y-m-d H时', $date) . "发布</li><br />\n";

    if ($forecast['fchh'] == '18') {
        // For the '18' issue the listing starts one day (86400 s) later, the
        // record is passed through fixforecast(), and one day fewer is shown.
        $date += 86400;
        $forecast = fixforecast($forecast);
        $length = 6;
    }

    for ($day = 1; $day < $length; $day++) {
        $firsticon = $images[$day * 2 - 1];
        $secondicon = $images[$day * 2];

        $body .= '<li>' . printdate($date) . "<br />";
        $body .= '<img src="' . $firsticon . '" /><img src="' . $secondicon . "\" /><br />";
        $body .= "{$forecast["weather{$day}"]} {$forecast["temp{$day}"]}<br />{$forecast["wind{$day}"]}</li><br />\n";

        $date += 86400;
    }

    return $body;
}