/**
 * Download every image URL in a list and save each one into the current
 * working directory.
 *
 * Only URLs containing a recognised image extension (jpg, bmp, jpeg, gif,
 * png; case-insensitive) are processed. The local file name is the last
 * segment of the URL path, so "http://host/a/cat.png?x=1" is stored as
 * "cat.png". Entries that cannot be named, fetched or opened are skipped.
 *
 * "[OK]..!" is echoed once for every image that was actually written.
 *
 * @param array $pic_arr List of image URLs.
 * @return int Always 0.
 */
function get_name($pic_arr)
{
    // Recognised image types. The dot must be escaped: the previous pattern
    // used the delimiter character ("/.") there, which made the regex invalid.
    $pattern_type = '/(\.(jpg|bmp|jpeg|gif|png))/i';

    foreach ($pic_arr as $pic_item) {
        // Ignore anything that does not look like an image URL.
        if (!is_string($pic_item) || !preg_match($pattern_type, $pic_item)) {
            continue;
        }

        // Image name: last path segment of the URL. basename() also strips any
        // directory part, so the file always lands in the working directory.
        $url_path = parse_url($pic_item, PHP_URL_PATH);
        $pic_name = is_string($url_path) ? basename($url_path) : '';
        if ($pic_name === '') {
            continue;
        }

        // NOTE(review): CurlGet() is defined elsewhere; presumably it returns
        // the response body as a string and a non-string/empty value on
        // failure - confirm against its definition.
        $payload = CurlGet($pic_item);
        if (!is_string($payload) || $payload === '') {
            continue;
        }

        // Save the image as a binary stream.
        $write_fd = @fopen($pic_name, "wb");
        if ($write_fd === false) {
            continue;
        }
        $written = @fwrite($write_fd, $payload);
        @fclose($write_fd);

        if ($written !== false) {
            echo "[OK]..!";
        }
    }

    return 0;
}
/**
 * Rebuild the cache file describing the proxy's parent peers.
 *
 * Flow:
 *  - unless running in DUMP or VERBOSE mode (or $aspid is true), refuse to
 *    run while another instance recorded in the PID file is still alive;
 *  - unless DUMP, FORCE or VERBOSE is set, do nothing when the cache file is
 *    less than 5 minutes old;
 *  - fetch "server_list" through CurlGet(); in DUMP mode just print it;
 *  - when the answer is empty, bump $GLOBALS["RECONFIGURE_COUNT"], reload
 *    the proxy and re-launch this script with --peer-status;
 *  - otherwise parse "Parent : <name>" sections and their "key : value"
 *    lines into array[peer][KEY] = value and serialize it to the cache file.
 *
 * Reads $GLOBALS["VERBOSE"], ["DUMP"], ["FORCE"], ["OUTPUT"] and
 * reads/writes $GLOBALS["RECONFIGURE_COUNT"].
 *
 * @param bool $aspid When true, skip the PID-file single-instance guard.
 * @return void
 */
function peer_status($aspid = false)
{
    if ($GLOBALS["VERBOSE"]) {
        echo "peer_status();\n";
    }

    $unix = new unix();
    $cacheFile = "/usr/share/artica-postfix/ressources/logs/web/squid.peers.db";
    if (!is_dir(dirname($cacheFile))) {
        @mkdir(dirname($cacheFile), 0755, true);
    }

    // Single-instance guard, skipped for DUMP/VERBOSE runs and when the
    // caller passes $aspid = true.
    if (!$GLOBALS["DUMP"] && !$GLOBALS["VERBOSE"] && !$aspid) {
        $pidfile = "/etc/artica-postfix/pids/" . basename(__FILE__) . "." . __FUNCTION__ . ".pid";
        $pid = $unix->get_pid_from_file($pidfile);
        if ($unix->process_exists($pid, basename(__FILE__))) {
            $time = $unix->PROCCESS_TIME_MIN($pid);
            if ($GLOBALS["OUTPUT"]) {
                echo "Stopping......: " . date("H:i:s") . "Already `task` running PID {$pid} since {$time}mn\n";
            }
            return;
        }
        @file_put_contents($pidfile, getmypid());
    }

    $timefile = $unix->file_time_min($cacheFile);
    if ($GLOBALS["VERBOSE"]) {
        echo basename($cacheFile) . " {$timefile}mn\n";
    }

    // Throttle: a regular run does nothing while the cache is younger than 5 minutes.
    if (!$GLOBALS["DUMP"] && !$GLOBALS["FORCE"] && !$GLOBALS["VERBOSE"] && $timefile < 5) {
        return;
    }

    if (!is_peer()) {
        if ($GLOBALS["DUMP"]) {
            echo "No cache_peer\n";
            return;
        }
        if ($GLOBALS["VERBOSE"]) {
            echo "No cache_peer...\n";
        }
        return;
    }

    // NOTE(review): $sock is never used below; the instantiation is kept only
    // in case the sockets constructor has side effects - confirm and remove.
    $sock = new sockets();

    // Fetch once and reuse: the previous code queried "server_list" a second
    // time for parsing, so the parsed data could differ from the data that
    // had just been checked for emptiness.
    $datas = trim((string) CurlGet("server_list"));
    if ($GLOBALS["DUMP"]) {
        echo $datas . "\n";
        return;
    }

    if ($datas === '') {
        $previousCount = isset($GLOBALS["RECONFIGURE_COUNT"]) ? $GLOBALS["RECONFIGURE_COUNT"] : 0;
        $GLOBALS["RECONFIGURE_COUNT"] = $previousCount + 1;
        SendLogs("No results for peer, reloading the server reconfigured {{$GLOBALS["RECONFIGURE_COUNT"]}} times");
        squid_admin_mysql(1, "Reconfiguring proxy service\n", null, __FILE__, __LINE__);
        reload_squid(true);
        $php5 = $unix->LOCATE_PHP5_BIN();
        // Re-run this script so the status is checked again after the reload.
        shell_exec("{$php5} " . __FILE__ . " --peer-status --reconfigure-count={$GLOBALS["RECONFIGURE_COUNT"]}");
        return;
    }

    // $array[peer][KEY] = value. Both are initialised up front so that an
    // answer without any "Parent" line yields an empty array instead of
    // undefined variables. Key/value lines seen before the first "Parent"
    // line are filed under the empty-string peer, as before.
    $array = array();
    $peer = '';
    foreach (explode("\n", $datas) as $val) {
        if ($GLOBALS["VERBOSE"]) {
            echo "Found: \"{$val}\"\n";
        }
        // "Parent : <name>" opens a new peer section.
        if (preg_match("#Parent\\s+:(.+)#", $val, $re)) {
            $peer = trim($re[1]);
            continue;
        }
        // Any other "key : value" line belongs to the current peer.
        if (preg_match("#(.+?)\\s+:(.*)#", $val, $re)) {
            $key = strtoupper(trim($re[1]));
            $array[$peer][$key] = trim($re[2]);
        }
    }

    if ($GLOBALS["VERBOSE"]) {
        echo count($array) . " peers detected\n";
    }

    @unlink($cacheFile);
    @file_put_contents($cacheFile, serialize($array));
    @chmod($cacheFile, 0777);
}
/**
 * Crawl one article and merge its answers into the accumulated body.
 *
 * Takes the first id of $this->listNotCrawl, downloads the matching Baidu
 * Zhidao question page, converts it from GBK to UTF-8 and extracts the title,
 * the best answer and the other replies. The answers are appended to
 * $this->body (fragments separated by "<br>"), the title text is removed
 * from the body and the fragments are shuffled.
 *
 * State handling:
 *  - nothing left to crawl: returns without touching anything;
 *  - more than 6 failures on the current id: the id is dropped and the
 *    error counter reset;
 *  - no title could be extracted: $this->errorTime is incremented and the id
 *    stays queued for another attempt;
 *  - body longer than 20000 bytes: it is truncated and the queue is emptied;
 *  - otherwise the id is removed from the queue.
 * $this->isUp is set to true in every case except the first.
 *
 * @return void
 */
function CrawlView()
{
    if (count($this->listNotCrawl) < 1) {
        // Everything has been crawled already; nothing to fetch.
        return;
    }

    if ($this->errorTime > 6) {
        // More than 6 errors: skip this article.
        array_splice($this->listNotCrawl, 0, 1);
        $this->errorTime = 0;
        $this->isUp = true;
        return;
    }

    $articleId = $this->listNotCrawl[0];
    $data = CurlGet('http://zhidao.baidu.com/question/' . $articleId . '.html');
    // Only convert when something was actually downloaded; a failed fetch
    // falls through to the "no title" error path below.
    if (is_string($data) && $data !== '') {
        $data = iconv("GBK", "UTF-8", $data);
    }

    // Title
    $title = '';
    if (is_string($data) && preg_match('%<title>([^>]+?)_%sim', $data, $arr)) {
        $title = strip_tags($arr[1]);
    }
    if ($title === '') {
        $this->isUp = true;
        $this->errorTime += 1;
        return;
    }

    // Must start as an array: appending with [] to a string is a fatal error
    // on PHP >= 7.1, and array_merge() below needs an array even when the
    // page contains no answer at all.
    $answers = array();

    // Real line breaks, CRLF first so that it is replaced by a single <br>.
    // The previous single-quoted '\r' / '\n' only matched literal
    // backslash sequences, never actual newlines.
    $lineBreaks = array("\r\n", "\r", "\n");

    // Best answer
    if (preg_match('%<pre id="best-answer-content[^>]+?>([^<]+?)</pre>%sim', $data, $arr)) {
        $answers[] = str_replace($lineBreaks, '<br>', strip_tags(trim($arr[1])));
    }

    // Other answers
    if (preg_match_all('%<pre class="reply[^>]+?>([^<]+?)</pre>%sim', $data, $arr)) {
        foreach ($arr[1] as $r) {
            $answers[] = str_replace($lineBreaks, '<br>', strip_tags(trim($r)));
        }
    }

    // The page source is no longer needed.
    unset($data, $arr);

    $arrBody = array_merge(explode('<br>', (string) $this->body), $answers);
    if (count($arrBody) <= 1) {
        $body = $arrBody[0];
    } else {
        shuffle($arrBody);
        $body = implode('<br>', $arrBody);
    }

    // Drop the title text from the body, then shuffle the fragments again.
    $body = str_replace($title, '', $body);
    $arrBody = explode('<br>', $body);
    shuffle($arrBody);
    $body = implode('<br>', $arrBody);

    if ($this->title == '') {
        $this->title = $title;
    }
    $this->errorTime = 0;

    if (strlen($body) > 20000) {
        // Body reached 20K: stop crawling altogether.
        // NOTE(review): the limit is tested in bytes (strlen) while
        // substr_utf8() is defined elsewhere and presumably counts
        // characters - confirm.
        $this->body = substr_utf8($body, 0, 20000);
        $this->listNotCrawl = array();
        $this->isUp = true;
        return;
    }

    $this->body = $body;
    // Remove the article that was crawled successfully.
    array_splice($this->listNotCrawl, 0, 1);
    $this->isUp = true;
}
/**
 * Collect proxy counters and store them in the counters cache file.
 *
 * Flow:
 *  - unless VERBOSE is set or $aspid is true, refuse to run while another
 *    instance recorded in the PID file is still alive;
 *  - unless FORCE or VERBOSE is set, do nothing when the cache file is less
 *    than 5 minutes old;
 *  - fetch "5min" through CurlGet() and store every "key=value" line;
 *  - fetch "active_requests", serialize its raw lines to
 *    /var/log/squid/monitor.sessions.cache, and count the "Connection:"
 *    lines whose value is not "close";
 *  - serialize the result (plus "active_requests" and "SAVETIME") to the
 *    cache file and finally call peer_status(true).
 *
 * Reads $GLOBALS["VERBOSE"], ["FORCE"] and ["OUTPUT"].
 *
 * @param bool $aspid When true, skip the PID-file single-instance guard.
 * @return void
 */
function counters($aspid = false)
{
    $cacheFile = "/usr/share/artica-postfix/ressources/logs/web/squid.counters.db";
    if (!is_dir(dirname($cacheFile))) {
        @mkdir(dirname($cacheFile), 0755, true);
    }

    $unix = new unix();

    // Single-instance guard, skipped in VERBOSE mode and when $aspid is true.
    if (!$GLOBALS["VERBOSE"] && !$aspid) {
        $pidfile = "/etc/artica-postfix/pids/" . basename(__FILE__) . "." . __FUNCTION__ . ".pid";
        $pid = $unix->get_pid_from_file($pidfile);
        if ($unix->process_exists($pid, basename(__FILE__))) {
            $time = $unix->PROCCESS_TIME_MIN($pid);
            if ($GLOBALS["OUTPUT"]) {
                echo "Stopping......: " . date("H:i:s") . "Already `task` running PID {$pid} since {$time}mn\n";
            }
            return;
        }
        @file_put_contents($pidfile, getmypid());
    }

    $timefile = $unix->file_time_min($cacheFile);
    if ($GLOBALS["VERBOSE"]) {
        echo basename($cacheFile) . " {$timefile}mn\n";
    }

    // Throttle: a regular run does nothing while the cache is younger than 5 minutes.
    if (!$GLOBALS["FORCE"] && $timefile < 5 && !$GLOBALS["VERBOSE"]) {
        return;
    }

    // NOTE(review): $sock is never used below; the instantiation is kept only
    // in case the sockets constructor has side effects - confirm and remove.
    $sock = new sockets();

    // "key=value" counters from the "5min" report. foreach replaces each(),
    // which no longer exists in PHP 8.
    $ARRAY = array();
    foreach (explode("\n", (string) CurlGet("5min")) as $ligne) {
        $ligne = trim($ligne);
        if (!preg_match("#(.+?)=(.+)#", $ligne, $re)) {
            continue;
        }
        $ARRAY[trim($re[1])] = trim($re[2]);
    }

    // Keep the raw "active_requests" lines in the sessions cache, then count
    // the connections that are not flagged "close".
    $datas = explode("\n", (string) CurlGet("active_requests"));
    @file_put_contents("/var/log/squid/monitor.sessions.cache", serialize($datas));

    $c = 0;
    foreach ($datas as $ligne) {
        $ligne = trim($ligne);
        if (!preg_match("#Connection:\\s+(.+)#", $ligne, $re)) {
            continue;
        }
        if (trim($re[1]) == "close") {
            continue;
        }
        $c++;
    }

    $ARRAY["active_requests"] = $c;
    $ARRAY["SAVETIME"] = time();

    @unlink($cacheFile);
    @file_put_contents($cacheFile, serialize($ARRAY));
    @chmod($cacheFile, 0775);

    // Refresh the peers cache too; true bypasses its own PID guard.
    peer_status(true);
}