/**
 * Fetch the Chinese Wikipedia page for a keyword and return its paragraph text.
 *
 * The page at http://zh.wikipedia.org/wiki/<keyword> is downloaded with cURL.
 * Every "<p>...</p>" section is reduced to plain text: HTML tags and
 * square-bracketed spans (such as "[1]" citation markers) are removed, and
 * paragraphs that start with the coordinates label are skipped. The remaining
 * paragraphs are concatenated, each followed by a newline.
 *
 * When the download fails or yields an empty body, the keyword is passed
 * through Trans::c2t() and looked up with $this->getextracts_zh() instead.
 * If that result contains "REDIRECT", the word following the first space is
 * used as the new keyword for one more getextracts_zh() lookup.
 *
 * @param string $keyword Article title to look up.
 *
 * @return mixed The extracted text when the page was downloaded; otherwise
 *               whatever getextracts_zh() returns.
 */
function getcontent_zh($keyword)
{
    $data = false;
    $curl = curl_init();
    if ($curl !== false) {
        // The title is a path segment, so it needs RFC 3986 encoding:
        // urlencode() would turn a space into a literal "+" in the path.
        curl_setopt($curl, CURLOPT_URL, 'http://zh.wikipedia.org/wiki/' . rawurlencode($keyword));
        // Do not include response headers in the returned body.
        curl_setopt($curl, CURLOPT_HEADER, 0);
        curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
        // Return the response as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        // Follow redirects. CURLOPT_AUTOREFERER alone only sets the Referer
        // header on redirects that are followed; it does not follow them.
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
        curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
        // Give up after 4 seconds.
        curl_setopt($curl, CURLOPT_TIMEOUT, 4);

        $data = curl_exec($curl);
        curl_close($curl);
    }

    if ($data === false || $data === '') {
        // Fallback path: look the keyword up through the extracts helper.
        // NOTE(review): c2t() presumably converts the keyword to Traditional
        // Chinese - confirm against the Trans class.
        $go = new Trans();
        $keyword = $go->c2t($keyword);
        $extract = $this->getextracts_zh($keyword);

        // Parse the same trimmed string the pattern is matched against, so a
        // leading space cannot shift the offsets.
        $trimmed = trim($extract);
        if (preg_match('/REDIRECT/', $trimmed)) {
            $first = strpos($trimmed, ' ');
            if ($first !== false) {
                $second = strpos($trimmed, ' ', $first + 1);
                $target = ($second === false)
                    ? substr($trimmed, $first + 1)
                    : substr($trimmed, $first + 1, $second - $first - 1);
                if ($target !== '' && $target !== false) {
                    $extract = $this->getextracts_zh($target);
                }
            }
        }

        return $extract;
    }

    $content = '';
    $offset = 0;
    while (($start = strpos($data, '<p>', $offset)) !== false) {
        $end = strpos($data, '</p>', $start);
        if ($end === false) {
            break;
        }
        $paragraph = substr($data, $start + 3, $end - $start - 3);
        // Continue scanning after this paragraph without copying the page.
        $offset = $end + 4;

        // Remove HTML tags: everything from a "<" up to the next ">".
        $text = '';
        $cursor = 0;
        while (($open = strpos($paragraph, '<', $cursor)) !== false) {
            $close = strpos($paragraph, '>', $open);
            if ($close === false) {
                // Unterminated "<": keep the rest verbatim rather than loop forever.
                break;
            }
            $text .= substr($paragraph, $cursor, $open - $cursor);
            $cursor = $close + 1;
        }
        $text .= substr($paragraph, $cursor);

        // Remove "[...]" spans. The text before each span is trimmed before
        // being appended; the text after the last span is kept as well.
        if (strpos($text, '[') !== false) {
            $clean = '';
            $cursor = 0;
            while (($open = strpos($text, '[', $cursor)) !== false) {
                // Look for the closing bracket only after the opening one.
                $close = strpos($text, ']', $open);
                if ($close === false) {
                    // Unterminated "[": keep the rest verbatim.
                    break;
                }
                $clean .= trim(substr($text, $cursor, $open - $cursor));
                $cursor = $close + 1;
            }
            $text = $clean . substr($text, $cursor);
        }

        // Skip paragraphs that begin with the coordinates label.
        if (strpos($text, '坐标:') === 0) {
            continue;
        }
        $content .= $text . "\n";
    }

    return $content;
}