Esempio n. 1
0
 /**
  * Fetch the plain-text paragraphs of a Chinese Wikipedia article.
  *
  * Downloads http://zh.wikipedia.org/wiki/<keyword>, extracts every
  * `<p>...</p>` element, removes markup and bracketed spans such as "[1]",
  * and concatenates the paragraphs, each terminated by a newline.
  *
  * When the download fails or returns an empty body, the keyword is passed
  * through Trans::c2t() and looked up with $this->getextracts_zh() instead
  * (both are defined outside this block; presumably a script conversion and
  * an extract lookup -- confirm against their definitions).
  *
  * @param string $keyword Article title; URL-encoded before being requested.
  *
  * @return mixed The cleaned paragraphs as a string, or whatever
  *               getextracts_zh() returns on the fallback path.
  */
 function getcontent_zh($keyword)
 {
     // Configure and run the HTTP request.
     $curl = curl_init();
     curl_setopt($curl, CURLOPT_URL, 'http://zh.wikipedia.org/wiki/' . urlencode($keyword));
     // Do not include the response headers in the returned body.
     curl_setopt($curl, CURLOPT_HEADER, 0);
     curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
     // Return the body as a string instead of printing it.
     curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
     // NOTE: this only sets the Referer header on followed redirects; redirects
     // themselves are not followed because CURLOPT_FOLLOWLOCATION is not set.
     curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
     // Give up after four seconds.
     curl_setopt($curl, CURLOPT_TIMEOUT, 4);
     $data = curl_exec($curl);
     curl_close($curl);

     // Fallback path: request failed or produced no body.
     if ($data === false || $data === '') {
         $go = new Trans();
         $keyword = $go->c2t($keyword);
         $re = $this->getextracts_zh($keyword);
         if (preg_match("/REDIRECT/", trim($re))) {
             // The redirect target is taken to be the token between the first
             // and second space (or everything after the first space).
             $firstSpace = strpos($re, ' ');
             $secondSpace = strpos($re, ' ', $firstSpace + 1);
             if ($secondSpace === false) {
                 $keyword = substr($re, $firstSpace + 1);
             } else {
                 $keyword = substr($re, $firstSpace + 1, $secondSpace - $firstSpace - 1);
             }
             $re = $this->getextracts_zh($keyword);
         }
         return $re;
     }

     $content = '';
     while (true) {
         // Locate the next attribute-less <p> element.
         $start = strpos($data, '<p>');
         if ($start === false) {
             break;
         }
         $end = strpos($data, '</p>', $start);
         if ($end === false) {
             break;
         }
         $paragraph = substr($data, $start + 3, $end - $start - 3);
         // Continue scanning from the closing tag on the next iteration.
         $data = substr($data, $end);

         // Remove every "<...>" tag. An unterminated "<" is left in place
         // rather than being retried forever.
         $stripped = preg_replace('/<[^>]*>/', '', $paragraph);
         if ($stripped !== null) {
             $paragraph = $stripped;
         }

         // Remove "[...]" spans (e.g. footnote markers). Each surrounding
         // fragment is trimmed before being joined.
         if (strpos($paragraph, '[') !== false) {
             $clean = '';
             $rest = $paragraph;
             while (($open = strpos($rest, '[')) !== false) {
                 // Search for the closing bracket only after the opening one.
                 $close = strpos($rest, ']', $open);
                 if ($close === false) {
                     // Unbalanced bracket: keep the remainder untouched.
                     break;
                 }
                 $clean .= trim(substr($rest, 0, $open));
                 $rest = substr($rest, $close + 1);
             }
             // Keep the text that follows the last bracketed span.
             $paragraph = $clean . trim($rest);
         }

         // Skip paragraphs that start with the coordinates label.
         if (strpos($paragraph, '坐标:') === 0) {
             continue;
         }
         $content .= $paragraph . "\n";
     }
     return $content;
 }