Ejemplo n.º 1
0
 /**
  * Collects patron names and profile links from one fetched page, appends them
  * to the global $output buffer, then hands over to spider() for the next step.
  *
  * Anchors are consumed in pairs: the first anchor of every pair is skipped and
  * the second one is reported. (Presumably each patron is rendered as two
  * consecutive links and only the second carries the name -- TODO confirm
  * against the scraped markup.)
  *
  * Globals used:
  *  - $output (read/write): HTML report the entries are appended to.
  *  - $id     (read):       passed through unchanged to spider().
  *  - $total  (read/write): incremented once per successfully parsed page and
  *                          passed to spider().
  *
  * If the markup cannot be parsed (str_get_html() does not return a DOM
  * object, e.g. for an empty response) the function returns without touching
  * the globals and without calling spider(), instead of dying on a
  * "call to a member function find() on bool" fatal error.
  *
  * @param mixed $remote_html Raw HTML of the fetched page; may be empty or false
  *                           when the fetch failed.
  * @return void
  */
 function getPaterons($remote_html)
 {
     global $output;
     global $id;
     global $total;

     // Build a DOM from the raw markup string.
     $html = str_get_html($remote_html);
     if (!is_object($html)) {
         // Nothing parseable: stop here rather than crash or keep crawling.
         return;
     }

     // true  => the next anchor is the first of a pair and is skipped,
     // false => the next anchor is the second of a pair and is reported.
     $skipNext = true;
     foreach ($html->find('a') as $element) {
         if (!$skipNext) {
             $output .= "<br>";
             $output .= 'Name: ' . $element->plaintext . '<br>';
             $output .= 'Profile: ' . $element->href . '<br>';
             $output .= "---------------------------";
             $output .= "<br>";
         }
         $skipNext = !$skipNext;
     }

     // Release the DOM before recursing into spider(): otherwise every parsed
     // page stays alive on the call stack for the whole crawl.
     $html->clear();
     unset($html);

     $total++;
     spider($id, $total);
 }
Ejemplo n.º 2
0
/**
 * Entry point used for manual testing.
 *
 * Runs spider() once against the Douban movie front page and dumps whatever
 * spider() returns.
 *
 * @return void
 */
function main()
{
    $crawlResult = spider('http://movie.douban.com');
    var_dump($crawlResult);

    // Disabled sketch of a file-backed crawl queue, kept for reference:
    // $fp_puts = fopen('url.txt','ab'); // handle used to record the URL list
    // $fp_gets = fopen('url.txt','r'); // handle used to read the saved URL list
    // do {
    // 	$result_url_arr = spider($current_url);
    // 	if ($result_url_arr) {
    // 		foreach ($$result_url_arr as $url) {
    // 			fputs($fp_puts,$url.'\r\n');
    // 		}
    // 	}
    // }while ($current_url = fgets($fp_gets,1024));
}
Ejemplo n.º 3
0
             break;
         }
     }
     $ret = deleteFiles($assetsLCL . arg("target"));
     if (!$ret) {
         $err = $ERR_FILE_PERMISSION;
         break;
     }
     $ret = copyFiles($assetsLCL . arg("path"), $assetsLCL . arg("target"));
     if (!$ret) {
         $err = $ERR_FILE_PERMISSION;
         break;
     }
     break;
 // "spider" action: runs spider() on the path built from $assetsLCL and the
 // "path" argument, passing $assetsLCL as the second argument. A falsy result
 // is reported as $ERR_FILE_NOT_FOUND. No permission check is done here,
 // unlike the "touch" case below.
 case "spider":
     $ret = spider($assetsLCL . arg("path"), $assetsLCL);
     if (!$ret) {
         $err = $ERR_FILE_NOT_FOUND;
     }
     break;
 // "touch" action: only the "admin", "supervisor" and "dirprod" classes (as
 // reported by auth_get_class()) may use it; anyone else gets $ERR_PERMISSION.
 // NOTE(review): in_array() is called without the strict flag, so the class
 // is compared loosely -- confirm auth_get_class() always returns a string.
 case "touch":
     if (!in_array(auth_get_class(), array("admin", "supervisor", "dirprod"))) {
         $err = $ERR_PERMISSION;
         break;
     }
     // touch() on the path built from $assetsLCL and the "path" argument;
     // a falsy result is reported as $ERR_FILE_PERMISSION.
     $ret = touch($assetsLCL . arg("path"));
     if (!$ret) {
         $err = $ERR_FILE_PERMISSION;
     }
     break;
     #case "rm":
Ejemplo n.º 4
0
    // Tail of a fetch routine whose header and earlier statements are outside
    // this view; $url, $proxy, $cookiefile, $page and $i are defined there.
    //
    // Browser-like request headers. They are collected in $header but are NOT
    // sent: the CURLOPT_HTTPHEADER line below is commented out.
    $header[] = "Cache-Control: no-cache";
    $header[] = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    $header[] = "Accept-Encoding: gzip, deflate, sdch";
    $header[] = "Accept-Language: zh-CN,zh;q=0.8,en;q=0.6";
    $curlObj = curl_init();
    // Present the request as coming from desktop Chrome.
    curl_setopt($curlObj, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36');
    // Original author's note: sending these headers actually made the result wrong??
    // NOTE(review): possibly because Accept-Encoding advertises gzip while
    // CURLOPT_ENCODING is never set, so the body would come back compressed -- confirm.
    // curl_setopt($curlObj, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curlObj, CURLOPT_URL, $url);
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($curlObj, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curlObj, CURLOPT_PROXY, $proxy);
    // The same file is used to load cookies before the request and to store
    // them when the handle is closed.
    curl_setopt($curlObj, CURLOPT_COOKIEJAR, $cookiefile);
    curl_setopt($curlObj, CURLOPT_COOKIEFILE, $cookiefile);
    // Follow redirects: jump when the server sends one, otherwise do nothing.
    curl_setopt($curlObj, CURLOPT_FOLLOWLOCATION, 1);
    // Give up on the transfer after 10 seconds.
    curl_setopt($curlObj, CURLOPT_TIMEOUT, 10);
    // NOTE(review): the result is not checked; curl_exec() returns false on
    // failure and that value is written to the file below as-is.
    $outPut = curl_exec($curlObj);
    curl_close($curlObj);
    // echo $outPut;
    // Save the response next to the script, one file per page number.
    $filename = "./really_" . $page . ".html";
    // $filename = "./really_" . time() . ".html";
    file_put_contents($filename, $outPut);
    // Disabled throttle (original note: "let's go to extremes"):
    // sleep(5);
    $i++;
    $page++;
    // Call this same function again, with no arguments, until $i reaches 10.
    // NOTE(review): this only terminates if $i and $page keep their values
    // between calls (static or global, declared above this view) -- confirm.
    if ($i < 10) {
        $fun = __FUNCTION__;
        $fun();
    }
}
// Kick off the crawl (presumably the function that ends just above -- confirm).
spider();