<?php require_once "entry.php"; $proxyModel = ModelUtil::GetProxyModel(); $proxyModel->GetRandomProxy($proxyIP, $proxyPORT); echo "IP=" . $proxyIP . "\n"; echo "PORT=" . $proxyPORT . "\n"; $model = ModelUtil::GetDouBanModel(); $config = FileUtil::GetGlobalConfig(); $lastPage = FileUtil::GetLastPage(); try { $model->GetTopicList(1, $proxyIP, $proxyPORT); $pageCount = $model->GetPageCount(); echo "pageCount=" . $pageCount . "\n"; for ($i = $lastPage; $i <= $pageCount; $i++) { echo "Page=" . $i . "\n"; FileUtil::SavePage($i); $proxyModel->GetRandomProxy($proxyIP, $proxyPORT); echo "IP=" . $proxyIP . "\n"; echo "PORT=" . $proxyPORT . "\n"; $topicList = $model->GetTopicList($i, $proxyIP, $proxyPORT); foreach ($topicList as $topicUrl) { echo "topicUrl=" . $topicUrl . "\n"; $cmd = $config['FRAMEWORK_DEFAULT']['PHP_PATH'] . " " . WEB_ROOT . "/per_topic.php " . $topicUrl . " " . $proxyIP . " " . $proxyPORT . " >/dev/null &"; exec($cmd); /* $proxyModel->GetRandomProxy($proxyIP,$proxyPORT); $imgList = $model->GetPhotoByTopic($topicUrl,$proxyIP,$proxyPORT);
/**
 * Collects the image URLs embedded in a topic page.
 *
 * The page is fetched through the given proxy and parsed with simple_html_dom;
 * images are taken from the first `div[id=link-report]` element. When that
 * element is missing (the network may be down or the proxy IP may be blocked),
 * exactly one retry is made through a proxy obtained from the proxy model
 * before giving up.
 *
 * The parsed DOM held in $this->mHtmlDom is cleared before returning on both
 * the success and the failure path.
 *
 * @param string $topicUrl URL of the topic page to fetch.
 * @param mixed  $ip       Proxy address for the first attempt; passed through
 *                         to HttpRequestByProxy().
 * @param mixed  $port     Proxy port for the first attempt; passed through to
 *                         HttpRequestByProxy().
 *
 * @return array The `src` values of every `img` inside the content block, or
 *               an empty array when the block could not be found on either
 *               attempt.
 */
public function GetPhotoByTopic($topicUrl, $ip, $port)
{
    $data = $this->HttpRequestByProxy($topicUrl, $ip, $port);
    $this->mHtmlDom = new simple_html_dom();
    $contentDom = $this->mHtmlDom->load($data);
    $linkReport = $contentDom->find("div[id=link-report]", 0);

    // Content block not found: the network may be unreachable or the IP blocked.
    if (!isset($linkReport)) {
        // Switch to another proxy and try loading once more.
        // NOTE(review): GetRandomProxy() presumably fills $proxyIP / $proxyPORT
        // by reference - confirm against the proxy model.
        $proxyModel = ModelUtil::GetProxyModel();
        $proxyModel->GetRandomProxy($proxyIP, $proxyPORT);
        $data = $this->HttpRequestByProxy($topicUrl, $proxyIP, $proxyPORT);
        $contentDom = $this->mHtmlDom->load($data);
        $linkReport = $contentDom->find("div[id=link-report]", 0);

        // Still missing after the retry: report the failure and give up.
        if (!isset($linkReport)) {
            echo $topicUrl . " is forbidden\n";
            // Release the parsed DOM here too, mirroring the success path, so
            // a failed topic does not leave the document tree uncleared.
            $this->mHtmlDom->clear();
            return array();
        }
    }

    // $linkReport is guaranteed to be set at this point.
    $photoList = array();
    foreach ($linkReport->find("img") as $imgDom) {
        $photoList[] = $imgDom->src;
    }

    $this->mHtmlDom->clear();
    return $photoList;
}