Chainable.
public static newDocumentFile ( string $file, $contentType = null ) : phpQueryObject | QueryTemplatesSource | QueryTemplatesParse | QueryTemplatesSourceQuery
$file | string | URLs allowed. See File wrapper page at php.net for more supported sources. |
Returns | phpQueryObject | QueryTemplatesSource | QueryTemplatesParse | QueryTemplatesSourceQuery |
/**
 * Verifies that the first song link in the sample playlist page carries the expected href.
 *
 * NOTE(review): `$songLinks[0]` goes through the phpQuery object's ArrayAccess; confirm it
 * yields the first matched node rather than treating `0` as a selector.
 */
public function testFind()
{
    $samplePage = BASE_PATH . '/samples/zingmp3_playlist.html';
    $songLinks = phpQuery::newDocumentFile($samplePage)->find('.item-song a.fn-name');
    $firstLink = pq($songLinks[0]);

    $expectedHref = 'http://mp3.zing.vn/bai-hat/The-First-Noel-David-Archuleta/IW6UUBOW.html';
    $this->assertEquals($expectedHref, $firstLink->attr('href'), "Attribute and Value are not found");
}
/**
 * Crawls the haosou Q&A list pages and imports each question/answer pair not yet collected.
 *
 * Starting at list page 7600, every list page is loaded, each `li a` link is turned into a
 * detail URL, and detail pages that ClCollect does not already know are stored through the
 * AsQuestion / AsAnswer models and then recorded in ClCollect.
 *
 * NOTE(review): nothing in this method ever sets $isend, so the loop only ends when the
 * process is stopped externally — confirm whether an empty list page should end the crawl.
 *
 * @return void
 */
public function qihoo()
{
    set_time_limit(0);
    import('Org.JAE.QueryList');
    header("Content-type: text/html; charset=utf-8");

    $page = 7600;
    $isend = false;
    while (true) {
        if ($isend) {
            break;
        }

        // Only close an output buffer when one is actually open; calling ob_end_flush()
        // on every pass raises a notice once the buffer stack is empty.
        if (ob_get_level() > 0) {
            ob_end_flush();
        }
        echo $page . "<br/>";
        flush();

        $listurl = "http://wenda.haosou.com/chip/entanslist?pn=" . $page . "%0A&qid=1433735543";
        $page++;
        \phpQuery::newDocumentFile($listurl);

        // pq() already returns the matched anchors; find() requires a selector argument.
        $results = pq('li a');
        // empty() is never true for an object, so test the match count instead.
        if ($results->size() === 0) {
            continue;
        }

        foreach ($results as $result) {
            $url = "http://wenda.haosou.com" . pq($result)->attr('href');
            if (D('ClCollect')->findUrl($url)) {
                continue;
            }

            \phpQuery::newDocumentFile($url);
            $title = pq('.js-ask-title')->text();
            if (empty($title)) {
                continue;
            }
            $answer = pq('.resolved-cnt')->text();

            // Build a fresh payload per question so nothing carries over between iterations.
            $data = array(
                'question_title' => $title,
                'question_detail' => $title,
                'published_uid' => 0,
                'game_id' => 7,
                'anonymous' => 1,
                'is_recommend' => 1,
            );
            $question_id = D('AsQuestion')->addQuestion($data);
            if ($question_id) {
                $adata = array(
                    'question_id' => $question_id,
                    'answer_content' => trim($answer, "\r\n\t"),
                    'anonymous' => 1,
                );
                $answer_id = D('AsAnswer')->addAnswer($adata);
                D('AsQuestion')->saveCollectAnswer($question_id, $answer_id);
                $this->sendToBaidu($question_id);
            }

            // Record the URL as collected whether or not the question insert succeeded.
            $cdata = array('url' => $url, 'site' => "360");
            D('ClCollect')->addCollect($cdata);
        }
    }

    return;
}
/**
 * Loads the document at $url and returns the href of every anchor in it.
 *
 * @param string $url Location passed to phpQuery::newDocumentFile().
 * @return array href attribute values in document order; an empty array when the
 *               document contains no anchors.
 */
function search404($url)
{
    phpQuery::newDocumentFile($url);

    // Start from an empty list so a page without anchors yields [] instead of an
    // undefined variable.
    $url_arr = array();

    // Walk the matched anchors once rather than re-running an `a:eq(i)` query per index.
    foreach (pq("a") as $anchor) {
        $url_arr[] = pq($anchor)->attr("href");
    }

    return $url_arr;
}
/**
 * Scrapes the download counter from the app's store page and stores it with a timestamp.
 *
 * The counter is read from the `.det-ins-num` element, the '下载' suffix is stripped, and
 * the row is inserted into moyu_downloads only when the remainder is a positive integer.
 *
 * @return void
 */
public function actionIndex()
{
    $url = 'http://android.myapp.com/myapp/detail.htm?apkName=me.thinknext.shufa';
    phpQuery::newDocumentFile($url);

    $div = pq(".det-ins-num")->html();
    // Trim so surrounding markup whitespace does not reject an otherwise valid number.
    $downloads = trim(str_replace('下载', '', $div));

    // preg_match() returns 1 on a match; compare strictly instead of `== true`.
    if (preg_match("/^([1-9][0-9]*)\$/", $downloads) === 1) {
        // Bind the values instead of concatenating them into the statement.
        $sql = "insert into moyu_downloads(downloads,ctime) VALUES(:downloads, :ctime)";
        $command = Yii::app()->db->createCommand($sql);
        $command->execute(array(
            ':downloads' => $downloads,
            ':ctime' => date('Y-m-d H:i:s'),
        ));
    }
}
/**
 * Fetches one ttpet.com category page and prints the titles of the articles found on it.
 *
 * @param int|string $page Page number interpolated into the category URL.
 * @return void
 */
function run($page)
{
    $destination = "http://www.ttpet.com/zixun/42/category-catid-42-{$page}.html";
    echo 'Crawling ' . $destination . "\n";
    phpQuery::newDocumentFile($destination);

    // Initialise the result list so a page without matches prints an empty array
    // instead of reading an undefined variable.
    $final = array();
    $articles = pq('#main_bg .zixunmain .p_lf .p_pad')->find('ul');
    foreach ($articles as $article) {
        // One fresh entry per article; nothing is carried over from the previous one.
        $final[] = array('title' => pq($article)->find('dl dd a')->html());
    }

    echo '=========== Page ===========> ' . $page . "\r\n";
    print_r($final);
}
/**
 * Loads a listing page and extracts its entries plus the link to the next page.
 *
 * @param string $url Listing page passed to phpQuery::newDocumentFile().
 * @return array ['data' => list of ['url' => ..., 'name' => ...], 'paging' => ['next' => ...]]
 */
function get_list($url)
{
    $html = phpQuery::newDocumentFile($url);

    // Each matched <strong> wraps the anchor that holds the entry's link and label.
    $entries = array();
    foreach ($html['ul.rstlist-info li .rstname-wrap strong'] as $node) {
        $anchor = pq($node)->find('a');
        $entries[] = array("url" => $anchor->attr('href'), "name" => $anchor->text());
    }

    return array(
        "data" => $entries,
        'paging' => array("next" => $html['.next']->attr("href")),
    );
}
/**
 * Collects SKU ids from the product list found at the given URL.
 *
 * Only `data-sku` values of at most seven characters are kept, and collection stops
 * as soon as $this->count ids have been gathered.
 *
 * @param string $url
 * @return array
 */
private function _getIdsByUrl($url)
{
    phpQuery::newDocumentFile($url);

    $collected = [];
    foreach (pq('#plist .gl-item div[data-sku]') as $node) {
        $sku = pq($node)->attr('data-sku');
        if (strlen($sku) <= 7) {
            $collected[] = trim($sku);
        }
        // The limit is checked after every node, including ones that were not kept.
        if (count($collected) >= $this->count) {
            break;
        }
    }

    return $collected;
}
/**
 * Reads the filter groups from the selector panel of the page at $url.
 *
 * Each `.sl-wrap` element contributes one entry made of its key (label text with colons
 * removed) and the names returned by getNames(). Groups whose key appears in $excludes
 * are skipped.
 *
 * @param string $url      Page passed to phpQuery::newDocumentFile().
 * @param array  $excludes Group keys to leave out.
 * @return array List of ['key' => string, 'values' => mixed] entries.
 */
public function getCategories($url, $excludes = [])
{
    phpQuery::newDocumentFile($url);

    $categories = [];
    foreach (pq('#J_selector .sl-wrap') as $value) {
        $key = trim(str_replace(':', '', pq($value)->find('.sl-key span')->text()));
        if (in_array($key, $excludes)) {
            continue;
        }

        // The '品牌' (brand) group is requested with a limit of 24, every other group with 100.
        $limit = ($key !== '品牌') ? 100 : 24;
        $categories[] = ['key' => $key, 'values' => $this->getNames($value, $limit)];
    }

    return $categories;
}
$p->dump(); print "\n"; $testName = 'WrapAll'; $testResult = 1; phpQuery::newDocumentFile('test.html')->find('p')->slice(1, 3)->wrapAll('<div class="wrapper">'); $result = pq('.wrapper'); if ($result->size() == $testResult) { print "Test '{$testName}' PASSED :)"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!! "; } $result->dump(); print "\n"; $testName = 'WrapInner'; $testResult = 3; phpQuery::newDocumentFile('test.html')->find('li:first')->wrapInner('<div class="wrapper">'); $result = pq('.wrapper p'); if ($result->size() == $testResult) { print "Test '{$testName}' PASSED :)"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!! "; } print $result->dump(); print "\n"; // TODO ! $testName = 'WrapAllTest'; /* $doc = phpQuery::newDocumentHTML('<div id="myDiv"></div>'); $doc['#myDiv']->append('hors paragraphe<p>Test</p>hors paragraphe') ->contents() ->not('[nodeType=1]')
/**
 * Returns the author text from the byline of the product page at $url.
 *
 * The byline text is passed through text_prepare(), the literal "(Author)" marker is
 * removed, and the result is trimmed.
 *
 * @param string $url Product page passed to phpQuery::newDocumentFile().
 * @return string
 */
function get_Author($url)
{
    $productPage = phpQuery::newDocumentFile($url);
    $byline = pq($productPage)->find('#byline .author .a-popover-preload .a-size-medium')->text();
    $withoutRole = str_replace('(Author)', '', text_prepare($byline));

    return trim($withoutRole);
}
//)); //function v87shs79d8fhs9d($html) { // $title = phpQuery::newDocument($html)->find('title'); // $testName = 'Simple AJAX'; // if ( strpos(strtolower($title->html()), 'wikipedia') !== false ) // print "Test '$testName' PASSED :)"; // else { // print "Test '$testName' <strong>FAILED</strong> !!! "; // print "<pre>"; // print_r($title->whois()); // print "</pre>\n"; // } // print "\n"; //} $testName = 'Load'; $test = phpQuery::newDocumentFile('test.html')->find('div:first')->load('http://wikipedia.org/ div[lang]'); if (pq('div[lang]')->size()) { print "Test '{$testName}' PASSED :)"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!! "; print "<pre>"; print "</pre>\n"; } print "\n"; // http://code.google.com/p/phpquery/issues/detail?id=130 $pq = phpQuery::ajax(array('url' => 'http://' . $_SERVER['SERVER_NAME'] . preg_replace('@/[^/]+$@', '/test_ajax_data_1', $_SERVER['REQUEST_URI']), 'success' => 'a789fhasdui3124', 'error' => 'jhdbg786213u8dsfg7y')); function a789fhasdui3124($html) { $testName = 'AJAX request text node'; if ($html == 'hello world') { print "Test '{$testName}' PASSED :)";
} print_r($result->whois()); print "\n"; $testName = 'Filter with multiplie selectors'; $testResult = array('p.body'); $testDOM = phpQuery::newDocumentFile('test.html'); $single = $testDOM->find('p')->filter('.body')->add($testDOM->find('p')->filter('.title')); $double = $testDOM->find('p')->filter('.body, .title'); if ($single->length == count($double)) { print "Test '{$testName}' PASSED :)"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!! "; } print "\n"; print_r($single->whois()); print "\n"; print_r($double->whois()); print "\n"; $testName = 'Attributes in HTML element'; $validResult = 'testValue'; $result = phpQuery::newDocumentFile('test.html')->find('html')->empty()->attr('test', $validResult); $result = phpQuery::newDocument($result->htmlOuter())->find('html')->attr('test'); //similar_text($result->htmlOuter(), $validResult, $similarity); if ($result == $validResult) { print "Test '{$testName}' PASSED :)"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!! "; print "<pre>"; print $result; print "</pre>\n"; }
<?php
// Manual test script for replaceWith() and replaceAll() against test.html.
require_once '../phpQuery/phpQuery.php';
phpQuery::$debug = true;

// ReplaceWith: the second paragraph is swapped for one carrying the `newTitle` class.
$testName = 'ReplaceWith';
phpQuery::newDocumentFile('test.html')
    ->find('p:eq(1)')
    ->replaceWith("<p class='newTitle'>\n this is example title\n </p>");
$result = pq('p:eq(1)');
$verdict = $result->hasClass('newTitle')
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
print $verdict;
$result->dump();
print "\n";

// ReplaceAll: every paragraph in the first list item is replaced by a `.replacer` div,
// so the number of replacers must equal $testResult.
$testName = 'ReplaceAll';
$testResult = 3;
phpQuery::newDocumentFile('test.html');
pq('<div class="replacer">')->replaceAll('li:first p');
$result = pq('.replacer');
$verdict = ($result->size() == $testResult)
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
print $verdict;
$result->dump();
print "\n";
print "\n";

// CLONE: three clones of the first paragraph are inserted before it, so three
// `.clone-test` elements are expected afterwards.
$testName = 'Clone';
$testResult = 3;
$document;
$p = phpQuery::newDocumentFile('test.html')->toReference($document)->find('p:first');
foreach (range(0, 2) as $i) {
    $p->clone()->addClass("clone-test")->addClass("class-{$i}")->insertBefore($p);
}
$verdict = (pq('.clone-test')->size() == $testResult)
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
print $verdict;
print "\n";

// SIBLINGS: next, next, prev from the first list item must land on #testID.
$testName = 'Next';
$testResult = 3;
$document;
$result = phpQuery::newDocumentFile('test.html')
    ->find('li:first')
    ->next()
    ->next()
    ->prev()
    ->is('#testID');
$verdict = $result
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
print $verdict;
print "\n";
?> <?php die;
error_reporting(0); header("Content-Type: text/html;charset=utf-8"); if (!isset($_GET['word'])) { header('Location: /'); } elseif ($_GET['word'] == '') { header('Location: /'); } else { $word = $_GET['word']; } if (isset($_GET['page'])) { $page = $_GET['page']; } else { $page = 1; } include './includes/phpQuery.php'; phpQuery::newDocumentFile(get_base() . 'fetch.php?word=' . urlencode($word) . '&page=' . $page); $list = pq('.g'); foreach ($list as $li) { $data['name'][] = pq($li)->find('.r')->find('a')->html(); $data['_url'][] = pq($li)->find('.r')->find('a')->attr('href'); $data['desc'][] = pq($li)->find('.s')->find('.st')->html(); } $data['num'][0] = pq('#resultStats')->html(); $data['num'][1] = get_number($data['num'][0]); foreach ($data['_url'] as $key => $value) { $data['url'][] = get_true_url($data['_url'][$key]); } foreach ($data['url'] as $key => $value) { if (substr($value, 0, 1) == '/') { unset($data['url'][$key], $data['name'][$key], $data['desc'][$key]); }
require APP . 'et/phpQuery/phpQuery.php'; echo "开始获取队列\n"; $table_name = 'spider_ecshop_url'; $contentModel = new ContentModel(); $configs = array('need_push' => 'yes'); $url_list = $contentModel->getUrlList($configs, 'LIMIT 15000 ', $table_name); $url_count = count($url_list); echo "获取到{$url_count}条要采集的内容... \n"; if (!empty($url_list)) { // $url_info = get_line(prepare('select * from task_list where id=?i limit 1', array($ko))); foreach ($url_list as $v) { if ($v['url']) { /** * 获取单个产品内容 */ phpQuery::newDocumentFile($v['url']); $goods_id = intval(pq('input[name="id"]')->attr('value')); // 说明源id大于47900是无水印的 http://www.tomdurrie.com/search.php?page=380 前判读吧.. // 63767 后面开始进行第二次采集 if ($goods_id > 63767) { // 删除旧产品数据和相册数据 delete('ecs_goods', array('goods_id' => $goods_id)); delete('ecs_goods_gallery', array('goods_id' => $goods_id)); $cat_name = trim(pq('#ur_here>.f_l>a:eq(1)')->html()); $brand_name = trim(pq('.props>dl:eq(1)>dd')->html()); $price_tmp = trim(pq('#ECS_SHOPPRICE')->html()); if (preg_match('(\\d+)', $price_tmp, $match)) { $price = $match[0]; } else { $price = 0; }
echo "\ntime less then 2015-01-01,so it stoped!\ncurrent:" . pq($value)->find('.c-title')->text() . date('Y-m-d H:i:s', $publishTime_unix) . "\n"; $continue = false; break; } if ($publishTime_unix <= strtotime('2015-12-31 23:59:59')) { $title = trim(pq($value)->find('.c-title')->text()); $str[] = $title; $str[] = trim($authorName); $showstr = implode("\t", $str) . "\n"; echo $showstr; file_put_contents($word . '.xls', $showstr, FILE_APPEND); } } foreach ($moreLink as $mk => $mv) { $showstr = ""; phpQuery::newDocumentFile('http://news.baidu.com' . pq($mv)->attr('href')); $l = pq("#content_left .result"); echo "总共:", count($l), "\n"; foreach ($l as $lk => $lv) { $str = array(); $title = pq($lv)->find('.c-title')->text(); $author = pq($lv)->find('.c-title-author')->text(); list($authorDate, $hour) = explode(' ', $author); list($authorName, $publishTime) = explode("201", $authorDate); $str[] = $title; $str[] = trim($authorName); $showstr = implode("\t", $str) . "\n"; echo $showstr; file_put_contents($word . '.xls', $showstr, FILE_APPEND); } }
//echo $url; try { @phpQuery::newDocumentFile($url); } catch (Exception $e) { echo 'have waring ' . $e; exit; } $get_arr = pq('.result.c-result.c-clk-recommend a:first-child'); foreach ($get_arr as $gKey => $gVal) { $get_Urls[] = pq($gVal)->attr('href'); } $get_Urls = array_unique($get_Urls); //exit; foreach ($get_Urls as $key => $val) { try { @phpQuery::newDocumentFile($val); } catch (Exception $e) { echo 'have waring ' . $e; exit; } // echo $val.'<br />'; $e_url = pq('script')->html(); @($ul_arr = explode('"', $e_url)); @($is_url[] = $ul_arr[1]); echo @$ul_arr[1]; if (@$ul_arr[1]) { echo '<br />'; } } } $pn += 10;
// Spider entry script: walks the search result pages and queues every product link found.
set_time_limit(0);

// Define the application directory.
define('APP', dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR);

// Load the framework bootstrap file and the helpers this script relies on.
require APP . 'system/_shell.php';
require APP . 'funcs/spider.fn.php';
require APP . 'models/TaskModel.php';
require APP . 'et/phpQuery/phpQuery.php';

// Build the list of result pages, e.g. http://www.tomdurrie.com/search.php?page=380
$links = get_batch_link('http://www.tomdurrie.com/search.php?page=(*)', 1, 6, 1);
if (!empty($links)) {
    foreach ($links as $target_url) {
        // Collect the product links (and their thumbnails) listed on this page.
        echo "正在获取链接{$target_url}下的产品链接\n";
        phpQuery::newDocumentFile($target_url);
        $goods_list = pq('.hoverlist');
        $lists_tmp = array();
        foreach ($goods_list as $li) {
            $lists_tmp[] = array(
                'url' => pq($li)->find('a')->attr('href'),
                'thumb_img_org' => pq($li)->find('img')->attr('src'),
            );
        }

        if (empty($lists_tmp)) {
            // Probing the page failed. Print the URL in red with ANSI escapes directly
            // instead of shelling out to `echo -e` with an unescaped URL in the command.
            echo "探测链接列表失败: \033[31m" . $target_url . "\033[0m\n";
            $result_errr = insert_log($target_url, '探测链接列表失败');
        } else {
            insert_ec_urls($lists_tmp, 0, true, 'spider_ecshop_url');
        }
    }
}
/**
 * Prints the heading link text of every company block in the "#hotcoms" section of
 * job.blueidea.com, one per line separated by <br>.
 *
 * @return void
 */
function cool()
{
    Vendor('phpQuery.phpQuery');
    \phpQuery::newDocumentFile('http://job.blueidea.com');

    foreach (pq('#hotcoms .coms')->find('div') as $companyNode) {
        $name = pq($companyNode)->find('h3 a')->text();
        echo $name . "<br>";
    }
}
<?php include 'Lib/phpQuery/phpQuery.php'; include 'Lib/Smarty/Smarty.class.php'; $keywords = trim($_POST['keywords']); $urls = trim($_POST['urls']); if (empty($keywords) & empty($urls)) { echo '请输入争取参数'; exit; } $keyword_arr = explode("\n", $keywords); $url_arr = explode("\n", $urls); foreach ($keyword_arr as $key) { $randa = rand(100, 999); $url = 'http://wap.baidu.com/s?word=' . $key . '&ts=8173515&t_kt=46&rsv_iqid=13192952292074186' . $randa . '&sa=ib&rsv_sug4=3785&inputT=1951&ss=100'; phpQuery::newDocumentFile($url); $get_arr = pq(".ec_site"); foreach ($get_arr as $gKey => $gVal) { $gValHtml = pq($gVal)->html(); if (empty($gValHtml)) { continue; } elseif (!is_array($gValHtml)) { $gValHtmls[] = $gValHtml; } else { $gValHtmls = $gValHtml; } foreach ($url_arr as $uKey => $urlone) { if (in_array($urlone, $gValHtmls)) { // $sql = "INSERT INTO `mobile_baidu` (keyword,url,rank,`timestamp`) VALUES ($key,$gValHtml,$gKey+1,now())"; $rankVal['keyword'] = $key; $rankVal['url'] = $gValHtml;
$invalid_keys = ["HDOM", "DEFA", "MAX_"]; $function_list = get_defined_functions()["internal"]; sort($function_list); if (!isset($_GET['id'])) { $id = rand(0, sizeof($function_list)); } else { $id = $_GET['id']; } $file = fopen("php_functions.csv", "w"); $fields = []; error_reporting(E_ALL & ~E_WARNING); //sizeof($function_list) for ($i = 0; $i < 3; $i++) { $string = ""; $function_url = "http://php.net/manual/en/function." . str_replace("_", "-", $function_list[$i]) . ".php"; $html = phpQuery::newDocumentFile($function_url); if (strpos(error_get_last()["message"], "404 Not Found") == false) { $num_of_functions++; echo "Fetching {$function_url}... \n"; $name = $function_list[$i]; foreach (get_defined_constants(true)["user"] as $key => $value) { if (!in_array(substr($key, 0, 4), $invalid_keys)) { foreach (pq(constant($key)) as $element) { $string = $string . process_string($element->textContent, $name); } } } $info = $string; echo "<span style='font-weight:bold'>{$function_list[$id]}</span> <BR> {$info}"; fwrite($file, "\"{$name}\", \"{$info}\" \n"); } else {
// CALLBACKS
// Three callable flavours (plain function, static method, instance method) are passed to
// each(); every one adds `newClass` to the node it receives.
class callbackClass
{
    static function staticMethodCallback($node)
    {
        pq($node)->addClass('newClass');
    }

    function methodCallback($node)
    {
        pq($node)->addClass('newClass');
    }
}

function functionCallback($node)
{
    pq($node)->addClass('newClass');
}

// Expected whois() output once every <li> has received the new class.
$testResult = array(
    'li.newClass',
    'li#testID.newClass',
    'li.newClass',
    'li#i_have_nested_list.newClass',
    'li.nested.newClass',
    'li.second.newClass',
);
$tests = array(
    'functionCallback',
    array('callbackClass', 'staticMethodCallback'),
    array(new callbackClass(), 'methodCallback'),
);
foreach ($tests as $test) {
    $result = phpQuery::newDocumentFile('test.html')->find('li')->each($test);
    // Array callables are reported by their method name.
    $testName = is_array($test) ? $test[1] : $test;
    if ($result->whois() != $testResult) {
        print "Test '{$testName}' <strong>FAILED</strong> !!! ";
        print "<pre>";
        print_r($result->whois());
        print "</pre>\n";
    } else {
        print "Test '{$testName}' PASSED :)";
    }
    print "\n";
}
/**
 * Follows one navigation link on the given page and returns the first link of every
 * <dd> entry found in the ".wy_state_model" section of the page it leads to.
 *
 * @param string $link_provice Page passed to phpQuery::newDocumentFile().
 * @return array href values, one per <dd> entry.
 */
function filter_cityUrl($link_provice)
{
    phpQuery::newDocumentFile($link_provice);

    // The target is the third anchor inside the tenth <div> of the top navigation.
    $navLink = pq(".wy_state_topNav")->find('div')->eq(9)->find('a')->eq(2);
    phpQuery::newDocumentFile($navLink->attr("href"));

    // Links of the next level down.
    $links = array();
    foreach (pq(".wy_state_model ")->find('dd') as $entry) {
        $links[] = pq($entry)->find('a:first')->attr("href");
    }

    echo "--------get cityUrl success-----------" . PHP_EOL;

    return $links;
}
/**
 * Crawls the page named by the `destination` request value and hands the article
 * titles and image URLs found on it to the view.
 *
 * @return void
 */
public function crawlAction()
{
    // Always hand the view an array, even when no destination was supplied.
    $buffer = array();

    $destination = $this->get('destination', FALSE);
    if ($destination) {
        include LIB_PATH . '/phpQuery/phpQuery.php';
        phpQuery::newDocumentFile($destination);

        // Start from an empty list so a page without matches assigns [] rather than
        // reading an undefined variable.
        $final = array();
        $articles = pq('#main_bg .zixunmain .p_lf .p_pad')->find('ul');
        foreach ($articles as $article) {
            $title = pq($article)->find('dl dd a')->html();
            $final[] = array(
                'title' => addslashes($title),
                'img' => pq($article)->find('dl dt a img')->attr('src'),
            );
        }
        $buffer['articles'] = $final;
    }

    $this->getView()->assign($buffer);
}
/**
 * Crawls the page named by the `destination` request value and hands the focus-list
 * images (src and alt text) found on it to the view.
 *
 * @return void
 */
public function crawlAction()
{
    // Always hand the view an array, even when no destination was supplied.
    $buffer = array();

    $destination = $this->get('destination', FALSE);
    if ($destination) {
        Yaf_Loader::import(LIB_PATH . '/phpQuery/phpQuery.php');
        phpQuery::newDocumentFile($destination);

        // Start from an empty list so a page without matches assigns [] rather than
        // reading an undefined variable.
        $final = array();
        $articles = pq('.main-content .chief .mod-focus .focus')->find('ul li');
        foreach ($articles as $article) {
            // The image's alt text doubles as the article title.
            $final[] = array(
                'img' => pq($article)->find('a img')->attr('src'),
                'title' => pq($article)->find('a img')->attr('alt'),
            );
        }
        $buffer['articles'] = $final;
    }

    $this->getView()->assign($buffer);
}
<?php /*定义字段*/ $site = $_GET['site']; $keyWords = $_GET['keyWords']; //$pn=0; //$searchEngine['谷歌']=('http://www.google.com.hk/search?newwindow=1&safe=strict&site=&source=hp&q=site:'); require 'phpQuery/phpQuery.php'; //echo '<br />'.$site.'<br />'; $word = split(' ', $keyWords); $continue = 1; set_time_limit(0); foreach ($word as $key => $value) { for ($bd = 0; $bd < 100; $bd += 10) { $searchEngine['百度'] = 'http://www.baidu.com/s?pn=' . $bd . '&wd=' . $value; //实例化phpQuery phpQuery::newDocumentFile($searchEngine['百度']); if ($order = pq("table:contains({$site})")->attr("id")) { echo $value . ' => ' . $order . '<br />'; $continue = 0; break; } else { $continue = 1; } } if ($continue == 1) { echo $value . ' => 100+<br />'; } } /*for($start=0;$start<200;$start+=10){ $searchEngine['谷歌']=('http://www.google.com.hk/search?newwindow=1&safe=strict&site=&source=hp&q='.$keyWords.'&start='.$start); //实例化phpQuery
$postPage = isset($_GET['pagenum']) ? $_GET['pagenum'] : 'noPage'; $callback = isset($_GET['callback']) ? $_GET['callback'] : 'callback'; if ($postKey === 'noKey' || $postKey == '') { customJsonRes('204', '没有postkey或为空', 'null'); } else { $key = urlencode($postKey); //urlencode(iconv('GBK', 'UTF-8', '前端'));//encodeURIComponent('前端'); //Detected an illegal character utf-8 //调试 //echo '关键字:'.$key.'搜索类型:'.$postType.'展示第'.$postPage.'页'; switch ($postType) { case 1: $searchUrl = 'http://weixin.sogou.com/weixin?query=' . $key . '&type=2&page=' . $postPage . '&ie=utf8'; //echo $searchUrl; echo '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta http-equiv="X-UA-Compatible" content="IE=Edge"/><link rel="stylesheet" href="../bootstrap/css/bootstrap.min.css"><link rel="stylesheet" href="../bootstrap/css/bootstrap-theme.min.css"><link rel="stylesheet" type="text/css" href="../test/cssjs/default.css"><title>' . $key . '的微信公众号文章 - by HighSea</title><!--<script src="../test/cssjs/copy.js">--></script><script type="text/javascript" charset="gbk" src="http://cdn.bootcss.com/jquery/1.11.1/jquery.min.js"></script></head><body><div class="container"><div class="row"><p class="bg-success">以下内容来自微信公众平台</p></div>'; phpQuery::newDocumentFile($searchUrl); $artlist = pq(".results"); //echo $artlist; foreach ($artlist as $li) { $description = pq($li)->find('.wx-rb'); echo '<div class="row alltitle">' . $description->find('.txt-box h4')->html() . '</div>'; echo '<div class="row">' . $description->find('.img_box2 img')->attr('src'); } echo "</div></body></html>"; break; case 2: $searchUrl = 'http://www.liepin.com/zhaopin/?searchField=1&key=' . $key . '&industries=&jobTitles=&dqs=070020&compscale=&compkind=&pubTime=&salary=&searchType=1&clean_condition=&jobKind=&curPage=' . $postPage; echo "开发中"; break; case 3: echo "开发中";
/**
 * Runs one page of a configured collection (scraping) rule.
 *
 * Loads the rule identified by `cid`, fetches the list page for the current `page`,
 * and for every list entry loads the detail page and evaluates the per-field page rules.
 *
 * NOTE(review): the first field rule of type 0 is dumped and the request is terminated
 * with die — this looks like leftover debugging; confirm before relying on the rest.
 *
 * @return void
 */
public function startcollection()
{
    // Request parameters.
    $cid = I('get.cid');                 // collection rule id
    $ctype = I('get.ctype');             // 0 = collect everything, 1 = collect by page range
    $sp = I('get.sp', 1, 'intval');      // first page
    $tp = I('get.tp');                   // last page
    $page = I('get.page', 1, 'intval');  // page currently being collected

    // When collecting by page range, stop once the last page has been reached.
    $blagStop = false;
    if ($ctype == 1) {
        if ($tp && $tp <= $page) {
            $blagStop = TRUE;
        }
    }

    // Load the collection rule and build the list URL for the current page.
    $PlgCollection = D('PlgCollection');
    $colInfo = $PlgCollection->find($cid);
    $listRule = json_decode($colInfo['listrule']);
    $listurl = str_replace('{$page}', $page, $listRule->listurl);
    $urlInfo = parse_url($listurl);

    // Parse the list page.
    Vendor('phpQuery.phpQuery', '', '.class.php');
    \phpQuery::newDocumentFile($listurl);
    $artlist = pq($listRule->listobj);
    foreach ($artlist as $li) {
        // Resolve the detail page address. strpos() takes the haystack first; only URLs
        // that do not already start with "http" get the list page's scheme and host.
        $pageurl = pq($li)->attr($listRule->listattr);
        if (strpos((string) $pageurl, 'http') !== 0) {
            $pageurl = $urlInfo['scheme'] . '://' . $urlInfo['host'] . $pageurl;
        }

        // Load the detail page and look up the fields of the target model.
        \phpQuery::newDocumentFile($pageurl);
        $cateArr = explode(',', $colInfo['cate']);
        $mid = $cateArr[1]; // model id
        $ModelField = DD('ModelField');
        $fieldlist = $ModelField->selFieldByMid($mid); // all fields of the model
        $pagerule = json_decode($colInfo['pagerule'], true);
        foreach ($fieldlist as $key => $f) {
            if (isset($pagerule[$f['fieldname']])) {
                $rulePageJson = $pagerule[$f['fieldname']];
                $rulePageObj = json_decode($rulePageJson);
                if ($rulePageObj->type == 0) {
                    $fieldNodes = pq($rulePageObj->obj);
                    dump($this->_TxtToUtf8(I('post.langcode'), $fieldNodes->text()));
                    die;
                }
            }
        }
    }

    if ($blagStop === true) {
        // Stop collecting.
    } else {
        // Parameters for continuing with the next page (the redirect itself is disabled).
        $param = array('cid' => $cid, 'ctype' => $ctype, 'sp' => $sp, 'tp' => $tp, 'page' => $page + 1);
        //$this->redirect('Collection/Admin/startcollection', $param);
    }

    $this->display();
}
$testName = 'Append and move'; $result = phpQuery::newDocumentFile('test.html'); $li = $result->find('li:first'); $result->find('div')->_empty(); $li->html('test1-é-test1')->append('test2-é-test2')->appendTo($result->find('div:first')); $result = $result->find('div:first li:first'); $expected = 'test1-é-test1test2-é-test2'; if (trim(str_replace("\n", '', $result->html())) == $expected) { print "Test '{$testName}' passed :)<br />\n"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!!<br />\n"; print "'" . trim($result->html()) . "'"; } print "\n"; $testName = 'Attr charset'; $result = phpQuery::newDocumentFile('test.html')->find('li:first')->attr('test', 'foo é żźć bar'); if (trim($result->attr('test')) == 'foo é żźć bar') { print "Test '{$testName}' passed :)<br />\n"; } else { print "Test '{$testName}' <strong>FAILED</strong> !!!<br />\n"; print $result->attr('test'); } print "\n"; //$testName = 'Loading document without meta charset'; //$result = phpQuery::newDocumentFile('test.html') // ->_empty(); ////var_dump((string)$result->htmlOuter()); //$result = phpQuery::newDocument($result->htmlOuter()); //$validResult = <<<EOF //<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> //<html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8" /></head></html>