newDocumentFile() public static method

Chainable.
public static newDocumentFile ( string $file, $contentType = null ) : phpQueryObject | QueryTemplatesSource | QueryTemplatesParse | QueryTemplatesSourceQuery
$file string Path or URL of the document to load. URLs are allowed; see the "Supported Protocols and Wrappers" page at php.net for other supported sources.
return phpQueryObject | QueryTemplatesSource | QueryTemplatesParse | QueryTemplatesSourceQuery
Example #1
0
 /**
  * Loads the sample playlist page and checks the href of the link selected
  * at offset 0 of the ".item-song a.fn-name" matches.
  */
 public function testFind()
 {
     $document = phpQuery::newDocumentFile(BASE_PATH . '/samples/zingmp3_playlist.html');
     $songLinks = $document->find('.item-song a.fn-name');
     $expectedHref = 'http://mp3.zing.vn/bai-hat/The-First-Noel-David-Archuleta/IW6UUBOW.html';
     $actualHref = pq($songLinks[0])->attr('href');
     $this->assertEquals($expectedHref, $actualHref, "Attribute and Value are not found");
 }
 /**
  * Crawls question list pages of wenda.haosou.com, starting at page 7600,
  * and imports every question/answer pair that has not been collected yet.
  *
  * For each list page every "li a" link is followed. URLs already known to
  * ClCollect are skipped; otherwise the question is stored through
  * AsQuestion, its answer through AsAnswer, sendToBaidu() is called with the
  * new question id and the URL is recorded in ClCollect.
  *
  * NOTE(review): $isend is never set to true, so the loop only stops when the
  * process is terminated from outside; confirm whether a stop condition is
  * wanted.
  *
  * @return void
  */
 public function qihoo()
 {
     set_time_limit(0);
     import('Org.JAE.QueryList');
     header("Content-type: text/html; charset=utf-8");
     // Close the active output buffers once so the progress lines below reach
     // the client immediately. Calling ob_end_flush() on every iteration
     // raises a notice as soon as no buffer is left.
     while (ob_get_level() > 0) {
         ob_end_flush();
     }
     $page = 7600;
     $isend = false;
     while (!$isend) {
         echo $page . "<br/>";
         flush();
         $listurl = "http://wenda.haosou.com/chip/entanslist?pn=" . $page . "%0A&qid=1433735543";
         $page++;
         \phpQuery::newDocumentFile($listurl);
         // find() requires a selector; "li a" already selects the anchors.
         $results = pq('li a');
         // empty() is always false for an object, so count the matches instead.
         if (count($results) === 0) {
             continue;
         }
         foreach ($results as $result) {
             $url = "http://wenda.haosou.com" . pq($result)->attr('href');
             if (D('ClCollect')->findUrl($url)) {
                 continue;
             }
             \phpQuery::newDocumentFile($url);
             $title = pq('.js-ask-title')->text();
             if (empty($title)) {
                 continue;
             }
             $answer = pq('.resolved-cnt')->text();
             // Fresh arrays per question so nothing leaks between iterations.
             $data = array(
                 'question_title' => $title,
                 'question_detail' => $title,
                 'published_uid' => 0,
                 'game_id' => 7,
                 'anonymous' => 1,
                 'is_recommend' => 1,
             );
             $question_id = D('AsQuestion')->addQuestion($data);
             if ($question_id) {
                 $adata = array(
                     'question_id' => $question_id,
                     'answer_content' => trim($answer, "\r\n\t"),
                     'anonymous' => 1,
                 );
                 $answer_id = D('AsAnswer')->addAnswer($adata);
                 D('AsQuestion')->saveCollectAnswer($question_id, $answer_id);
                 $this->sendToBaidu($question_id);
             }
             $cdata = array('url' => $url, 'site' => "360");
             D('ClCollect')->addCollect($cdata);
         }
     }
     return;
 }
Example #3
0
/**
 * Collects the href attribute of every <a> element of the document at $url.
 *
 * @param string $url Path or URL of the document to load.
 * @return array One entry per anchor, in the order the anchors are matched;
 *               an empty array when the document contains no anchors.
 */
function search404($url)
{
    phpQuery::newDocumentFile($url);
    // Start from an empty array so a page without anchors returns array()
    // instead of an undefined variable.
    $url_arr = array();
    // Iterate the matched anchors directly rather than re-running an
    // "a:eq(i)" query against the whole document for every index.
    foreach (pq("a") as $anchor) {
        $url_arr[] = pq($anchor)->attr("href");
    }
    return $url_arr;
}
 /**
  * Reads the ".det-ins-num" element of the app detail page, strips the
  * '下载' suffix and, when the remainder is a positive integer, inserts it
  * with the current timestamp into moyu_downloads.
  *
  * @return void
  */
 public function actionIndex()
 {
     $url = 'http://android.myapp.com/myapp/detail.htm?apkName=me.thinknext.shufa';
     phpQuery::newDocumentFile($url);
     $div = pq(".det-ins-num")->html();
     // Surrounding whitespace in the markup must not make the check fail.
     $downloads = trim(str_replace('下载', '', $div));
     // preg_match() returns 1 on a match; /D keeps "$" from matching before a trailing newline.
     if (preg_match('/^[1-9][0-9]*$/D', $downloads) === 1) {
         $ctime = date('Y-m-d H:i:s');
         // Bind the values instead of concatenating them into the statement.
         $sql = "insert into moyu_downloads(downloads,ctime) VALUES(:downloads, :ctime)";
         $command = Yii::app()->db->createCommand($sql);
         $command->execute(array(':downloads' => $downloads, ':ctime' => $ctime));
     }
 }
Example #5
0
/**
 * Loads one category page of www.ttpet.com and prints the titles found on it.
 *
 * @param mixed $page Page number inserted into the category URL.
 * @return void Output is written with echo/print_r.
 */
function run($page)
{
    $destination = "http://www.ttpet.com/zixun/42/category-catid-42-{$page}.html";
    echo 'Crawling ' . $destination . "\n";
    phpQuery::newDocumentFile($destination);
    // Defined up front so a page without articles prints an empty array
    // instead of triggering an undefined-variable notice.
    $final = array();
    $articles = pq('#main_bg .zixunmain .p_lf .p_pad')->find('ul');
    foreach ($articles as $article) {
        $final[] = array('title' => pq($article)->find('dl dd a')->html());
    }
    echo '=========== Page ===========> ' . $page . "\r\n";
    print_r($final);
}
Example #6
0
/**
 * Extracts the link/name pairs of a listing page plus the href of its
 * ".next" element.
 *
 * @param string $url Path or URL of the listing page.
 * @return array array('data' => list of array('url' => ..., 'name' => ...),
 *               'paging' => array('next' => ...)).
 */
function get_list($url)
{
    $html = phpQuery::newDocumentFile($url);
    $entries = array();
    foreach ($html['ul.rstlist-info li .rstname-wrap strong'] as $node) {
        $entries[] = array(
            "url" => pq($node)->find('a')->attr('href'),
            "name" => pq($node)->find('a')->text(),
        );
    }
    return array(
        "data" => $entries,
        'paging' => array("next" => $html['.next']->attr("href")),
    );
}
Example #7
0
 /**
  * Fetches SKUs from the page at the given URL.
  *
  * Reads the data-sku attribute of every "#plist .gl-item div[data-sku]"
  * element, keeps the trimmed values whose raw length is at most 7 bytes and
  * stops as soon as $this->count ids have been gathered.
  *
  * @param  string $url
  * @return array
  */
 private function _getIdsByUrl($url)
 {
     phpQuery::newDocumentFile($url);
     $collected = [];
     foreach (pq('#plist .gl-item div[data-sku]') as $node) {
         $rawSku = pq($node)->attr('data-sku');
         if (strlen($rawSku) <= 7) {
             $collected[] = trim($rawSku);
         }
         if (count($collected) >= $this->count) {
             break;
         }
     }
     return $collected;
 }
Example #8
0
 /**
  * Builds the list of filter categories shown in the "#J_selector" area of
  * the page at $url.
  *
  * @param string $url      Path or URL of the page to load.
  * @param array  $excludes Category keys to leave out of the result.
  * @return array List of array('key' => ..., 'values' => ...) entries, where
  *               'values' is whatever getNames() returns for the category.
  */
 public function getCategories($url, $excludes = [])
 {
     $categories = [];
     phpQuery::newDocumentFile($url);
     foreach (pq('#J_selector .sl-wrap') as $value) {
         $key = trim(str_replace(':', '', pq($value)->find('.sl-key span')->text()));
         if (in_array($key, $excludes)) {
             continue;
         }
         // The '品牌' category is read with a limit of 24, all others with 100.
         $limit = $key !== '品牌' ? 100 : 24;
         $categories[] = ['key' => $key, 'values' => $this->getNames($value, $limit)];
     }
     return $categories;
 }
$p->dump();
print "\n";
$testName = 'WrapAll';
$testResult = 1;
phpQuery::newDocumentFile('test.html')->find('p')->slice(1, 3)->wrapAll('<div class="wrapper">');
$result = pq('.wrapper');
if ($result->size() == $testResult) {
    print "Test '{$testName}' PASSED :)";
} else {
    print "Test '{$testName}' <strong>FAILED</strong> !!! ";
}
$result->dump();
print "\n";
$testName = 'WrapInner';
$testResult = 3;
phpQuery::newDocumentFile('test.html')->find('li:first')->wrapInner('<div class="wrapper">');
$result = pq('.wrapper p');
if ($result->size() == $testResult) {
    print "Test '{$testName}' PASSED :)";
} else {
    print "Test '{$testName}' <strong>FAILED</strong> !!! ";
}
print $result->dump();
print "\n";
// TODO !
$testName = 'WrapAllTest';
/*
$doc = phpQuery::newDocumentHTML('<div id="myDiv"></div>');
$doc['#myDiv']->append('hors paragraphe<p>Test</p>hors paragraphe')
	->contents()
		->not('[nodeType=1]')
/**
 * Returns the author text of the product page at $url: the text of
 * "#byline .author .a-popover-preload .a-size-medium" is passed through
 * text_prepare(), the literal "(Author)" is removed and the result trimmed.
 *
 * @param string $url Path or URL of the product page.
 * @return string
 */
function get_Author($url)
{
    $productPage = phpQuery::newDocumentFile($url);
    $authorNode = pq($productPage)->find('#byline .author .a-popover-preload .a-size-medium');
    $prepared = text_prepare($authorNode->text());
    $withoutRole = str_replace('(Author)', '', $prepared);
    return trim($withoutRole);
}
Example #11
0
//));
//function v87shs79d8fhs9d($html) {
//	$title = phpQuery::newDocument($html)->find('title');
//	$testName = 'Simple AJAX';
//	if ( strpos(strtolower($title->html()), 'wikipedia') !== false )
//		print "Test '$testName' PASSED :)";
//	else {
//		print "Test '$testName' <strong>FAILED</strong> !!! ";
//		print "<pre>";
//		print_r($title->whois());
//		print "</pre>\n";
//	}
//	print "\n";
//}
$testName = 'Load';
$test = phpQuery::newDocumentFile('test.html')->find('div:first')->load('http://wikipedia.org/ div[lang]');
if (pq('div[lang]')->size()) {
    print "Test '{$testName}' PASSED :)";
} else {
    print "Test '{$testName}' <strong>FAILED</strong> !!! ";
    print "<pre>";
    print "</pre>\n";
}
print "\n";
// http://code.google.com/p/phpquery/issues/detail?id=130
$pq = phpQuery::ajax(array('url' => 'http://' . $_SERVER['SERVER_NAME'] . preg_replace('@/[^/]+$@', '/test_ajax_data_1', $_SERVER['REQUEST_URI']), 'success' => 'a789fhasdui3124', 'error' => 'jhdbg786213u8dsfg7y'));
function a789fhasdui3124($html)
{
    $testName = 'AJAX request text node';
    if ($html == 'hello world') {
        print "Test '{$testName}' PASSED :)";
Example #12
0
}
print_r($result->whois());
print "\n";
$testName = 'Filter with multiplie selectors';
$testResult = array('p.body');
$testDOM = phpQuery::newDocumentFile('test.html');
$single = $testDOM->find('p')->filter('.body')->add($testDOM->find('p')->filter('.title'));
$double = $testDOM->find('p')->filter('.body, .title');
if ($single->length == count($double)) {
    print "Test '{$testName}' PASSED :)";
} else {
    print "Test '{$testName}' <strong>FAILED</strong> !!! ";
}
print "\n";
print_r($single->whois());
print "\n";
print_r($double->whois());
print "\n";
$testName = 'Attributes in HTML element';
$validResult = 'testValue';
$result = phpQuery::newDocumentFile('test.html')->find('html')->empty()->attr('test', $validResult);
$result = phpQuery::newDocument($result->htmlOuter())->find('html')->attr('test');
//similar_text($result->htmlOuter(), $validResult, $similarity);
if ($result == $validResult) {
    print "Test '{$testName}' PASSED :)";
} else {
    print "Test '{$testName}' <strong>FAILED</strong> !!! ";
    print "<pre>";
    print $result;
    print "</pre>\n";
}
Example #13
0
<?php

require_once '../phpQuery/phpQuery.php';
phpQuery::$debug = true;

// ReplaceWith: the second paragraph is swapped for a new one carrying the
// "newTitle" class, which the check below looks for.
$testName = 'ReplaceWith';
phpQuery::newDocumentFile('test.html')
    ->find('p:eq(1)')
    ->replaceWith("<p class='newTitle'>\n                        this is example title\n                    </p>");
$result = pq('p:eq(1)');
print $result->hasClass('newTitle')
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
$result->dump();
print "\n";

// ReplaceAll: every "li:first p" match is replaced by a ".replacer" div;
// three such divs are expected afterwards.
$testName = 'ReplaceAll';
$testResult = 3;
phpQuery::newDocumentFile('test.html');
pq('<div class="replacer">')->replaceAll('li:first p');
$result = pq('.replacer');
print $result->size() == $testResult
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
$result->dump();
print "\n";
Example #14
0
print "\n";
// CLONE: the first paragraph is cloned three times, each clone is tagged
// with "clone-test" and inserted before the original.
$testName = 'Clone';
$testResult = 3;
$document;
$p = phpQuery::newDocumentFile('test.html')->toReference($document)->find('p:first');
for ($i = 0; $i <= 2; $i++) {
    $p->clone()->addClass("clone-test")->addClass("class-{$i}")->insertBefore($p);
}
print pq('.clone-test')->size() == $testResult
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
print "\n";

// SIBLINGS: next()->next()->prev() from the first <li> must land on #testID.
$testName = 'Next';
$testResult = 3;
$document;
$result = phpQuery::newDocumentFile('test.html')
    ->find('li:first')
    ->next()
    ->next()
    ->prev()
    ->is('#testID');
print $result
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
print "\n";
?>


<?php 
die;
Example #15
0
error_reporting(0);
header("Content-Type: text/html;charset=utf-8");
if (!isset($_GET['word'])) {
    header('Location: /');
} elseif ($_GET['word'] == '') {
    header('Location: /');
} else {
    $word = $_GET['word'];
}
if (isset($_GET['page'])) {
    $page = $_GET['page'];
} else {
    $page = 1;
}
include './includes/phpQuery.php';
phpQuery::newDocumentFile(get_base() . 'fetch.php?word=' . urlencode($word) . '&page=' . $page);
$list = pq('.g');
foreach ($list as $li) {
    $data['name'][] = pq($li)->find('.r')->find('a')->html();
    $data['_url'][] = pq($li)->find('.r')->find('a')->attr('href');
    $data['desc'][] = pq($li)->find('.s')->find('.st')->html();
}
$data['num'][0] = pq('#resultStats')->html();
$data['num'][1] = get_number($data['num'][0]);
foreach ($data['_url'] as $key => $value) {
    $data['url'][] = get_true_url($data['_url'][$key]);
}
foreach ($data['url'] as $key => $value) {
    if (substr($value, 0, 1) == '/') {
        unset($data['url'][$key], $data['name'][$key], $data['desc'][$key]);
    }
require APP . 'et/phpQuery/phpQuery.php';
echo "开始获取队列\n";
$table_name = 'spider_ecshop_url';
$contentModel = new ContentModel();
$configs = array('need_push' => 'yes');
$url_list = $contentModel->getUrlList($configs, 'LIMIT 15000 ', $table_name);
$url_count = count($url_list);
echo "获取到{$url_count}条要采集的内容... \n";
if (!empty($url_list)) {
    // $url_info = get_line(prepare('select * from task_list where id=?i limit 1', array($ko)));
    foreach ($url_list as $v) {
        if ($v['url']) {
            /**
             * 获取单个产品内容
             */
            phpQuery::newDocumentFile($v['url']);
            $goods_id = intval(pq('input[name="id"]')->attr('value'));
            // 说明源id大于47900是无水印的 http://www.tomdurrie.com/search.php?page=380 前判读吧..
            // 63767 后面开始进行第二次采集
            if ($goods_id > 63767) {
                // 删除旧产品数据和相册数据
                delete('ecs_goods', array('goods_id' => $goods_id));
                delete('ecs_goods_gallery', array('goods_id' => $goods_id));
                $cat_name = trim(pq('#ur_here>.f_l>a:eq(1)')->html());
                $brand_name = trim(pq('.props>dl:eq(1)>dd')->html());
                $price_tmp = trim(pq('#ECS_SHOPPRICE')->html());
                if (preg_match('(\\d+)', $price_tmp, $match)) {
                    $price = $match[0];
                } else {
                    $price = 0;
                }
Example #17
0
         echo "\ntime less then 2015-01-01,so it stoped!\ncurrent:" . pq($value)->find('.c-title')->text() . date('Y-m-d H:i:s', $publishTime_unix) . "\n";
         $continue = false;
         break;
     }
     if ($publishTime_unix <= strtotime('2015-12-31 23:59:59')) {
         $title = trim(pq($value)->find('.c-title')->text());
         $str[] = $title;
         $str[] = trim($authorName);
         $showstr = implode("\t", $str) . "\n";
         echo $showstr;
         file_put_contents($word . '.xls', $showstr, FILE_APPEND);
     }
 }
 foreach ($moreLink as $mk => $mv) {
     $showstr = "";
     phpQuery::newDocumentFile('http://news.baidu.com' . pq($mv)->attr('href'));
     $l = pq("#content_left .result");
     echo "总共:", count($l), "\n";
     foreach ($l as $lk => $lv) {
         $str = array();
         $title = pq($lv)->find('.c-title')->text();
         $author = pq($lv)->find('.c-title-author')->text();
         list($authorDate, $hour) = explode(' ', $author);
         list($authorName, $publishTime) = explode("201", $authorDate);
         $str[] = $title;
         $str[] = trim($authorName);
         $showstr = implode("\t", $str) . "\n";
         echo $showstr;
         file_put_contents($word . '.xls', $showstr, FILE_APPEND);
     }
 }
Example #18
0
     //echo $url;
     try {
         @phpQuery::newDocumentFile($url);
     } catch (Exception $e) {
         echo 'have waring ' . $e;
         exit;
     }
     $get_arr = pq('.result.c-result.c-clk-recommend a:first-child');
     foreach ($get_arr as $gKey => $gVal) {
         $get_Urls[] = pq($gVal)->attr('href');
     }
     $get_Urls = array_unique($get_Urls);
     //exit;
     foreach ($get_Urls as $key => $val) {
         try {
             @phpQuery::newDocumentFile($val);
         } catch (Exception $e) {
             echo 'have waring ' . $e;
             exit;
         }
         // echo $val.'<br />';
         $e_url = pq('script')->html();
         @($ul_arr = explode('"', $e_url));
         @($is_url[] = $ul_arr[1]);
         echo @$ul_arr[1];
         if (@$ul_arr[1]) {
             echo '<br />';
         }
     }
 }
 $pn += 10;
set_time_limit(0);
// Define the application directory
define('APP', dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR);
// Load the framework bootstrap file and the helpers used below
require APP . 'system/_shell.php';
require APP . 'funcs/spider.fn.php';
require APP . 'models/TaskModel.php';
require APP . 'et/phpQuery/phpQuery.php';
// Get the list of links, e.g. http://www.tomdurrie.com/search.php?page=380
$links = get_batch_link('http://www.tomdurrie.com/search.php?page=(*)', 1, 6, 1);
if (!empty($links)) {
    foreach ($links as $target_url) {
        // Collect the product link and thumbnail of every ".hoverlist"
        // element on the list page.
        echo "正在获取链接{$target_url}下的产品链接\n";
        phpQuery::newDocumentFile($target_url);
        $goods_list = pq('.hoverlist');
        $lists_tmp = array();
        foreach ($goods_list as $li) {
            $lists_tmp[] = array(
                'url' => pq($li)->find('a')->attr('href'),
                'thumb_img_org' => pq($li)->find('img')->attr('src'),
            );
        }
        if (empty($lists_tmp)) {
            // Probing the list failed. Print the URL in red with ANSI escape
            // codes directly instead of spawning a shell with a command line
            // built from the URL.
            echo "探测链接列表失败: \033[31m" . $target_url . "\033[0m\n";
            insert_log($target_url, '探测链接列表失败');
        } else {
            insert_ec_urls($lists_tmp, 0, true, 'spider_ecshop_url');
        }
    }
}
Example #20
0
 /**
  * Loads http://job.blueidea.com and echoes the text of the "h3 a" element
  * of every <div> inside "#hotcoms .coms", each followed by "<br>".
  */
 function cool()
 {
     Vendor('phpQuery.phpQuery');
     \phpQuery::newDocumentFile('http://job.blueidea.com');
     foreach (pq('#hotcoms .coms')->find('div') as $companyNode) {
         $companyName = pq($companyNode)->find('h3 a')->text();
         echo $companyName . "<br>";
     }
 }
Example #21
0
<?php

include 'Lib/phpQuery/phpQuery.php';
include 'Lib/Smarty/Smarty.class.php';
$keywords = trim($_POST['keywords']);
$urls = trim($_POST['urls']);
if (empty($keywords) & empty($urls)) {
    echo '请输入争取参数';
    exit;
}
$keyword_arr = explode("\n", $keywords);
$url_arr = explode("\n", $urls);
foreach ($keyword_arr as $key) {
    $randa = rand(100, 999);
    $url = 'http://wap.baidu.com/s?word=' . $key . '&ts=8173515&t_kt=46&rsv_iqid=13192952292074186' . $randa . '&sa=ib&rsv_sug4=3785&inputT=1951&ss=100';
    phpQuery::newDocumentFile($url);
    $get_arr = pq(".ec_site");
    foreach ($get_arr as $gKey => $gVal) {
        $gValHtml = pq($gVal)->html();
        if (empty($gValHtml)) {
            continue;
        } elseif (!is_array($gValHtml)) {
            $gValHtmls[] = $gValHtml;
        } else {
            $gValHtmls = $gValHtml;
        }
        foreach ($url_arr as $uKey => $urlone) {
            if (in_array($urlone, $gValHtmls)) {
                //                $sql = "INSERT INTO `mobile_baidu` (keyword,url,rank,`timestamp`) VALUES ($key,$gValHtml,$gKey+1,now())";
                $rankVal['keyword'] = $key;
                $rankVal['url'] = $gValHtml;
Example #22
0
$invalid_keys = ["HDOM", "DEFA", "MAX_"];
$function_list = get_defined_functions()["internal"];
sort($function_list);
if (!isset($_GET['id'])) {
    $id = rand(0, sizeof($function_list));
} else {
    $id = $_GET['id'];
}
$file = fopen("php_functions.csv", "w");
$fields = [];
error_reporting(E_ALL & ~E_WARNING);
//sizeof($function_list)
for ($i = 0; $i < 3; $i++) {
    $string = "";
    $function_url = "http://php.net/manual/en/function." . str_replace("_", "-", $function_list[$i]) . ".php";
    $html = phpQuery::newDocumentFile($function_url);
    if (strpos(error_get_last()["message"], "404 Not Found") == false) {
        $num_of_functions++;
        echo "Fetching {$function_url}... \n";
        $name = $function_list[$i];
        foreach (get_defined_constants(true)["user"] as $key => $value) {
            if (!in_array(substr($key, 0, 4), $invalid_keys)) {
                foreach (pq(constant($key)) as $element) {
                    $string = $string . process_string($element->textContent, $name);
                }
            }
        }
        $info = $string;
        echo "<span style='font-weight:bold'>{$function_list[$id]}</span> <BR> {$info}";
        fwrite($file, "\"{$name}\", \"{$info}\" \n");
    } else {
// CALLBACKS
/**
 * Callback holder: offers the same "add class newClass" callback once as a
 * static method and once as an instance method.
 */
class callbackClass
{
    /**
     * Adds the class "newClass" to the given node (static variant).
     */
    public static function staticMethodCallback($node)
    {
        $wrapped = pq($node);
        $wrapped->addClass('newClass');
    }

    /**
     * Adds the class "newClass" to the given node (instance variant).
     */
    public function methodCallback($node)
    {
        $wrapped = pq($node);
        $wrapped->addClass('newClass');
    }
}
/**
 * Plain-function callback: adds the class "newClass" to the given node.
 */
function functionCallback($node)
{
    $wrapped = pq($node);
    $wrapped->addClass('newClass');
}
// Expected whois() output after every <li> has received the "newClass" class.
$testResult = array('li.newClass', 'li#testID.newClass', 'li.newClass', 'li#i_have_nested_list.newClass', 'li.nested.newClass', 'li.second.newClass');
// The same check is run with a function name, a static method and an
// instance method as the each() callback.
$tests = array(
    'functionCallback',
    array('callbackClass', 'staticMethodCallback'),
    array(new callbackClass(), 'methodCallback'),
);
foreach ($tests as $test) {
    $result = phpQuery::newDocumentFile('test.html')->find('li')->each($test);
    $testName = is_array($test) ? $test[1] : $test;
    if ($result->whois() != $testResult) {
        print "Test '{$testName}' <strong>FAILED</strong> !!! ";
        print "<pre>";
        print_r($result->whois());
        print "</pre>\n";
    } else {
        print "Test '{$testName}' PASSED :)";
    }
    print "\n";
}
Example #24
0
/**
 * Follows one navigation link of the given page and returns the first link
 * of every <dd> found on the page it leads to.
 *
 * The navigation link is the third <a> inside the tenth <div> of
 * ".wy_state_topNav"; on the target page the <dd> elements of
 * ".wy_state_model " are scanned.
 *
 * @param string $link_provice Path or URL of the starting page.
 * @return array href of the first <a> of each <dd>, in match order.
 */
function filter_cityUrl($link_provice)
{
    phpQuery::newDocumentFile($link_provice);
    $navAnchors = pq(".wy_state_topNav")->find('div')->eq(9)->find('a');
    $nextLevelUrl = $navAnchors->eq(2)->attr("href");

    // Links to the lower-level pages
    phpQuery::newDocumentFile($nextLevelUrl);
    $links = array();
    foreach (pq(".wy_state_model ")->find('dd') as $entry) {
        $links[] = pq($entry)->find('a:first')->attr("href");
    }

    echo "--------get cityUrl success-----------" . PHP_EOL;
    return $links;
}
Example #25
0
 /**
  * Crawls the page named by the "destination" request value and assigns the
  * articles found (title and image source) to the view as "articles".
  *
  * When no destination is given, an empty array is assigned to the view.
  *
  * @return void
  */
 public function crawlAction()
 {
     // Always defined, so assign() never receives an undefined variable.
     $buffer = array();
     $destination = $this->get('destination', FALSE);
     if ($destination) {
         include LIB_PATH . '/phpQuery/phpQuery.php';
         phpQuery::newDocumentFile($destination);
         // Start empty so a page without articles yields array(), not null.
         $final = array();
         $articles = pq('#main_bg .zixunmain .p_lf .p_pad')->find('ul');
         foreach ($articles as $article) {
             $final[] = array(
                 'title' => addslashes(pq($article)->find('dl dd a')->html()),
                 'img' => pq($article)->find('dl dt a img')->attr('src'),
             );
         }
         $buffer['articles'] = $final;
     }
     $this->getView()->assign($buffer);
 }
Example #26
0
 /**
  * Crawls the page named by the "destination" request value and assigns the
  * focus items found (image source and alt text as title) to the view as
  * "articles".
  *
  * When no destination is given, an empty array is assigned to the view.
  *
  * @return void
  */
 public function crawlAction()
 {
     // Always defined, so assign() never receives an undefined variable.
     $buffer = array();
     $destination = $this->get('destination', FALSE);
     if ($destination) {
         Yaf_Loader::import(LIB_PATH . '/phpQuery/phpQuery.php');
         phpQuery::newDocumentFile($destination);
         // Start empty so a page without items yields array(), not null.
         $final = array();
         $articles = pq('.main-content .chief .mod-focus .focus')->find('ul li');
         foreach ($articles as $article) {
             $image = pq($article)->find('a img');
             $final[] = array(
                 'img' => $image->attr('src'),
                 'title' => $image->attr('alt'),
             );
         }
         $buffer['articles'] = $final;
     }
     $this->getView()->assign($buffer);
 }
Example #27
0
<?php 
/*定义字段*/
// Inputs: the site to look for and a space-separated list of keywords.
$site = $_GET['site'];
$keyWords = $_GET['keyWords'];
require 'phpQuery/phpQuery.php';
// split() was removed in PHP 7; explode() gives the same result for a plain
// single-space delimiter.
$word = explode(' ', $keyWords);
$continue = 1;
set_time_limit(0);
foreach ($word as $key => $value) {
    // Walk the first ten result pages (offsets 0..90) for this keyword.
    for ($bd = 0; $bd < 100; $bd += 10) {
        // The keyword goes into a query string, so it has to be URL-encoded.
        $searchEngine['百度'] = 'http://www.baidu.com/s?pn=' . $bd . '&wd=' . urlencode($value);
        // Load the result page into phpQuery
        phpQuery::newDocumentFile($searchEngine['百度']);
        // The id attribute of the first table containing the site text is
        // printed as the position of the site.
        if ($order = pq("table:contains({$site})")->attr("id")) {
            echo $value . ' => ' . $order . '<br />';
            $continue = 0;
            break;
        } else {
            $continue = 1;
        }
    }
    // Not found on any of the scanned pages.
    if ($continue == 1) {
        echo $value . ' => 100+<br />';
    }
}
/*for($start=0;$start<200;$start+=10){
	$searchEngine['谷歌']=('http://www.google.com.hk/search?newwindow=1&safe=strict&site=&source=hp&q='.$keyWords.'&start='.$start);
	//实例化phpQuery
Example #28
0
$postPage = isset($_GET['pagenum']) ? $_GET['pagenum'] : 'noPage';
$callback = isset($_GET['callback']) ? $_GET['callback'] : 'callback';
if ($postKey === 'noKey' || $postKey == '') {
    customJsonRes('204', '没有postkey或为空', 'null');
} else {
    $key = urlencode($postKey);
    //urlencode(iconv('GBK', 'UTF-8', '前端'));//encodeURIComponent('前端');
    //Detected an illegal character  utf-8
    //调试
    //echo '关键字:'.$key.'搜索类型:'.$postType.'展示第'.$postPage.'页';
    switch ($postType) {
        case 1:
            $searchUrl = 'http://weixin.sogou.com/weixin?query=' . $key . '&type=2&page=' . $postPage . '&ie=utf8';
            //echo $searchUrl;
            echo '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta http-equiv="X-UA-Compatible" content="IE=Edge"/><link rel="stylesheet" href="../bootstrap/css/bootstrap.min.css"><link rel="stylesheet" href="../bootstrap/css/bootstrap-theme.min.css"><link rel="stylesheet" type="text/css" href="../test/cssjs/default.css"><title>' . $key . '的微信公众号文章 - by HighSea</title><!--<script src="../test/cssjs/copy.js">--></script><script type="text/javascript" charset="gbk" src="http://cdn.bootcss.com/jquery/1.11.1/jquery.min.js"></script></head><body><div class="container"><div class="row"><p class="bg-success">以下内容来自微信公众平台</p></div>';
            phpQuery::newDocumentFile($searchUrl);
            $artlist = pq(".results");
            //echo $artlist;
            foreach ($artlist as $li) {
                $description = pq($li)->find('.wx-rb');
                echo '<div class="row alltitle">' . $description->find('.txt-box h4')->html() . '</div>';
                echo '<div class="row">' . $description->find('.img_box2 img')->attr('src');
            }
            echo "</div></body></html>";
            break;
        case 2:
            $searchUrl = 'http://www.liepin.com/zhaopin/?searchField=1&key=' . $key . '&industries=&jobTitles=&dqs=070020&compscale=&compkind=&pubTime=&salary=&searchType=1&clean_condition=&jobKind=&curPage=' . $postPage;
            echo "开发中";
            break;
        case 3:
            echo "开发中";
 /**
  * Runs one page of a configured collection (scraping) job.
  *
  * Reads the job parameters from the request, loads the collection rule
  * identified by "cid", opens the list page for the current page number and
  * visits every detail page linked from it. For the first model field whose
  * page rule has type 0 the extracted text is dumped and execution stops
  * (debugging behavior kept from the original).
  *
  * @return void
  */
 public function startcollection()
 {
     // Parameters
     // Collection ID
     $cid = I('get.cid');
     // Collection type: 0 = collect everything, 1 = collect by page count
     $ctype = I('get.ctype');
     // Start page
     $sp = I('get.sp', 1, 'intval');
     // Stop page
     $tp = I('get.tp');
     // Page currently being collected
     $page = I('get.page', 1, 'intval');
     // When collecting by page count, stop once the stop page is reached
     $blagStop = false;
     if ($ctype == 1) {
         if ($tp && $tp <= $page) {
             $blagStop = TRUE;
         }
     }
     // Start collecting
     // Fetch the details of the collection rule
     $PlgCollection = D('PlgCollection');
     $colInfo = $PlgCollection->find($cid);
     $listRule = json_decode($colInfo['listrule']);
     $listurl = str_replace('{$page}', $page, $listRule->listurl);
     $urlInfo = parse_url($listurl);
     // Parse the list page
     Vendor('phpQuery.phpQuery', '', '.class.php');
     \phpQuery::newDocumentFile($listurl);
     $artlist = pq($listRule->listobj);
     foreach ($artlist as $li) {
         // Get the URL of the detail page
         $pageurl = pq($li)->attr($listRule->listattr);
         // Prefix scheme and host only when the URL is not already absolute.
         // The haystack is $pageurl and the needle 'http'; with the arguments
         // swapped, absolute URLs were prefixed again.
         if (strpos($pageurl, 'http') !== 0) {
             $pageurl = $urlInfo['scheme'] . '://' . $urlInfo['host'] . $pageurl;
         }
         // Collect the content
         \phpQuery::newDocumentFile($pageurl);
         $cateArr = explode(',', $colInfo['cate']);
         $mid = $cateArr[1];
         // Model
         $ModelField = DD('ModelField');
         // All fields of the model
         $fieldlist = $ModelField->selFieldByMid($mid);
         $pagerule = json_decode($colInfo['pagerule'], true);
         foreach ($fieldlist as $key => $f) {
             if (isset($pagerule[$f['fieldname']])) {
                 $rulePageJson = $pagerule[$f['fieldname']];
                 $rulePageObj = json_decode($rulePageJson);
                 if ($rulePageObj->type == 0) {
                     // Separate variable so the list being iterated is not overwritten.
                     $fieldNodes = pq($rulePageObj->obj);
                     dump($this->_TxtToUtf8(I('post.langcode'), $fieldNodes->text()));
                     die;
                 }
             }
         }
     }
     if ($blagStop === true) {
         // Stop collecting
     } else {
         // Redirect to the next page and continue collecting
         $param = array('cid' => $cid, 'ctype' => $ctype, 'sp' => $sp, 'tp' => $tp, 'page' => $page + 1);
         //$this->redirect('Collection/Admin/startcollection', $param);
     }
     $this->display();
 }
Example #30
0
// Append and move: the first <li> gets new content (entity plus raw
// character) and is moved into the first, previously emptied <div>.
$testName = 'Append and move';
$result = phpQuery::newDocumentFile('test.html');
$li = $result->find('li:first');
$result->find('div')->_empty();
$li->html('test1-&eacute;-test1')
    ->append('test2-é-test2')
    ->appendTo($result->find('div:first'));
$result = $result->find('div:first li:first');
$expected = 'test1-é-test1test2-é-test2';
if (trim(str_replace("\n", '', $result->html())) != $expected) {
    print "Test '{$testName}' <strong>FAILED</strong> !!!<br />\n";
    print "'" . trim($result->html()) . "'";
} else {
    print "Test '{$testName}' passed :)<br />\n";
}
print "\n";

// Attr charset: an attribute value with an entity and non-ASCII characters
// must be read back unchanged.
$testName = 'Attr charset';
$result = phpQuery::newDocumentFile('test.html')
    ->find('li:first')
    ->attr('test', 'foo &eacute; żźć bar');
if (trim($result->attr('test')) != 'foo &eacute; żźć bar') {
    print "Test '{$testName}' <strong>FAILED</strong> !!!<br />\n";
    print $result->attr('test');
} else {
    print "Test '{$testName}' passed :)<br />\n";
}
print "\n";
//$testName = 'Loading document without meta charset';
//$result = phpQuery::newDocumentFile('test.html')
//	->_empty();
////var_dump((string)$result->htmlOuter());
//$result = phpQuery::newDocument($result->htmlOuter());
//$validResult = <<<EOF
//<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
//<html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8" /></head></html>