PHP mycrawler示例

编程语言: PHP

类/类型: mycrawler

hotexamples.com的示例: 2

PHP mycrawler - 共找到2个示例。以下是从开源项目中提取的、评价最高的mycrawler真实PHP代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

setFetchJobs(2)

run(1)

示例#1

显示文件

文件： Crawler_test.php 项目： sunaimin/phpfetcher

<?php

require_once 'autoload.php';
/**
 * Example crawler whose page handler dumps the hyperlinks of a page.
 *
 * Extends Phpfetcher_Crawler_Default and overrides only handlePage().
 */
class mycrawler extends Phpfetcher_Crawler_Default
{
    /**
     * Prints the hyperlinks reported by the given page object.
     *
     * NOTE(review): presumably called by the parent crawler for each page it
     * fetches; confirm against Phpfetcher_Crawler_Default.
     *
     * @param mixed $page Object exposing getHyperLinks().
     */
    public function handlePage($page)
    {
        $links = $page->getHyperLinks();
        print_r($links);
    }
}
// Job table keyed by job name. Each job carries a 'start_page' URL and a list
// of 'link_rules' pattern strings; the 'qq' job also sets 'max_depth'.
// NOTE(review): the link rules look like PCRE patterns used to select which
// links get followed; confirm against the phpfetcher documentation.
$arrFetchJobs = [
    'blog.reetsee' => [
        'start_page' => 'http://blog.reetsee.com',
        'link_rules' => [
            '/blog\\.reetsee\\.com/',
            '/wordpress/',
        ],
    ],
    'qq' => [
        'start_page' => 'http://news.qq.com',
        'link_rules' => [
            '/(.*)\\/a\\/(\\d{8})\\/(\\d+)\\.htm/',
        ],
        'max_depth' => 4,
    ],
];

// Register the jobs and start crawling in one chained call.
$crawler = new mycrawler();
$crawler->setFetchJobs($arrFetchJobs)->run();

// Disabled line carried over from the original example:
//$page->setConfField('url', 'http://tech.qq.com/a/20140715/073002.htm');

示例#2

显示文件

文件： single_page.php 项目： andy0010/phpfetcher

<?php

// The next two statements let this file run directly once the project has
// been downloaded: the parent directory of this script is appended to the
// include path.
$demo_include_path = __DIR__ . '/../';
set_include_path(implode(PATH_SEPARATOR, [get_include_path(), $demo_include_path]));
require_once 'phpfetcher.php';
/**
 * Example crawler whose page handler prints the text of the page's
 * <title> element(s).
 *
 * Extends Phpfetcher_Crawler_Default and overrides only handlePage().
 */
class mycrawler extends Phpfetcher_Crawler_Default
{
    /**
     * Prints the plain text of every node matched by '//title', one per line.
     *
     * NOTE(review): assumes sel() returns a zero-indexed array of nodes that
     * expose a `plaintext` property; confirm against the phpfetcher page class.
     *
     * @param mixed $page Object exposing sel().
     */
    public function handlePage($page)
    {
        // Print the title of the current page.
        $titleNodes = $page->sel('//title');
        $total = count($titleNodes);
        for ($idx = 0; $idx < $total; $idx++) {
            echo $titleNodes[$idx]->plaintext, "\n";
        }
    }
}
// Single job named 'qqnews': one start page, no link rules, 'max_depth' of 1.
$arrJobs = [
    'qqnews' => [
        'start_page' => 'http://news.qq.com/a/20140927/026557.htm',
        'link_rules' => [],
        'max_depth' => 1,
    ],
];

$crawler = new mycrawler();

// According to the original example, the chained form
//     $crawler->setFetchJobs($arrJobs)->run();
// has the same effect as the two statements below.
$crawler->setFetchJobs($arrJobs);
$crawler->run();