/**
  * Scrape the article list of one category from the Sogou WeChat index.
  *
  * Builds the category page URL from $cat, applies the QueryList selectors
  * for title, description, cover image and link, and returns the result.
  *
  * @param mixed $cat Category identifier; inserted verbatim as both the
  *                   directory and the file name of the category page URL.
  * @return mixed The `jsonArr` property of the QueryList result (presumably
  *               a list of rows keyed by Title/Description/PicUrl/Url —
  *               confirm against the QueryList version in use).
  * @access public
  * @author polo<*****@*****.**>
  * @version 2015-3-11 10:35:25 AM
  * @copyright Show More
  */
 public static function queryHotList($cat)
 {
     // Field name => array(CSS selector, attribute to extract).
     $selectors = array(
         "Title" => array('.wx-news-info2 h4', 'text'),
         'Description' => array('.wx-news-info2>a', 'text'),
         'PicUrl' => array('.wx-img-box a img', 'src'),
         'Url' => array('.wx-img-box a', 'href'),
     );
     $pageUrl = 'http://weixin.sogou.com/pcindex/pc/' . $cat . '/' . $cat . '.html';
     return \QueryList::Query($pageUrl, $selectors)->jsonArr;
 }
Exemple #2
0
 /**
  * Refresh the IPv4 address segments of every province.
  *
  * For each province code the page http://ips.chacuo.net/view/s_<code> is
  * scraped for the `.v_l` (segment begin) and `.v_r` (segment end) texts.
  * The collected map (province code => scraped rows) is handed to
  * self::_write() together with the path of ip_segment.php located next to
  * this file; 'Update OK!' is echoed when _write() returns a truthy value.
  *
  * @return void
  */
 public static function update()
 {
     // require_once instead of require: a second call to update() in the
     // same request would otherwise re-include the file and (presumably,
     // since it declares the QueryList class) trigger a fatal redeclaration.
     require_once 'QueryList.class.php';
     $province = array('BJ', 'GD', 'SD', 'ZJ', 'JS', 'SH', 'LN', 'SC', 'HA', 'HB', 'FJ', 'HN', 'HE', 'CQ', 'SX', 'JX', 'SN', 'AH', 'HL', 'GX', 'JL', 'YN', 'TJ', 'NM', 'XJ', 'GS', 'GZ', 'HI', 'NX', 'QH', 'XZ');
     $rules = array('begin' => array('.v_l', 'text'), 'end' => array('.v_r', 'text'));
     $ip_segment = array();
     foreach ($province as $code) {
         // Target page listing the IP segments of this province
         $url = 'http://ips.chacuo.net/view/s_' . $code;
         $ip_list = QueryList::Query($url, $rules, '', 'UTF-8');
         $ip_segment[$code] = $ip_list->jsonArr;
     }
     // Normalise Windows directory separators before building the file path.
     $file = str_replace('\\', '/', __DIR__) . '/ip_segment.php';
     if (self::_write($file, $ip_segment)) {
         echo 'Update OK!';
     }
 }
Exemple #3
0
 /**
  * Classify a page into a numeric type code according to which selectors match.
  *
  * Result codes, as derived from the first QueryList result row:
  *   1 - no result row at all
  *   3 - 'type2' present, 'type3' absent
  *   4 - 'type2' and 'type3' present
  *   2 - 'type2' absent, 'type4' absent
  *   5 - 'type2' absent, 'type4' present
  *
  * @param mixed $html Page source (or whatever QueryList::Query accepts as
  *                    its first argument) to inspect.
  * @return int Type code between 1 and 5.
  */
 private function getType($html)
 {
     $rules = array(
         'type1' => array('#site-nav a:eq(0)', 'text'),
         'type2' => array('#main-body a:eq(0)', 'text'),
         'type3' => array('.block_all', 'text'),
         'type4' => array('.midsml-rank-stars', 'class'),
     );
     $result = QueryList::Query($html, $rules, '');

     // Nothing matched at all.
     if (!isset($result->jsonArr[0])) {
         return 1;
     }

     $first = $result->jsonArr[0];
     if (isset($first['type2'])) {
         return isset($first['type3']) ? 4 : 3;
     }

     return isset($first['type4']) ? 5 : 2;
 }
<?php

// Scrapes the article shown on http://meiriyiwen.com/ and stores it through
// the Mryw DAO unless an article with the same content hash already exists.
// Each outcome is reported with a timestamped one-line message.
require_once '../inc/QueryList/QueryList.class.php';
header('Content-type:text/html;charset=utf-8');
$url = "http://meiriyiwen.com/";
$reg = array("title" => array("#article_show h1", "text"), "author" => array("#article_show .article_author span", "text"), "content" => array("#article_show .article_text", "html"));
$mryw_rst = QueryList::Query($url, $reg);
$mryw_array = $mryw_rst->jsonArr;
if ($mryw_array) {
    // Read the first result row defensively: a selector that matched nothing
    // may leave its key unset, which previously raised undefined-index notices.
    $m_row = isset($mryw_array[0]) && is_array($mryw_array[0]) ? $mryw_array[0] : array();
    $m_title = isset($m_row['title']) ? $m_row['title'] : '';
    $m_author = isset($m_row['author']) ? $m_row['author'] : '';
    $m_content = isset($m_row['content']) ? $m_row['content'] : '';
    if (empty($m_title) || empty($m_author) || empty($m_content)) {
        echo 'empty mryw, ' . date('Y-m-d H:i:s');
    } else {
        // The hash is only needed once the content is known to be non-empty;
        // an md5 digest itself can never be empty, so it is not re-checked.
        $m_md5 = md5($m_content);
        require_once '../inc/mryw.inc.php';
        $mryw_dao = new Mryw();
        if ($mryw_dao->is_mryw_exist($m_md5)) {
            echo 'exist mryw, ' . date('Y-m-d H:i:s');
        } else {
            $lastest = $mryw_dao->get_lastest_article(true);
            // The new article is dated 23 hours and 57 seconds after the latest
            // stored one. When no usable m_time is available (e.g. empty table)
            // strtotime() would yield false and the date would collapse to 1970,
            // so fall back to the current time in that case.
            $last_ts = empty($lastest['m_time']) ? false : strtotime($lastest['m_time']);
            $n_time = $last_ts === false ? time() : $last_ts + 3600 * 23 + 57;
            $mryw_dao->insert_mryw($m_title, $m_content, $m_author, $m_md5, date('Y-m-d H:i:s', $n_time));
            echo 'success mryw, ' . date('Y-m-d H:i:s');
            Mysql::closeConn();
        }
    }
}
            continue;
        }
        $data['title'] = $value['title'];
        $data['source'] = $value['url'];
        $data['sourceType'] = 2;
        // 2表示来源是互联网
        $data['classId'] = 3;
        // 3表示左岸频道
        $data['category'] = 1;
        // 1表示创造之路
        $data['isPublish'] = 1;
        $data['publishTime'] = $nowDate;
        $data['changeTime'] = $nowDate;
        $data['createTime'] = $nowDate;
        $data['isDel'] = 0;
        $con_hj = QueryList::Query($value['url'], array("content" => array('#content .entry-content', 'html', '-div')));
        $docId = $arti->insertData('uctoo_article_article', $data);
        if ($docId > 0) {
            $data2 = array();
            $data2['docId'] = $docId;
            $data2['content'] = $con_hj->jsonArr[0]['content'];
            # 如果存在,则不执行添加操作
            $res1 = $arti->select('select "X" from uctoo_article_content_1 where docId=' . $docId);
            if ($res1) {
                continue;
            }
            $arti->insertData('uctoo_article_content_1', $data2);
        }
    }
}
print_r($hj->jsonArr);
Exemple #6
0
// Front-end publishing switch: when $config['UserAddArticle'] equals 0 the
// visitor gets a JavaScript alert (front-end members may not publish
// articles), is redirected to ucenter.php, and the script stops.
if ($config['UserAddArticle'] == 0) {
    echo "<script>alert('前台会员不支持发布文章');location.href='ucenter.php'</script>";
    exit;
}
//微信文章导入
if ($_POST) {
    require 'QueryList.class.php';
    $long = guolv(trim($_POST['long']));
    $type_id = guolv(trim($_POST['type_id']));
    $html = get_contents($long);
    $money = $type_arr[2];
    $html = str_replace('data-src', 'src', $html);
    $caiji = array("title" => array(".rich_media_title:first", "text"), "content" => array("#js_content", "html"));
    $quyu = '';
    $hj = QueryList::Query($html, $caiji, $quyu);
    $arr = $hj->jsonArr;
    $title = $arr[0]['title'];
    $content = $arr[0]['content'];
    $pic = cut($html, 'var msg_cdn_url = "', '"');
    if (url_exists($long) == 1) {
        echo "<script>alert('网址不存在');location.href='weixin.php'</script>";
        exit;
    }
    if (is_numeric($type_id) == false) {
        echo "<script>alert('分类不存在');location.href='weixin.php'</script>";
        exit;
    }
    $row = $mysql->query("select * from `article` where `title`='{$title}' limit 1");
    if (!$row) {
        $arr = array('top' => 0, 'title' => $title, 'content' => $content, 'pic' => '[weixin]' . $pic, 'type' => $type_id, 'pv' => 0, 'pv_max' => '', 'money' => $money, 'day' => date("Y-m-d", time()));
 /**
  * Read the text of the last `.PageLink` element found in $this->html.
  *
  * @return mixed The scraped 'page' value of the first result row, or the
  *               string '0' when that value is missing or empty.
  */
 public function getPage()
 {
     $rules = array('page' => array('.PageLink:last', 'text'));
     $result = QueryList::Query($this->html, $rules, '');

     if (empty($result->jsonArr[0]['page'])) {
         return '0';
     }

     return $result->jsonArr[0]['page'];
 }
 /**
  * Crawl the qt.qq.com phone news lists and import articles not stored yet.
  *
  * Starting at c12_list_1.shtml, each list page is fetched as JSON and its
  * 'next' entry is followed until it is missing (the last URL and payload
  * are then dumped) or the import counter exceeds 10. For every list item
  * that points to a relative "article_" page and whose title is not yet in
  * DcDocument, the cover image is saved, a document row is created, the
  * article body is downloaded, its images are re-hosted via
  * saveArticleImage(), links are unwrapped and the result is stored through
  * DcArticle.
  *
  * @return void
  */
 public function index()
 {
     set_time_limit(0);
     import('Org.JAE.QueryList');
     header("Content-type: text/html; charset=utf-8");
     $baseurl = "http://qt.qq.com/static/pages/news/phone/";
     $listurl = $baseurl . "c12_list_1.shtml";
     // NOTE(review): $page is incremented once per imported article (not per
     // list page) and only checked between list pages — confirm this cap is
     // what is intended.
     $page = 0;
     while (true) {
         if ($page > 10) {
             break;
         }
         $pageresult = \QueryList::Query($listurl);
         $json = $pageresult->getHtmlJSON();
         if (empty($json[0]['next'])) {
             // No further list page: show where the crawl stopped.
             echo $listurl;
             dump($json[0]);
             break;
         }
         $listurl = $baseurl . $json[0]['next'];
         $items = isset($json[0]['list']) && is_array($json[0]['list']) ? $json[0]['list'] : array();
         foreach ($items as $item) {
             $article_url = $item['article_url'];
             // strpos() returns 0 for a match at the start of the string, so
             // the result must be compared against false explicitly.
             // Skip absolute links (anything mentioning qq.com) ...
             if (strpos($article_url, 'qq.com') !== false) {
                 continue;
             }
             // ... and anything that is not an article page.
             if (strpos($article_url, "article_") === false) {
                 continue;
             }
             $article_url = $baseurl . $article_url;
             // Skip titles that were already collected.
             $map['title'] = $item['title'];
             $iscollect = D('DcDocument')->where($map)->find();
             if (!empty($iscollect)) {
                 continue;
             }
             $data['create_time'] = strtotime($item['insert_date']);
             $data['title'] = $item['title'];
             $data['description'] = $item['summary'];
             $data['cover_id'] = $this->saveCoverImage($item['image_url_small']);
             if (empty($data['cover_id'])) {
                 continue;
             }
             $Document = D('DcDocument');
             $data['title'] = str_replace('掌盟', '群挑', $data['title']);
             $docid = $Document->addDoc($data);
             // Loads the article page as phpQuery's current document so the
             // following pq() selector runs against it.
             \phpQuery::newDocumentFile($article_url);
             $content = pq(".article_content")->html();
             $imgs = pq($content)->find("img");
             $hasimg = false;
             foreach ($imgs as $img) {
                 $hasimg = true;
                 $src = pq($img)->attr('src');
                 if (empty($src)) {
                     // Some images carry their URL in a 'jason' attribute instead.
                     $src = pq($img)->attr('jason');
                 }
                 $imgurl = $this->saveArticleImage($src);
                 $content = str_replace($src, $imgurl, $content);
             }
             if ($hasimg) {
                 $content = str_replace("jason=", "src=", $content);
                 // Add the alt attribute once, after the loop: doing it per
                 // image stacked one alt attribute per image onto every <img>.
                 // The title is escaped so quotes cannot break the attribute.
                 $alt = htmlspecialchars($item['title'], ENT_QUOTES, 'UTF-8');
                 $content = str_replace("<img", "<img alt='" . $alt . "'", $content);
             }
             $content = str_replace('掌盟', '群挑', $content);
             // Unwrap links, keeping only their inner content.
             $content = preg_replace("/<a[^>]*>(.*)<\\/a>/isU", '${1}', $content);
             $Article = D('DcArticle');
             $article = array();
             $article['content'] = trim($content);
             $article['id'] = $docid;
             $Article->addArticle($article);
             // Drop the parsed documents held by phpQuery before the next article.
             \phpQuery::$documents = array();
             $page++;
         }
     }
 }
// Imports knowledge-base entries from home.cnblogs.com/kb into cn_kb_blogs.
// The number of rows already stored determines which remote pages to fetch;
// four pages are processed from the highest offset down, and the rows of each
// page are walked in reverse order. $reg and $rang (QueryList rules and range)
// are expected to be defined earlier in the script.
mysql_select_db(DB_NAME);
$sql = "select count(*) from cn_kb_blogs";
$res = mysql_query($sql);
$counts_info = mysql_fetch_array($res);
$exist_max_num = $counts_info['0'];
// 20 entries per page
$start_page = ceil($exist_max_num / 20);
$start_num = intval($exist_max_num % 20);
if ($start_num == 19) {
    $start_page++;
}
for ($page = 4; $page > 0; $page--) {
    $fetch_page = $start_page + $page;
    // $page is always >= 1 inside this loop, so the paged URL is always used.
    $url = 'http://home.cnblogs.com/kb' . "/page/" . $fetch_page;
    $hj = QueryList::Query($url, $reg, $rang, 'UTF-8');
    $cn_blogs = $hj->data;
    $get_cnblogs_nums = count($cn_blogs) - 1;
    for ($i = $get_cnblogs_nums; $i >= 0; $i--) {
        $cn_blog = $cn_blogs[$i];
        // Scraped text is untrusted: escape it before it is placed in SQL,
        // otherwise a quote in a title or URL breaks (or hijacks) the query.
        $exist_sql = "select id from cn_kb_blogs where content_url='" . mysql_real_escape_string($cn_blog['content_url']) . "'";
        $exist_res = mysql_query($exist_sql);
        if ($exist_res === false) {
            // The existence check failed; do not risk inserting a duplicate.
            continue;
        }
        $num_rows = mysql_num_rows($exist_res);
        if (empty($num_rows)) {
            // Relies on each scraped row holding exactly title, content_url
            // and kb_type, in that order.
            $values = implode("','", array_map('mysql_real_escape_string', $cn_blog));
            $sql = "insert into cn_kb_blogs(title, content_url, kb_type) values('{$values}')";
            mysql_query($sql);
            echo 'success' . "<br>";
        }
    }
}
Exemple #10
0
echo $json . "<hr/>";
// Scrape the title and body of an OSChina content page
$url = "http://www.oschina.net/code/snippet_186288_23816";
$reg = array("title" => array(".QTitle h1", "text"), "con" => array(".Content", "html"));
$hj = QueryList::Query($url, $reg);
$arr = $hj->jsonArr;
echo "<pre>";
print_r($arr);
echo "</pre><hr/>";
// Scrape basic information about a website
// Define the extraction rules: field name => array(selector, attribute)
$reg = array("kw" => array("meta[name=keywords]", "content"), "desc" => array("meta[name=description]", "content"), "title" => array("title", "text"), "css1" => array("link:eq(0)", "href"), "js2" => array("script[src]:eq(1)", "src"));
// Target site to scrape
$url = 'http://x.44i.cc/';
// Run the scrape
$data = QueryList::Query($url, $reg)->jsonArr;
print_r($data);
// The following separately demonstrates the use of a callback function:
// scrape the site's keywords and split them into individual keywords.
// The fourth rule element names the callback ('fun').
$reg = array("kw" => array("meta[name=keywords]", "content", '', 'fun'));
/**
 * Custom QueryList callback: split a comma-separated keyword string.
 *
 * @param string $content Scraped attribute value, e.g. "a,b,c".
 * @param mixed  $key     Name of the rule being processed (unused here).
 * @return array The individual keywords, in their original order.
 */
function fun($content, $key)
{
    $keywords = explode(',', $content);

    return $keywords;
}
// Target site to scrape
$url = 'http://x.44i.cc/';
// Run the scrape with the callback rule and print the result
$data = QueryList::Query($url, $reg)->jsonArr;
print_r($data);
 /**
  * Run a QueryList scrape with 'UTF-8' as the fourth argument and return its rows.
  *
  * @param mixed $url  First argument passed to QueryList::Query (the page to scrape).
  * @param mixed $reg  Extraction rules passed through unchanged.
  * @param mixed $rang Range selector passed through unchanged.
  * @return mixed The `data` property of the QueryList result.
  */
 public static function http_query_list($url, $reg, $rang)
 {
     return QueryList::Query($url, $reg, $rang, 'UTF-8')->data;
 }
Exemple #12
0
 /**
  * Query the configured search engine and store the result in $this->jsonArr.
  *
  * Builds the search URL for $this->searcher ('baidu', 'google' or 'sogou')
  * from $this->key, $this->num and $this->page, scrapes the result rows with
  * $this->regArr / $this->regRange, then re-queries the same page with
  * $this->regZnum to extract the total number of hits. $this->jsonArr ends up
  * as an array with the keys num, page, zNum, zPage, s, other and data.
  *
  * @return void
  * @throws \InvalidArgumentException When $this->searcher is not one of the
  *                                   supported engines (previously this led
  *                                   to undefined $url / $reg_znum variables).
  */
 private function getList()
 {
     $s = urlencode($this->key);
     $num = $this->num;
     $start = $this->num * $this->page;
     $getHtmlWay = 'get';
     switch ($this->searcher) {
         case 'baidu':
             $url = "http://www.baidu.com/s?pn={$start}&rn={$num}&wd={$s}";
             $reg_znum = '/[\\d,]+/';
             break;
         case 'google':
             $url = "https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images&hl=en&as_qdr=all&num={$num}&start={$start}&q={$s}";
             $reg_znum = '/([\\d,]+) result(s)?/';
             $getHtmlWay = 'curl';
             break;
         case 'sogou':
             $url = "http://www.sogou.com/web?query={$s}&num={$num}&page=" . $this->page;
             $reg_znum = '/[\\d,]+/';
             break;
         default:
             throw new \InvalidArgumentException('Unsupported searcher: ' . $this->searcher);
     }
     $searcherObj = QueryList::Query($url, $this->regArr, $this->regRange, $getHtmlWay, false);
     // Only Google result links are resolved to their real target; Baidu
     // links are kept as scraped (the original had that resolution disabled).
     if ($this->searcher == 'google') {
         $total = count($searcherObj->jsonArr);
         for ($i = 0; $i < $total; $i++) {
             $searcherObj->jsonArr[$i]['url'] = $this->getGoogleRealURL($searcherObj->jsonArr[$i]['url']);
         }
     }
     // Snapshot the rows before the object is re-queried for the hit count.
     $rows = $searcherObj->jsonArr;
     // Extract the total number of results
     $searcherObj->setQuery($this->regZnum);
     $zNum = isset($searcherObj->jsonArr[0]['zNum']) ? $searcherObj->jsonArr[0]['zNum'] : '';
     $zNum = preg_match($reg_znum, $zNum, $arr) ? $arr[0] : 0;
     // Strip thousands separators; the int cast also drops any trailing text.
     $zNum = (int) str_replace(',', '', $zNum);
     // Compute the total number of pages (guarding against a zero page size)
     $zPage = $this->num > 0 ? ceil($zNum / $this->num) : 0.0;
     $this->jsonArr = array('num' => $this->num, 'page' => (int) $this->page + 1, 'zNum' => $zNum, 'zPage' => $zPage, "s" => "{$this->key}", 'other' => array('author' => 'JAE', 'QQ' => '734708094', 'blog' => 'http://blog.jaekj.com'), 'data' => $rows);
 }
Exemple #13
0
<?php

/*
 * Company:Quguonet.com
 * Author:Myth
 */
// Scrapes one Ganji listing page and stores every entry's name and link in
// the crm_list table, then prints how long the run took.
// Remove the execution time limit so a slow scrape is not cut off
set_time_limit(0);
// Load the library
__autoload("QueryList");
//require 'Query/QueryList.class.php';
// Record the time at which execution started
$stime = microtime(true);
// Scrape test
$url = "http://wap.ganji.com/foshan/songshui/shunde/?domain=foshan&url=songshui&d=shunde%2F&page=4";
$arr = array('name' => array('.list-item a:even', 'text', '-.tel'), 'href' => array('.list-item a:even', 'href', '-.tel'));
$data = QueryList::Query($url, $arr, '', 'UTF-8', 'UTF-8', true)->data;
print_r($data);
// Use a dedicated loop variable: the original reused $data (and $url) inside
// the loop, clobbering the scraped result set and the source URL.
foreach ($data as $row) {
    // Scraped text is untrusted and is placed inside a quoted SQL fragment.
    // NOTE(review): addslashes() is a stop-gap because the $db driver is not
    // visible here — switch to the driver's own escaping or bound parameters.
    $name = addslashes($row['name']);
    $href = addslashes($row['href']);
    $db->insert("crm_list", "`name`,`href`,`pagename`,`createtime`", "'{$name}','{$href}','赶集顺德送水公司',NOW()");
}
// Record the time at which execution finished
$etime = microtime(true);
// Elapsed time in seconds
$total = $etime - $stime;
echo '<br />耗时:' . $total;
Exemple #14
0
                });
        
            return $x;
           
    });

print_r($data);
*/
/*
//模拟登陆
$login = QueryList::run('Login',[
    'target' => 'http://doc.querylist.cc/login/login',
    'method' => 'post',
    'params' => ['username'=>'admin','password'=>'admin'],
    'cookiePath' => './cookie123.txt'
    ]);

// print_r($login->html);

$rt = $login->post('http://doc.querylist.cc/admin')->setQuery(['title'=>['h1','text']])->data;

print_r($rt);
*/
// Ultimate fix for garbled (mis-encoded) text: pass both encodings plus the
// final `true` flag to QueryList::Query
$data = QueryList::Query('http://www.phpddt.com/', ['title' => ['h2', 'text']], '', 'UTF-8', 'UTF-8', true)->data;
print_r($data);
$etime = microtime(true);
// Record the time at which execution finished
$total = $etime - $stime;
// Elapsed time ($stime is set earlier in the script)
// echo '耗时:'.$total;