Example #1
0
 /**
  * Applies the extraction rules in $this->regArr to $this->html and stores
  * the results in $this->jsonArr as rows indexed [$i][$ruleKey].
  *
  * Every entry of $this->regArr except the reserved 'callback' key is a rule:
  *   [0] selector passed to find()
  *   [1] 'text', 'html', or an attribute name to read
  *   [2] optional tag list forwarded to _allowTags() / _stripTags()
  *   [3] optional per-rule callback, called as callback($value, $key)
  * When a rule has no [3], $this->regArr['callback'] (if set) is applied
  * instead with the same arguments.
  *
  * If $this->regRange is set, each node it matches produces one row and the
  * rules are evaluated relative to that node; otherwise each rule is run
  * against the whole document and its n-th match lands in row n.
  *
  * The rules are walked with foreach rather than each(): each() was removed
  * in PHP 8.0, and it relied on the array's internal pointer, which the
  * no-range branch never rewound, so a second call found no rules.
  *
  * @return void
  */
 private function _getList()
 {
     $hobj = phpQuery::newDocumentHTML($this->html);
     $globalCallback = isset($this->regArr['callback']) ? $this->regArr['callback'] : null;

     if (!empty($this->regRange)) {
         $i = 0;
         foreach (pq($hobj)->find($this->regRange) as $item) {
             foreach ($this->regArr as $key => $reg_value) {
                 // Strict comparison: before PHP 8, 0 == 'callback' is true and
                 // would silently skip a rule stored under integer key 0.
                 if ($key === 'callback') {
                     continue;
                 }
                 $tags = isset($reg_value[2]) ? $reg_value[2] : '';
                 $iobj = pq($item)->find($reg_value[0]);
                 switch ($reg_value[1]) {
                     case 'text':
                         $value = $this->_allowTags(pq($iobj)->html(), $tags);
                         break;
                     case 'html':
                         $value = $this->_stripTags(pq($iobj)->html(), $tags);
                         break;
                     default:
                         $value = pq($iobj)->attr($reg_value[1]);
                         break;
                 }
                 // A per-rule callback takes precedence over the global one.
                 if (isset($reg_value[3])) {
                     $value = call_user_func($reg_value[3], $value, $key);
                 } elseif ($globalCallback !== null) {
                     $value = call_user_func($globalCallback, $value, $key);
                 }
                 $this->jsonArr[$i][$key] = $value;
             }
             $i++;
         }
     } else {
         foreach ($this->regArr as $key => $reg_value) {
             if ($key === 'callback') {
                 continue;
             }
             $tags = isset($reg_value[2]) ? $reg_value[2] : '';
             // Reuses the document parsed above instead of re-parsing the same
             // HTML once per rule.
             $i = 0;
             foreach (pq($hobj)->find($reg_value[0]) as $item) {
                 switch ($reg_value[1]) {
                     case 'text':
                         $value = $this->_allowTags(pq($item)->html(), $tags);
                         break;
                     case 'html':
                         $value = $this->_stripTags(pq($item)->html(), $tags);
                         break;
                     default:
                         $value = pq($item)->attr($reg_value[1]);
                         break;
                 }
                 if (isset($reg_value[3])) {
                     $value = call_user_func($reg_value[3], $value, $key);
                 } elseif ($globalCallback !== null) {
                     $value = call_user_func($globalCallback, $value, $key);
                 }
                 $this->jsonArr[$i][$key] = $value;
                 $i++;
             }
         }
     }
     if ($this->outputEncoding) {
         // Convert the collected values to the requested output encoding.
         $this->jsonArr = $this->_arrayConvertEncoding($this->jsonArr, $this->outputEncoding, $this->htmlEncoding);
     }
     // Drop every document phpQuery is holding so parsed DOMs do not accumulate.
     phpQuery::$documents = array();
 }
Example #2
0
/**
 * Collects the src, title and alt attributes of every <img> element found
 * in a piece of HTML.
 *
 * @param string $content HTML markup to scan
 * @return array one array per <img>, each with the keys 'src', 'title' and
 *               'alt' holding whatever attr() returned for that attribute;
 *               empty when the markup contains no <img>
 */
function sp_getcontent_imgs($content)
{
    import("phpQuery");
    // Query the document returned by the parser directly rather than going
    // through an argument-less pq() call.
    $doc = phpQuery::newDocumentHTML($content);
    $imgs_data = array();
    foreach ($doc->find("img") as $img) {
        $img = pq($img);
        $imgs_data[] = array(
            'src' => $img->attr("src"),
            'title' => $img->attr("title"),
            'alt' => $img->attr("alt"),
        );
    }
    // Reset to an empty array (not null) so the static property keeps its
    // array type for any later code that iterates it.
    phpQuery::$documents = array();
    return $imgs_data;
}
Example #3
0
 /**
  * Collects the URLs referenced by $this->content.
  *
  * Three sources are gathered and concatenated in this order:
  *   1. the href attribute of every element that has one,
  *   2. the src attribute of every element that has one,
  *   3. the argument of every url(...) occurrence in the raw content, with
  *      quote characters removed and surrounding whitespace trimmed.
  *
  * @return array list of URL strings (duplicates are not removed)
  */
 public function parse()
 {
     $dom = \phpQuery::newDocument($this->content);

     // One pass per selector: iterate the matched nodes directly instead of
     // re-running the selector with :eq(i) for every index.
     $a = [];
     foreach ($dom->find('*[href]') as $node) {
         $a[] = $node->getAttribute('href');
     }
     $srcs = [];
     foreach ($dom->find('*[src]') as $node) {
         $srcs[] = $node->getAttribute('src');
     }
     \phpQuery::$documents = [];

     // Lookbehind/lookahead keep only the text between "url(" and ")".
     $urls = [];
     $pattern = '/(?<=url\\()[^)]*?(?=\\))/';
     if (preg_match_all($pattern, $this->content, $match)) {
         $urls = array_map(static function ($val) {
             return trim(preg_replace('/[\'"]/', '', $val));
         }, $match[0]);
     }

     return array_merge($a, $srcs, $urls);
 }
 /**
  * Discards all documents held by phpQuery and loads $getcontent as a new
  * document, with 'UTF-8' as phpQuery's default charset.
  *
  * @param string $getcontent markup to parse
  * @return void
  */
 public function _setDocument($getcontent)
 {
     phpQuery::$documents = array();
     // The default charset must be set before parsing: phpQuery falls back to
     // it while loading markup that declares no charset of its own, so
     // assigning it afterwards cannot affect the document created here.
     phpQuery::$defaultCharset = 'UTF-8';
     phpQuery::newDocument($getcontent);
 }
 /**
  * Walks the qt.qq.com news list pages and imports articles that are not
  * stored yet.
  *
  * For each list page, every item whose relative URL contains "article_"
  * and whose title is not already present in DcDocument is imported: the
  * cover image is saved, a DcDocument row is added, the article page is
  * fetched, its images are saved locally and their URLs rewritten, links are
  * unwrapped, and the resulting HTML is stored through DcArticle.
  *
  * @return void
  */
 public function index()
 {
     set_time_limit(0);
     import('Org.JAE.QueryList');
     header("Content-type: text/html; charset=utf-8");
     $baseUrl = "http://qt.qq.com/static/pages/news/phone/";
     $listurl = $baseUrl . "c12_list_1.shtml";
     // NOTE(review): despite its name, $page is incremented once per imported
     // article (see the end of the item loop) and only checked between list
     // pages — confirm whether a per-page limit was intended.
     $page = 0;
     while ($page <= 10) {
         $pageresult = \QueryList::Query($listurl);
         $json = $pageresult->getHtmlJSON();
         if (empty($json[0]['next'])) {
             // NOTE(review): a page without a 'next' link ends the crawl before
             // its own items are processed; the URL and data are only dumped.
             echo $listurl;
             dump($json[0]);
             break;
         }
         $listurl = $baseUrl . $json[0]['next'];
         foreach ($json[0]['list'] as $item) {
             $article_url = $item['article_url'];
             // strpos() returns 0 for a match at the start of the string, so
             // compare against false explicitly instead of testing truthiness;
             // otherwise URLs beginning with "article_" would be skipped.
             if (strpos($article_url, 'qq.com') !== false) {
                 continue;
             }
             if (strpos($article_url, "article_") === false) {
                 continue;
             }
             $article_url = $baseUrl . $article_url;

             // Skip items whose title has already been imported.
             $Document = D('DcDocument');
             $iscollect = $Document->where(array('title' => $item['title']))->find();
             if (!empty($iscollect)) {
                 continue;
             }

             $data = array(
                 'create_time' => strtotime($item['insert_date']),
                 'title' => $item['title'],
                 'description' => $item['summary'],
                 'cover_id' => $this->saveCoverImage($item['image_url_small']),
             );
             if (empty($data['cover_id'])) {
                 continue;
             }
             $data['title'] = str_replace('掌盟', '群挑', $data['title']);
             $docid = $Document->addDoc($data);

             // Loading the file makes it the document the selector below queries.
             \phpQuery::newDocumentFile($article_url);
             $content = pq(".article_content")->html();

             $hasImages = false;
             foreach (pq($content)->find("img") as $img) {
                 $hasImages = true;
                 $src = pq($img)->attr('src');
                 if (empty($src)) {
                     // Fall back to the 'jason' attribute when src is empty.
                     $src = pq($img)->attr('jason');
                 }
                 if (empty($src)) {
                     // Nothing to download or rewrite for this image.
                     continue;
                 }
                 $content = str_replace($src, $this->saveArticleImage($src), $content);
             }
             if ($hasImages) {
                 $content = str_replace("jason=", "src=", $content);
                 // Add the alt attribute once, after the loop: doing it per image
                 // gave every <img> one alt attribute per image in the article.
                 // The scraped title is escaped so a quote in it cannot break the
                 // attribute; double_encode is off in case it is already encoded.
                 $alt = htmlspecialchars($item['title'], ENT_QUOTES, 'UTF-8', false);
                 $content = str_replace("<img", "<img alt='" . $alt . "'", $content);
             }

             $content = str_replace('掌盟', '群挑', $content);
             // Unwrap links, keeping only their inner content.
             $content = preg_replace("/<a[^>]*>(.*)<\\/a>/isU", '${1}', $content);

             $article = array(
                 'content' => trim($content),
                 'id' => $docid,
             );
             D('DcArticle')->addArticle($article);

             \phpQuery::$documents = array();
             $page++;
         }
     }
 }
 /**
  * Resets phpQuery's static $documents property to an empty array.
  *
  * @return void
  */
 public static function clear()
 {
     \phpQuery::$documents = [];
 }