/**
 * Extract the configured fields from $this->html into $this->jsonArr.
 *
 * Every entry of $this->regArr except the one keyed 'callback' is a rule:
 *   [0] selector handed to find()
 *   [1] 'text', 'html', or an attribute name handed to attr()
 *   [2] optional tag list forwarded to _allowTags() / _stripTags()
 *   [3] optional per-rule callback, invoked as callback($value, $key)
 *
 * When $this->regRange is set, each element matched by that selector yields
 * one row and rules are evaluated relative to it. Otherwise each rule is
 * evaluated against the whole document and its i-th match fills row i.
 *
 * Finally the rows are converted with _arrayConvertEncoding() when
 * $this->outputEncoding is set, and phpQuery's document registry is emptied.
 */
private function _getList()
{
    $hobj = phpQuery::newDocumentHTML($this->html);

    if (!empty($this->regRange)) {
        $robj = pq($hobj)->find($this->regRange);
        $i = 0;
        foreach ($robj as $item) {
            // foreach replaces the removed each() and always starts at the
            // first rule, so no manual reset() of the array pointer is needed.
            foreach ($this->regArr as $key => $reg_value) {
                // Strict comparison: before PHP 8 an integer key 0 is loosely
                // equal to 'callback' and the rule would be skipped.
                if ($key === 'callback') {
                    continue;
                }
                $iobj = pq($item)->find($reg_value[0]);
                $this->jsonArr[$i][$key] = $this->_extractRuleValue($iobj, $reg_value, $key);
            }
            $i++;
        }
    } else {
        foreach ($this->regArr as $key => $reg_value) {
            if ($key === 'callback') {
                continue;
            }
            // A fresh document is parsed for every rule, as before.
            $hobj = phpQuery::newDocumentHTML($this->html);
            $lobj = pq($hobj)->find($reg_value[0]);
            $i = 0;
            foreach ($lobj as $item) {
                $this->jsonArr[$i][$key] = $this->_extractRuleValue($item, $reg_value, $key);
                $i++;
            }
        }
    }

    if ($this->outputEncoding) {
        // Convert the collected rows to the requested output encoding.
        $this->jsonArr = $this->_arrayConvertEncoding($this->jsonArr, $this->outputEncoding, $this->htmlEncoding);
    }

    phpQuery::$documents = array();
}

/**
 * Apply one rule to one node and run the applicable callback on the result.
 *
 * @param mixed  $node      node or phpQuery object the rule is applied to
 * @param array  $reg_value the rule (see _getList())
 * @param string $key       field name, passed through to the callback
 * @return mixed the extracted value, after the per-rule callback if present,
 *               otherwise after $this->regArr['callback'] if that is set
 */
private function _extractRuleValue($node, $reg_value, $key)
{
    $tags = isset($reg_value[2]) ? $reg_value[2] : '';

    switch ($reg_value[1]) {
        case 'text':
            $value = $this->_allowTags(pq($node)->html(), $tags);
            break;
        case 'html':
            $value = $this->_stripTags(pq($node)->html(), $tags);
            break;
        default:
            $value = pq($node)->attr($reg_value[1]);
            break;
    }

    // A per-rule callback takes precedence over the global one.
    if (isset($reg_value[3])) {
        return call_user_func($reg_value[3], $value, $key);
    }
    if (isset($this->regArr['callback'])) {
        return call_user_func($this->regArr['callback'], $value, $key);
    }

    return $value;
}
/**
 * Collect the src, title and alt attributes of every <img> in an HTML string.
 *
 * @param string $content HTML markup to scan
 * @return array one entry per image, each an array with the keys
 *               'src', 'title' and 'alt' (values as returned by attr())
 */
function sp_getcontent_imgs($content)
{
    import("phpQuery");

    // Query the document object directly: pq() requires a selector argument,
    // so the former bare pq() call is not usable on current PHP versions.
    $doc = phpQuery::newDocumentHTML($content);

    $imgs_data = array();
    foreach ($doc->find("img") as $img) {
        $img = pq($img);
        $imgs_data[] = array(
            'src'   => $img->attr("src"),
            'title' => $img->attr("title"),
            'alt'   => $img->attr("alt"),
        );
    }

    // Release the parsed documents; keep the registry an array (not null) so
    // later code that iterates it does not trip over a non-array value.
    phpQuery::$documents = array();

    return $imgs_data;
}
/**
 * Collect every URL referenced by $this->content.
 *
 * The result is the concatenation, in this order, of:
 *   1. the href attribute of every element that has one,
 *   2. the src attribute of every element that has one,
 *   3. the argument of every url(...) occurrence in the raw content, with
 *      quote characters removed and surrounding whitespace trimmed.
 *
 * @return array list of URL strings (duplicates are not removed)
 */
public function parse()
{
    $dom = \phpQuery::newDocument($this->content);

    // Walk each match set once instead of re-running an ":eq(i)" query
    // for every index.
    $hrefs = [];
    foreach ($dom->find('*[href]') as $node) {
        $hrefs[] = pq($node)->attr('href');
    }

    $srcs = [];
    foreach ($dom->find('*[src]') as $node) {
        $srcs[] = pq($node)->attr('src');
    }

    // Drop the parsed documents so they do not accumulate in phpQuery.
    \phpQuery::$documents = [];

    $urls = [];
    $pattern = '/(?<=url\\()[^)]*?(?=\\))/';
    preg_match_all($pattern, $this->content, $match);
    if (!empty($match[0])) {
        $urls = array_map(function ($val) {
            return trim(preg_replace('/[\'"]/', '', $val));
        }, $match[0]);
    }

    return array_merge($hrefs, $srcs, $urls);
}
/**
 * Discard all previously loaded phpQuery documents and load $getcontent
 * as a new document.
 *
 * @param string $getcontent markup to load
 */
public function _setDocument($getcontent)
{
    phpQuery::$documents = array();

    // Set the default charset before creating the document; assigning it
    // afterwards could only influence documents created later.
    phpQuery::$defaultCharset = 'UTF-8';

    phpQuery::newDocument($getcontent);
}
/**
 * Crawl the qt.qq.com news list, following each page's 'next' link, and
 * store every article that is not already present in DcDocument.
 *
 * For each accepted list item the cover image is saved, a DcDocument row is
 * added, the article page is fetched, its images are saved via
 * saveArticleImage() and rewritten in the markup, links are unwrapped, and
 * the result is stored through DcArticle::addArticle().
 *
 * Crawling stops when a list page has no 'next' entry (the URL and the page
 * data are then printed) or when the $page counter exceeds 10.
 */
public function index()
{
    set_time_limit(0);
    import('Org.JAE.QueryList');
    header("Content-type: text/html; charset=utf-8");

    $baseUrl = "http://qt.qq.com/static/pages/news/phone/";
    $listurl = $baseUrl . "c12_list_1.shtml";
    $page = 0;

    while ($page <= 10) {
        $pageresult = \QueryList::Query($listurl);
        $json = $pageresult->getHtmlJSON();

        if (empty($json[0]['next'])) {
            echo $listurl;
            dump($json[0]);
            break;
        }

        $listurl = $baseUrl . $json[0]['next'];
        // Tolerate a page without a 'list' entry instead of iterating null.
        $items = isset($json[0]['list']) && is_array($json[0]['list']) ? $json[0]['list'] : array();

        foreach ($items as $item) {
            $article_url = $item['article_url'];

            // strpos() returns 0 for a match at the start of the string, so
            // compare against false rather than relying on truthiness.
            if (strpos($article_url, 'qq.com') !== false) {
                continue;
            }
            if (strpos($article_url, "article_") === false) {
                continue;
            }
            $article_url = $baseUrl . $article_url;

            // Look up duplicates by the title that is actually stored (after
            // the replacement), otherwise rewritten titles never match.
            $title = str_replace('掌盟', '群挑', $item['title']);
            $iscollect = D('DcDocument')->where(array('title' => $title))->find();
            if (!empty($iscollect)) {
                continue;
            }

            $data = array(
                'create_time' => strtotime($item['insert_date']),
                'title'       => $title,
                'description' => $item['summary'],
                'cover_id'    => $this->saveCoverImage($item['image_url_small']),
            );
            if (empty($data['cover_id'])) {
                continue;
            }
            $docid = D('DcDocument')->addDoc($data);

            \phpQuery::newDocumentFile($article_url);
            $content = pq(".article_content")->html();

            $hasImages = false;
            foreach (pq($content)->find("img") as $img) {
                $hasImages = true;
                $src = pq($img)->attr('src');
                if (empty($src)) {
                    // Fall back to the 'jason' attribute when src is empty.
                    $src = pq($img)->attr('jason');
                }
                if (empty($src)) {
                    // Nothing to download or rewrite for this image.
                    continue;
                }
                $imgurl = $this->saveArticleImage($src);
                $content = str_replace($src, $imgurl, $content);
            }

            if ($hasImages) {
                $content = str_replace("jason=", "src=", $content);
                // Insert the alt attribute once for all tags; doing this per
                // image added one alt attribute per image to every <img>.
                // The title comes from the remote page, so escape it.
                $alt = htmlspecialchars($item['title'], ENT_QUOTES, 'UTF-8');
                $content = str_replace("<img", "<img alt='" . $alt . "'", $content);
            }

            $content = str_replace('掌盟', '群挑', $content);
            // Unwrap anchors, keeping only their inner markup.
            $content = preg_replace("/<a[^>]*>(.*)<\\/a>/isU", '${1}', $content);

            $article = array(
                'content' => trim($content),
                'id'      => $docid,
            );
            D('DcArticle')->addArticle($article);

            \phpQuery::$documents = array();

            // NOTE(review): the counter advances once per stored article, not
            // once per list page, so the "> 10" limit counts articles.
            // Confirm whether this was meant to sit after the foreach.
            $page++;
        }
    }
}
/**
 * Empty phpQuery's static document registry.
 */
public static function clear()
{
    \phpQuery::$documents = [];
}