/**
 * Fetch the page behind the posted URL and emit a JSON link preview.
 *
 * Reads `post.url`, downloads the page, and extracts title, keywords,
 * description and the first image. The result is written as JSON and the
 * request is terminated with exit().
 *
 * NOTE(review): the URL comes from the client and is fetched server-side;
 * confirm that get_content_by_url() restricts internal/private targets.
 *
 * @return void  Never returns normally; the script exits after output.
 */
public function getLinkContent()
{
    require_once './ThinkPHP/Library/Vendor/Collection/phpQuery.php';

    $link = op_t(I('post.url'));
    $content = get_content_by_url($link);

    // Use the charset declared in the page's <meta>, defaulting to UTF-8.
    $charset = preg_match("/<meta.+?charset=[^\\w]?([-\\w]+)/i", $content, $temp) ? strtolower($temp[1]) : "utf-8";
    \phpQuery::$defaultCharset = $charset;
    \phpQuery::newDocument($content);

    // Prefer an explicit meta title; fall back to the <title> element.
    $title = pq("meta[name='title']")->attr('content');
    if (empty($title)) {
        $title = pq("title")->html();
    }
    $title = iconv($charset, "UTF-8", $title);

    $keywords = pq("meta[name='keywords'],meta[name='Keywords']")->attr('content');
    $description = pq("meta[name='description'],meta[name='Description']")->attr('content');

    // Resolve the first image's src against the page URL.
    $url = parse_url($link);
    if (!is_array($url)) {
        $url = array();
    }
    $scheme = isset($url['scheme']) ? $url['scheme'] : 'http';
    $host = isset($url['host']) ? $url['host'] : '';
    $port = isset($url['port']) ? ':' . $url['port'] : '';

    $img = (string) pq("img")->eq(0)->attr('src');
    if ($img === '') {
        // No image on the page: return an empty string, not a bare host URL.
        $img = '';
    } elseif (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $img)) {
        // Already absolute (http:, https:, data:, ...): leave untouched.
    } elseif (strpos($img, '//') === 0) {
        // Protocol-relative URL: inherit the page's scheme.
        $img = $scheme . ':' . $img;
    } elseif ($host !== '') {
        if ($img[0] !== '/') {
            // Page-relative path: resolve against the directory of the page.
            $path = isset($url['path']) ? $url['path'] : '/';
            $slash = strrpos($path, '/');
            $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);
            $img = $dir . $img;
        }
        $img = $scheme . '://' . $host . $port . $img;
    }

    $title = text($title);
    $description = text($description);
    $keywords = text($keywords);

    // Description and keywords fall back to the title when missing.
    $return = array();
    $return['title'] = $title;
    $return['img'] = $img;
    $return['description'] = empty($description) ? $title : $description;
    $return['keywords'] = empty($keywords) ? $title : $keywords;

    exit(json_encode($return));
}
/**
 * Parse an HTML string and extract a value according to a config node.
 *
 * Loads the markup as a phpQuery document (default charset "utf-8"),
 * delegates extraction to self::queryValue(), then unloads the document.
 *
 * @param ConfNode $conf Extraction rule passed through to queryValue().
 * @param string   $html Markup to parse.
 * @return array|float|int|String Whatever queryValue() produces.
 */
public static function parseHtml($conf, $html)
{
    query::$defaultCharset = "utf-8";

    $document = query::newDocument($html);
    query::selectDocument($document);

    $root = pq($document);
    $result = self::queryValue($root, $conf);

    // Release the parsed document to free memory.
    query::unloadDocuments($document);

    return $result;
}
/**
 * Test a collection (scraping) rule set against a live site.
 *
 * Loads page 2 of the posted list URL, takes the first list item that
 * matches the posted selector, loads the article it links to and echoes
 * the text matched by every posted "<field>_rule" whose type is 0.
 *
 * NOTE(review): scraped text is echoed into an HTML response unescaped;
 * confirm this page is only reachable by trusted administrators.
 *
 * @return void Output is written directly with echo.
 */
public function testpage()
{
    header("Content-type: text/html; charset=utf-8");

    // List URL: substitute the page placeholder with page 2.
    $listurl = I('post.listurl');
    $listurl = str_replace('{$page}', 2, $listurl);
    $urlInfo = parse_url($listurl);

    // List item selector (read raw) and the attribute holding the link.
    $listobj = isset($_POST['listobj']) ? $_POST['listobj'] : '';
    $listattr = I('post.listattr');
    if ($listobj === '') {
        // Nothing to select: there is no item to test.
        return;
    }

    // Load the list page.
    Vendor('phpQuery.phpQuery', '', '.class.php');
    \phpQuery::newDocumentFile($listurl);
    $artlist = pq($listobj);

    foreach ($artlist as $li) {
        $pageurl = (string) pq($li)->attr($listattr);

        // Resolve non-absolute links against the list URL.
        if (stripos($pageurl, 'http') !== 0) {
            $scheme = isset($urlInfo['scheme']) ? $urlInfo['scheme'] : 'http';
            $host = isset($urlInfo['host']) ? $urlInfo['host'] : '';
            $port = isset($urlInfo['port']) ? ':' . $urlInfo['port'] : '';
            $origin = $scheme . '://' . $host . $port;

            if (strpos($pageurl, '//') === 0) {
                // Protocol-relative link.
                $pageurl = $scheme . ':' . $pageurl;
            } elseif (strpos($pageurl, '/') === 0) {
                // Root-relative link.
                $pageurl = $origin . $pageurl;
            } else {
                // Page-relative link: resolve against the list page's directory.
                $basePath = isset($urlInfo['path']) ? $urlInfo['path'] : '/';
                $slash = strrpos($basePath, '/');
                $baseDir = ($slash === false) ? '/' : substr($basePath, 0, $slash + 1);
                $pageurl = $origin . $baseDir . $pageurl;
            }
        }

        // Load the article page using the posted charset.
        \phpQuery::$defaultCharset = I('post.langcode');
        \phpQuery::newDocumentFileHTML($pageurl);

        // Look up the model's fields; the model id is the second item of "cate".
        $cate = I('post.cate');
        $cateArr = explode(',', $cate);
        $mid = isset($cateArr[1]) ? $cateArr[1] : null;
        $ModelField = DD('ModelField');
        $fieldlist = $ModelField->selFieldByMid($mid);
        if (empty($fieldlist)) {
            $fieldlist = array();
        }

        foreach ($fieldlist as $f) {
            $ruleKey = $f['fieldname'] . '_rule';
            if (!isset($_POST[$ruleKey])) {
                continue;
            }

            $ruleObj = json_decode($_POST[$ruleKey]);
            if (!is_object($ruleObj) || !isset($ruleObj->obj)) {
                // Malformed or incomplete rule JSON: skip instead of failing.
                continue;
            }

            $type = isset($ruleObj->type) ? $ruleObj->type : null;
            if ($type == 0) {
                $matched = pq($ruleObj->obj);
                echo $matched->text() . '<br />';
            }
        }

        // Only the first list item is tested.
        break;
    }
}
/**
 * Remove specific HTML tags (and their contents) from a markup string.
 *
 * The tags are joined into one comma-separated selector and every matching
 * element is removed from a phpQuery document built from $html.
 *
 * @param string $html Markup to clean.
 * @param array  $tags Tag names / selectors to remove.
 * @return string The document's outer HTML after removal, or $html
 *                unchanged when no usable tag is given.
 */
private function _removeTags($html, $tags)
{
    if (empty($tags) || !is_array($tags)) {
        return $html;
    }

    // Drop blank entries so the joined selector is never empty or malformed.
    $selectors = array();
    foreach ($tags as $tag) {
        $tag = trim((string) $tag);
        if ($tag !== '') {
            $selectors[] = $tag;
        }
    }
    if (count($selectors) === 0) {
        return $html;
    }

    phpQuery::$defaultCharset = $this->htmlEncoding;
    $doc = phpQuery::newDocumentHTML($html);
    pq($doc)->find(implode(',', $selectors))->remove();
    $html = pq($doc)->htmlOuter();
    $doc->unloadDocument();

    return $html;
}
/**
 * Resolve a video page link into its title and embeddable flash URL.
 *
 * Results are cached for one hour under 'video_info_' . md5($link).
 * Unsupported hosts or pages that cannot be parsed yield empty strings.
 *
 * @param string $link URL of the video page.
 * @return array Array with keys 'title' and 'flash_url'.
 */
public function getVideoInfo($link)
{
    $return = S('video_info_' . md5($link));
    if (empty($return)) {
        require_once './ThinkPHP/Library/Vendor/Collection/phpQuery.php';

        // Defaults keep unsupported hosts / unmatched patterns well-defined.
        $title = '';
        $flash_url = '';

        preg_match("/(youku.com|ku6.com|sohu.com|sina.com.cn|qq.com|tudou.com|yinyuetai.com|iqiyi.com)/i", $link, $hosts);
        // The match is case-insensitive, so normalise before comparing.
        $host = isset($hosts[1]) ? strtolower($hosts[1]) : '';
        $content = get_content_by_url($link);

        if ('youku.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $flash_url = pq("#link2")->attr('value');
        } elseif ('ku6.com' == $host) {
            \phpQuery::$defaultCharset = 'GBK';
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $flash_url = pq(".ckl_input")->eq(0)->attr('value');
            $title = iconv("GBK", "UTF-8", $title);
        } elseif ('tudou.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            preg_match('/iid:(.*?)\\s+,icode/s', $content, $program);
            $programId = isset($program[1]) ? intval($program[1]) : 0;
            if (strpos($link, 'www.tudou.com/albumplay') !== false) {
                preg_match("/albumplay\\/([\\w\\-\\.]+)[\\/|\\.]/", $link, $album);
                $albumId = isset($album[1]) ? $album[1] : '';
                $flash_url = 'http://www.tudou.com/a/' . $albumId . '/&iid=' . $programId . '/v.swf';
            } elseif (strpos($link, 'www.tudou.com/programs') !== false) {
                $flash_url = 'http://www.tudou.com/v/' . $programId . '/v.swf';
            } elseif (strpos($link, 'www.tudou.com/listplay') !== false) {
                preg_match("/listplay\\/([\\w\\-\\.]+)\\//", $link, $list);
                $listId = isset($list[1]) ? $list[1] : '';
                $flash_url = 'http://www.tudou.com/l/' . $listId . '/&iid=' . $programId . '/v.swf';
            }
        } elseif ('sohu.com' == $host) {
            \phpQuery::$defaultCharset = 'GBK';
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $title = iconv("GBK", "UTF-8", $title);
            $flash_url = pq("[property='og:videosrc']")->attr('content');
        } elseif ('qq.com' == $host) {
            $contentType = 'text/html;charset=gbk';
            \phpQuery::newDocument($content, $contentType);
            // The video id is taken from the link's vid= parameter.
            preg_match("/vid=(.*)/i", $link, $vidMatch);
            $vid = isset($vidMatch[1]) ? $vidMatch[1] : '';
            $flash_url = 'http://static.video.qq.com/TPout.swf?vid=' . $vid . '&auto=0';
            if ($vid !== '') {
                // The title is stored on the element whose id equals the vid.
                $title = pq("#" . $vid)->attr('title');
            }
        } elseif ('sina.com.cn' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            preg_match("/swfOutsideUrl:\\'(.+?)\\'/i", $content, $flashvar);
            $flash_url = isset($flashvar[1]) ? $flashvar[1] : '';
        } elseif ('yinyuetai.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $flash_url = pq("[property='og:videosrc']")->attr('content');
        } elseif ('iqiyi.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $obj = pq("#videoArea")->find('div')->eq(0);
            $videoId = $obj->attr('data-player-videoid');
            preg_match("/iqiyi.com\\/(.*).html/i", $link, $pageMatch);
            $pageId = isset($pageMatch[1]) ? $pageMatch[1] : '';
            $albumId = $obj->attr('data-player-albumid');
            $tvId = $obj->attr('data-player-tvid');
            $flash_url = 'http://player.video.qiyi.com/' . $videoId . '/0/0/' . $pageId . '.swf-albumId=' . $albumId . '-tvId=' . $tvId;
        }

        $return = array();
        $return['title'] = text($title);
        $return['flash_url'] = urldecode($flash_url);
        S('video_info_' . md5($link), $return, 60 * 60);
    }
    return $return;
}
/**
 * Simulate a campus-card ("ykt") login and scrape the account overview.
 *
 * Posts the credentials to the unified login endpoint, follows redirects,
 * then parses the returned page for account balances and recent
 * consumption records.
 *
 * @param string $user Login name (sent as IDToken1).
 * @param string $pwd  Password (sent as IDToken2).
 * @return array Array with keys:
 *               'total' => account rows in chunks of 4, plus 'total'
 *                          (sum of each row's third column via only_num())
 *                          and 'name' (welcome text without its prefix);
 *               'list'  => recent record cells in chunks of 5;
 *               'other' => the per-account numeric values.
 */
public function ykt_login($user, $pwd)
{
    // Use a unique temp file so concurrent logins never share or delete
    // each other's cookie jar; fall back to the legacy location.
    $cookie_jar = tempnam(sys_get_temp_dir(), 'ykt');
    if ($cookie_jar === false) {
        $cookie_jar = dirname(__FILE__) . "/" . time() . "ykt.cookie";
    }

    $data = array();
    $data['IDToken0'] = '';
    $data['IDToken1'] = $user;
    $data['IDToken2'] = $pwd;
    $data['IDButton'] = 'Submit';
    $data['goto'] = 'aHR0cDovL215aWQuam11LmVkdS5jbi9pZHMvVXNlckNoZWNrLmFzcHg/Z290bz1odHRwOi8vbXlpZC5qbXUuZWR1LmNuL3lrdC9kZWZhdWx0LmFzcHg/ZnJvbUlEUz0x';
    $data['goto_Url'] = 'http://myid.jmu.edu.cn/ids/UserCheck.aspx?goto=http://myid.jmu.edu.cn/ykt/default.aspx?fromIDS=1';
    $data['encoded'] = 'true';
    $data['inputCode'] = '';
    $data['gx_charset'] = 'UTF-8';
    $post = http_build_query($data);

    // Unified login endpoint.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://id.jmu.edu.cn/amserver/UI/Login");
    curl_setopt($ch, CURLOPT_REFERER, 'http://id.jmu.edu.cn/amserver/UI/Login?goto=http%3a%2f%2fmyid.jmu.edu.cn%2fykt%2fdefault.aspx%3ffromIDS%3d1');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $this->passheader);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_POST, 1); // send as POST
    curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
    $result = curl_exec($ch);
    curl_close($ch);

    // The jar is only needed for the duration of the request.
    if (is_file($cookie_jar)) {
        unlink($cookie_jar);
    }

    if ($result === false) {
        // Transport failure: return the usual shape with empty data.
        return array(
            'total' => array('total' => 0, 'name' => ''),
            'list' => array(),
            'other' => array(),
        );
    }

    Vendor('phpQuery');
    phpQuery::$defaultCharset = 'UTF-8';
    phpQuery::newDocumentHTML($result);

    $username = pq("#welcome")->text();

    // Account overview: collect all span texts, drop the first two, group by 4.
    $examname = pq("#ctl00_ContentPlaceHolder1_MainCallbackPanel_pageControl_Page0_Overview_AccountsControl_nbAccounts")->find('span');
    $list = array();
    foreach ($examname as $company) {
        $list[] = pq($company)->text();
    }
    $groupclass = array_chunk(array_slice($list, 2), 4);

    $num = 0;
    $other = array();
    foreach ($groupclass as $value) {
        // The third cell of each group holds the amount.
        $nowjiage = $this->only_num(isset($value[2]) ? $value[2] : '');
        $num = $num + $nowjiage;
        $other[] = $nowjiage;
    }

    // Overall total and display name.
    $groupclass['total'] = $num;
    $groupclass['name'] = str_replace('欢迎您, ', '', $username);

    // Recent records: collect all cells, drop the first 15, group by 5.
    $xiaofeiinfo = pq("#ctl00_ContentPlaceHolder1_MainCallbackPanel_pageControl_Page0_Overview_navBar_GCTC1_RecentlyView_RecentlyRecGrid_DXMainTable")->find('td');
    $xiaofeilist = array();
    foreach ($xiaofeiinfo as $company) {
        $xiaofeilist[] = pq($company)->text();
    }
    $groupinfo = array_chunk(array_slice($xiaofeilist, 15), 5);

    return array('total' => $groupclass, 'list' => $groupinfo, 'other' => $other);
}
/**
 * Resolve a video page link into title, embeddable flash URL and thumbnail.
 *
 * Results are cached for one hour under 'video_info_' . md5($link).
 * Unsupported hosts or pages that cannot be parsed yield empty strings.
 *
 * @param string $link URL of the video page.
 * @return array Array with keys 'title', 'flash_url' and 'img_url'.
 */
public function getVideoInfo($link)
{
    $return = S('video_info_' . md5($link));
    if (empty($return)) {
        require_once './ThinkPHP/Library/Vendor/Collection/phpQuery.php';

        // Defaults keep unsupported hosts / unmatched patterns well-defined.
        $title = '';
        $flash_url = '';
        $img_url = '';

        preg_match("/(youku.com|ku6.com|sohu.com|sina.com.cn|qq.com|tudou.com|yinyuetai.com|iqiyi.com|bilibili.com)/i", $link, $hosts);
        // The match is case-insensitive, so normalise before comparing.
        $host = isset($hosts[1]) ? strtolower($hosts[1]) : '';
        $content = get_content_by_url($link);

        if ('youku.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $flash_url = pq("#link2")->attr('value');
            // Thumbnail: look the video up by the id embedded in the flash URL.
            // (An older endpoint was http://v.youku.com/player/getPlayList/VideoIDS/<id>.)
            preg_match("/sid\\/(.*?)\\//", $flash_url, $id);
            $videoId = isset($id[1]) ? $id[1] : '';
            $json = get_content_by_url('http://play.youku.com/play/get.json?vid=' . $videoId . '&ct=10&ran=1951');
            $json = json_decode($json, true);
            $img_url = isset($json['data']['video']['logo']) ? $json['data']['video']['logo'] : '';
        } elseif ('ku6.com' == $host) {
            \phpQuery::$defaultCharset = 'GBK';
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $flash_url = pq(".ckl_input")->eq(0)->attr('value');
            $title = iconv("GBK", "UTF-8", $title);
            // Thumbnail.
            preg_match("/show\\/(.*?).html/", $link, $id);
            $videoId = isset($id[1]) ? $id[1] : '';
            $json = get_content_by_url('http://v.ku6.com/fetch.htm?t=getVideo4Player&vid=' . $videoId);
            $json = json_decode($json, true);
            $img_url = isset($json['data']['bigpicpath']) ? $json['data']['bigpicpath'] : '';
        } elseif ('tudou.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            preg_match('/iid:(.*?)\\s+,icode/s', $content, $program);
            $programId = isset($program[1]) ? intval($program[1]) : 0;
            if (strpos($link, 'www.tudou.com/albumplay') !== false) {
                preg_match("/albumplay\\/([\\w\\-\\.]+)[\\/|\\.]/", $link, $album);
                $albumId = isset($album[1]) ? $album[1] : '';
                $flash_url = 'http://www.tudou.com/a/' . $albumId . '/&iid=' . $programId . '/v.swf';
            } elseif (strpos($link, 'www.tudou.com/programs') !== false) {
                $flash_url = 'http://www.tudou.com/v/' . $programId . '/v.swf';
            } elseif (strpos($link, 'www.tudou.com/listplay') !== false) {
                preg_match("/listplay\\/([\\w\\-\\.]+)\\//", $link, $list);
                $listId = isset($list[1]) ? $list[1] : '';
                $flash_url = 'http://www.tudou.com/l/' . $listId . '/&iid=' . $programId . '/v.swf';
            }
            // Thumbnail.
            $json = get_content_by_url('http://api.tudou.com/v6/video/info?app_key=myKey&format=json&itemCodes=' . $programId);
            $json = json_decode($json, true);
            $img_url = isset($json['results'][0]['bigPicUrl']) ? $json['results'][0]['bigPicUrl'] : '';
        } elseif ('sohu.com' == $host) {
            \phpQuery::$defaultCharset = 'GBK';
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $title = iconv("GBK", "UTF-8", $title);
            $flash_url = pq("[property='og:videosrc']")->attr('content');
            // Thumbnail.
            preg_match("/com\\/(.*?)\\/v.swf/", $flash_url, $id);
            $videoId = isset($id[1]) ? $id[1] : '';
            $json = get_content_by_url('http://hot.vrs.sohu.com/vrs_flash.action?vid=' . $videoId);
            $json = json_decode($json, true);
            $img_url = isset($json['data']['coverImg']) ? $json['data']['coverImg'] : '';
        } elseif ('qq.com' == $host) {
            $contentType = 'text/html;charset=gbk';
            \phpQuery::newDocument($content, $contentType);
            // The video id is taken from the page source.
            preg_match("/vid:\"(.*)\"/i", $content, $vidMatch);
            $vid = isset($vidMatch[1]) ? $vidMatch[1] : '';
            $flash_url = 'http://static.video.qq.com/TPout.swf?vid=' . $vid . '&auto=0';
            if ($vid !== '') {
                // The title is stored on the element whose id equals the vid.
                $title = pq("#" . $vid)->attr('title');
            }
            // Thumbnail.
            $img_url = 'http://vpic.video.qq.com/d/' . $vid . '_ori_1.jpg';
        } elseif ('sina.com.cn' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            preg_match("/swfOutsideUrl:\\'(.+?)\\'/i", $content, $flashvar);
            $flash_url = isset($flashvar[1]) ? $flashvar[1] : '';
            // Thumbnail.
            preg_match("/pic[\\s]*:[\\s]*[\"|\\']?[\\s]*([^'|\"]+)?/", $content, $mch1);
            $img_url = isset($mch1[1]) ? $mch1[1] : '';
        } elseif ('yinyuetai.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $flash_url = pq("[property='og:videosrc']")->attr('content');
            // Thumbnail.
            $img_url = pq("[property='og:image']")->attr('content');
        } elseif ('iqiyi.com' == $host) {
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            $obj = pq("#videoArea")->find('div')->eq(0);
            $videoId = $obj->attr('data-player-videoid');
            preg_match("/iqiyi.com\\/(.*).html/i", $link, $pageMatch);
            $pageId = isset($pageMatch[1]) ? $pageMatch[1] : '';
            $albumId = $obj->attr('data-player-albumid');
            $tvId = $obj->attr('data-player-tvid');
            $flash_url = 'http://player.video.qiyi.com/' . $videoId . '/0/0/' . $pageId . '.swf-albumId=' . $albumId . '-tvId=' . $tvId;
            // iQiyi filters cross-site image requests, so a bundled default is used.
            $img_url = get_pic_src('/Public/images/iqiyi.jpg');
        } elseif ('bilibili.com' == $host) {
            $content = $this->gzdecode($content);
            \phpQuery::newDocument($content);
            $title = pq("title")->html();
            // Thumbnail.
            $img_url = pq("[class='cover_image']")->attr('src');
            // Player parameters are embedded in the first script under #bofqi.
            $url_js = pq('#bofqi')->find('script')->eq(0)->html();
            preg_match("/(cid=\\w*)/", $url_js, $cidMatch);
            preg_match("/aid=\\w*/", $url_js, $aidMatch);
            $url_cid = isset($cidMatch[0]) ? substr($cidMatch[0], strpos($cidMatch[0], "=") + 1) : '';
            $url_aid = isset($aidMatch[0]) ? substr($aidMatch[0], strpos($aidMatch[0], "=") + 1) : '';
            $flash_url = "http://static.hdslb.com/play.swf" . "?cid=" . $url_cid . "&aid=" . $url_aid;
        }

        $return = array();
        $return['title'] = text($title);
        $return['flash_url'] = urldecode($flash_url);
        $return['img_url'] = urldecode($img_url);
        S('video_info_' . md5($link), $return, 60 * 60);
    }
    return $return;
}
/**
 * Fetch the user's grade-examination history.
 *
 * Requests the exam-history page using the cookie file named in
 * $_SESSION['edu_cookie'], collects every span inside the result grid's
 * cells, groups them in rows of 6 and keys each row by its first cell.
 *
 * @return array Rows keyed by their first column; empty when the request
 *               fails or nothing is found.
 */
public function getUserexam()
{
    // NOTE(review): presumably set by an earlier login step — confirm.
    $cookieFile = isset($_SESSION['edu_cookie']) ? $_SESSION['edu_cookie'] : '';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://jwgl3.jmu.edu.cn/Student/RegistExam/MyExamHistory.aspx");
    curl_setopt($ch, CURLOPT_REFERER, 'http://jwgl3.jmu.edu.cn/Student/Left.aspx');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $this->header);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
    $MyExamHistory = curl_exec($ch);
    curl_close($ch);

    if ($MyExamHistory === false || $MyExamHistory === '') {
        // Transport failure or empty body: nothing to parse.
        return array();
    }

    // Parse the exam results grid.
    Vendor('phpQuery');
    phpQuery::$defaultCharset = 'UTF-8';
    phpQuery::newDocumentHTML($MyExamHistory);

    $examname = pq("#ctl00_ContentPlaceHolder1_GridView1 td")->find('span');
    $list = array();
    foreach ($examname as $company) {
        $list[] = pq($company)->text();
    }

    // Six spans make up one row; index rows by their first column.
    $newgrouplist = array();
    foreach (array_chunk($list, 6) as $value) {
        $newgrouplist[$value[0]] = $value;
    }
    return $newgrouplist;
}
/**
 * Replace phpQuery's loaded documents with a single document built from
 * the given markup.
 *
 * @param string $getcontent Markup handed to phpQuery::newDocument().
 * @return void
 */
public function _setDocument($getcontent)
{
    // Clear the static document registry by overwriting it directly.
    phpQuery::$documents = array();
    phpQuery::newDocument($getcontent);
    // NOTE(review): the default charset is assigned after the document has
    // been created, so it presumably only influences documents loaded later
    // — confirm whether it was meant to precede newDocument().
    phpQuery::$defaultCharset = 'UTF-8';
}
/**
 * Render the latest threads of the configured Baidu Tieba forum.
 *
 * Uses the cached article list when present. Otherwise it scrapes the
 * forum page, builds the entries (optionally with pinned threads first),
 * caches them when caching is configured, and assigns them to the view
 * as 'Duotw'.
 *
 * @return void
 */
public function index()
{
    // Plugin configuration.
    $config = Amango_Addons_Config();

    // Reuse the cached list when available.
    $article = S('ADDONS_SnatchTieba');
    if (empty($article)) {
        Amango_Addons_Import('phpQuery/phpQuery.php');
        \phpQuery::$defaultCharset = 'GBK';
        \phpQuery::newDocumentFile('http://tieba.baidu.com/f?kw=' . urlencode($config['tieba_name']) . '&fr=ala0');

        $articlecontent = array('top' => array(), 'other' => array());
        $toptitle = array();
        $withTop = ($config['tieba_jinghua'] == 1);

        // Pinned threads are collected first so that the regular list can
        // skip them by title.
        if ($withTop) {
            foreach (\pq(".thread_top") as $li) {
                // Reply count.
                $tz_commont = iconv('GBK', 'UTF-8', \pq($li)->find('.threadlist_rep_num')->html());
                // Title.
                $tz_title = iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->html());
                // Link.
                $tz_link = 'http://tieba.baidu.com' . iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->attr('href'));
                // Picture of this thread (not a leftover from another loop).
                $tz_pic = iconv('GBK', 'UTF-8', \pq($li)->find('img')->attr('original'));

                $toptitle[] = $tz_title;
                $articlecontent['top'][] = array(
                    'Title' => "[" . $tz_commont . "]" . $tz_title,
                    'Description' => '',
                    'PicUrl' => empty($tz_pic) ? '' : $tz_pic,
                    'Url' => $tz_link,
                );
            }
        }

        foreach (\pq(".j_thread_list") as $li) {
            // Reply count.
            $tz_commont = iconv('GBK', 'UTF-8', \pq($li)->find('.threadlist_rep_num')->html());
            // Title.
            $tz_title = iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->html());
            // Abstract, with whitespace and empty comment markers removed.
            $tz_content = iconv('GBK', 'UTF-8', \pq($li)->find('.threadlist_abs_onlyline')->html());
            $tz_content = preg_replace('/\s+/', '', $tz_content);
            $tz_content = str_replace('<!---->', '', $tz_content);
            // Link.
            $tz_link = 'http://tieba.baidu.com' . iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->attr('href'));
            // Author.
            $tz_author = strip_tags(iconv('GBK', 'UTF-8', \pq($li)->find('span.tb_icon_author a')->html()));
            $tz_author = preg_replace('/\s+/', '', $tz_author);
            // Last replier.
            $tz_reply = iconv('GBK', 'UTF-8', \pq($li)->find('span.tb_icon_author_rely a')->html());
            // Last reply time.
            $tz_replytime = \pq($li)->find('span.j_reply_data')->text();
            $tz_replytime = preg_replace('/\s+/', '', $tz_replytime);
            // Picture.
            $tz_pic = iconv('GBK', 'UTF-8', \pq($li)->find('img')->attr('original'));

            if (in_array($tz_title, $toptitle, true)) {
                // Already listed among the pinned threads.
                continue;
            }

            if (1 == $config['tieba_extra']) {
                $entryTitle = "[" . $tz_commont . "]" . $tz_title . "\n" . $tz_content . "\n作者:" . $tz_author . "|回复:" . $tz_reply . "-" . $tz_replytime;
            } else {
                $entryTitle = $tz_title . "\n" . $tz_content;
            }
            $articlecontent['other'][] = array(
                'Title' => $entryTitle,
                'Description' => '',
                'PicUrl' => empty($tz_pic) ? '' : $tz_pic,
                'Url' => $tz_link,
            );
        }

        // Clamp the number of entries to the range 1..8.
        $allownums = $config['tieba_nums'] > 8 ? 8 : $config['tieba_nums'];
        $allownums = $allownums >= 1 ? $allownums : 1;

        if ($withTop) {
            $article = self::havejinghua($articlecontent['top'], $articlecontent['other'], $allownums);
        } else {
            $article = self::deljinghua($articlecontent['other'], $allownums);
        }

        \phpQuery::unloadDocuments();

        if ($config['tieba_cache'] > 0 && !empty($article)) {
            S('ADDONS_SnatchTieba', $article, $config['tieba_cache']);
        }
    }

    $this->assign('Duotw', $article);
    $this->display();
}