Example #1
0
 /**
  * Fetch the page at the posted URL and emit its link-preview data as JSON.
  *
  * Reads the `url` POST field, downloads the page with get_content_by_url()
  * and extracts the title, keywords, description and first <img> source.
  * The JSON object carries the keys `title`, `img`, `description` and
  * `keywords`; description and keywords fall back to the title when empty.
  *
  * NOTE(review): the URL comes straight from the request, so unless
  * get_content_by_url() restricts its targets this endpoint can be pointed
  * at internal hosts -- confirm.
  *
  * @return void Does not return; the payload is written through exit().
  */
 public function getLinkContent()
 {
     require_once './ThinkPHP/Library/Vendor/Collection/phpQuery.php';
     $link = op_t(I('post.url'));
     $content = get_content_by_url($link);
     // Use the charset announced in the page's <meta> tag, UTF-8 otherwise.
     $charset = preg_match("/<meta.+?charset=[^\\w]?([-\\w]+)/i", $content, $temp) ? strtolower($temp[1]) : "utf-8";
     \phpQuery::$defaultCharset = $charset;
     \phpQuery::newDocument($content);
     $title = pq("meta[name='title']")->attr('content');
     if (empty($title)) {
         $title = pq("title")->html();
     }
     // iconv() returns false for an unknown charset or invalid input; keep
     // the unconverted title in that case instead of replacing it by false.
     $converted = iconv($charset, "UTF-8", $title);
     if ($converted !== false) {
         $title = $converted;
     }
     $keywords = pq("meta[name='keywords'],meta[name='Keywords']")->attr('content');
     $description = pq("meta[name='description'],meta[name='Description']")->attr('content');
     $url = parse_url($link);
     $img = pq("img")->eq(0)->attr('src');
     // Only sources without a scheme are completed. https:// and data: sources
     // are already usable, and a page without any image keeps an empty value.
     if (!empty($img) && !preg_match('#^(https?:|data:)#i', $img)) {
         $scheme = empty($url['scheme']) ? 'http' : $url['scheme'];
         if (strpos($img, '//') === 0) {
             // Protocol-relative source: only the scheme is missing.
             $img = $scheme . ':' . $img;
         } else {
             // Resolved against the site root; for paths without a leading
             // slash this is an approximation of real relative resolution.
             $host = empty($url['host']) ? '' : $url['host'];
             $img = $scheme . '://' . $host . '/' . ltrim($img, '/');
         }
     }
     $title = text($title);
     $description = text($description);
     $keywords = text($keywords);
     $return = array();
     $return['title'] = $title;
     $return['img'] = $img;
     $return['description'] = empty($description) ? $title : $description;
     $return['keywords'] = empty($keywords) ? $title : $keywords;
     exit(json_encode($return));
 }
Example #2
0
 /**
  * Evaluate an extraction rule against a piece of HTML.
  *
  * The markup is loaded as a new document with the default charset set to
  * "utf-8", passed through query::selectDocument(), handed to
  * self::queryValue() together with the rule, and unloaded again before the
  * extracted value is returned.
  *
  * @param ConfNode $conf Rule node forwarded to self::queryValue().
  * @param string   $html Markup handed to query::newDocument().
  * @return array|float|int|String Whatever self::queryValue() produced.
  */
 public static function parseHtml($conf, $html)
 {
     query::$defaultCharset = "utf-8";
     $document = query::newDocument($html);
     query::selectDocument($document);
     $extracted = self::queryValue(pq($document), $conf);
     // Release the document again to free memory.
     query::unloadDocuments($document);
     return $extracted;
 }
 /**
  * Dry-run a collection (scraping) configuration and print what it extracts.
  *
  * Loads the posted list URL with the {$page} placeholder set to 2, takes the
  * first element matched by the posted list selector, loads the article it
  * links to and echoes, one per line, the text matched by every posted
  * "<fieldname>_rule" entry whose type is 0.
  *
  * @return void Output is written directly with echo.
  */
 public function testpage()
 {
     header("Content-type: text/html; charset=utf-8");
     // List URL; the page placeholder is fixed to page 2 for the test run.
     $listurl = I('post.listurl');
     $listurl = str_replace('{$page}', 2, $listurl);
     $urlInfo = parse_url($listurl);
     // Selector for the list entries, read unfiltered from the request.
     // NOTE(review): presumably raw because input filtering would alter
     // selector characters -- confirm.
     $listobj = $_POST['listobj'];
     // Attribute of a list entry that holds the article URL.
     $listattr = I('post.listattr');
     // Load the list page.
     Vendor('phpQuery.phpQuery', '', '.class.php');
     \phpQuery::newDocumentFile($listurl);
     foreach (pq($listobj) as $li) {
         $pageurl = pq($li)->attr($listattr);
         // Complete URLs that do not start with "http" from the list URL.
         if (stripos($pageurl, 'http') !== 0) {
             $pageurl = $urlInfo['scheme'] . '://' . $urlInfo['host'] . $pageurl;
         }
         // Load the article page using the posted charset.
         \phpQuery::$defaultCharset = I('post.langcode');
         \phpQuery::newDocumentFileHTML($pageurl);
         // The model id is the second element of the comma-separated "cate".
         $cateArr = explode(',', I('post.cate'));
         $mid = isset($cateArr[1]) ? $cateArr[1] : null;
         $fieldlist = DD('ModelField')->selFieldByMid($mid);
         foreach ($fieldlist as $f) {
             $ruleKey = $f['fieldname'] . '_rule';
             if (!isset($_POST[$ruleKey])) {
                 continue;
             }
             $ruleObj = json_decode($_POST[$ruleKey]);
             // Skip rules that are not a JSON object or carry no selector.
             if (!is_object($ruleObj) || !isset($ruleObj->obj)) {
                 continue;
             }
             // A rule without a type is treated like type 0, as before.
             $ruleType = isset($ruleObj->type) ? $ruleObj->type : 0;
             if ($ruleType == 0) {
                 // The text comes from a remote page, so escape it before it
                 // is written into this HTML response.
                 echo htmlspecialchars(pq($ruleObj->obj)->text(), ENT_QUOTES, 'UTF-8') . '<br />';
             }
         }
         // Only the first list entry is used for the test.
         break;
     }
 }
Example #4
0
 /**
  * Remove specific HTML tags from a piece of markup.
  *
  * @param  string       $html Markup to clean.
  * @param  array|string $tags Tag names (selectors) to remove; a single
  *                            name may be given as a string.
  * @return string The markup without the matched elements, or $html
  *                unchanged when $tags is empty.
  */
 private function _removeTags($html, $tags)
 {
     // empty() also covers null, which count() rejects on newer PHP versions.
     if (empty($tags)) {
         return $html;
     }
     // All tag names are combined into one comma-separated selector.
     $selector = implode(',', (array) $tags);
     phpQuery::$defaultCharset = $this->htmlEncoding;
     $doc = phpQuery::newDocumentHTML($html);
     pq($doc)->find($selector)->remove();
     $html = pq($doc)->htmlOuter();
     $doc->unloadDocument();
     return $html;
 }
 /**
  * Resolve a video page URL to its title and embeddable Flash player URL.
  *
  * Handles youku, ku6, tudou, sohu, qq, sina, yinyuetai and iqiyi links. The
  * result is cached through S() for one hour under a key built from
  * md5($link).
  *
  * @param string $link URL of the video page.
  * @return array Array with the keys 'title' and 'flash_url'. For a host
  *               that is not recognised both are derived from empty strings.
  */
 public function getVideoInfo($link)
 {
     $cacheKey = 'video_info_' . md5($link);
     $return = S($cacheKey);
     if (empty($return)) {
         require_once './ThinkPHP/Library/Vendor/Collection/phpQuery.php';
         // Start from defined values so that an unsupported host or a page
         // lacking the expected markup never reads an undefined variable.
         $return = array();
         $title = '';
         $flash_url = '';
         preg_match('/(youku\.com|ku6\.com|sohu\.com|sina\.com\.cn|qq\.com|tudou\.com|yinyuetai\.com|iqiyi\.com)/i', $link, $hosts);
         // The match is case-insensitive, so normalise before comparing.
         $host = isset($hosts[1]) ? strtolower($hosts[1]) : '';
         $content = get_content_by_url($link);
         if ('youku.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $flash_url = pq("#link2")->attr('value');
         } elseif ('ku6.com' == $host) {
             \phpQuery::$defaultCharset = 'GBK';
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $flash_url = pq(".ckl_input")->eq(0)->attr('value');
             $title = iconv("GBK", "UTF-8", $title);
         } elseif ('tudou.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             preg_match('/iid:(.*?)\\s+,icode/s', $content, $program);
             $programId = intval(isset($program[1]) ? $program[1] : 0);
             if (strpos($link, 'www.tudou.com/albumplay') !== false) {
                 preg_match("/albumplay\\/([\\w\\-\\.]+)[\\/|\\.]/", $link, $album);
                 $albumId = isset($album[1]) ? $album[1] : '';
                 $flash_url = 'http://www.tudou.com/a/' . $albumId . '/&iid=' . $programId . '/v.swf';
             } elseif (strpos($link, 'www.tudou.com/programs') !== false) {
                 $flash_url = 'http://www.tudou.com/v/' . $programId . '/v.swf';
             } elseif (strpos($link, 'www.tudou.com/listplay') !== false) {
                 preg_match("/listplay\\/([\\w\\-\\.]+)\\//", $link, $list);
                 $listId = isset($list[1]) ? $list[1] : '';
                 $flash_url = 'http://www.tudou.com/l/' . $listId . '/&iid=' . $programId . '/v.swf';
             }
         } elseif ('sohu.com' == $host) {
             \phpQuery::$defaultCharset = 'GBK';
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $title = iconv("GBK", "UTF-8", $title);
             $flash_url = pq("[property='og:videosrc']")->attr('content');
         } elseif ('qq.com' == $host) {
             $contentType = 'text/html;charset=gbk';
             \phpQuery::newDocument($content, $contentType);
             // The video id is taken from the link itself.
             preg_match("/vid=(.*)/i", $link, $vidMatch);
             $vid = isset($vidMatch[1]) ? $vidMatch[1] : '';
             $flash_url = 'http://static.video.qq.com/TPout.swf?vid=' . $vid . '&auto=0';
             $title = pq("#" . $vid)->attr('title');
         } elseif ('sina.com.cn' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             preg_match("/swfOutsideUrl:\\'(.+?)\\'/i", $content, $flashvar);
             $flash_url = isset($flashvar[1]) ? $flashvar[1] : '';
         } elseif ('yinyuetai.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $flash_url = pq("[property='og:videosrc']")->attr('content');
         } elseif ('iqiyi.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $obj = pq("#videoArea")->find('div')->eq(0);
             $temp1 = $obj->attr('data-player-videoid');
             preg_match("/iqiyi.com\\/(.*).html/i", $link, $pathMatch);
             $temp2 = isset($pathMatch[1]) ? $pathMatch[1] : '';
             $temp3 = $obj->attr('data-player-albumid');
             $temp4 = $obj->attr('data-player-tvid');
             $flash_url = 'http://player.video.qiyi.com/' . $temp1 . '/0/0/' . $temp2 . '.swf-albumId=' . $temp3 . '-tvId=' . $temp4;
         }
         $return['title'] = text($title);
         $return['flash_url'] = urldecode((string) $flash_url);
         S($cacheKey, $return, 60 * 60);
     }
     return $return;
 }
Example #6
0
 /**
  * Simulate a campus-card login and scrape the account overview page.
  *
  * Posts the credentials to the unified login URL, follows the redirects and
  * parses the returned HTML with phpQuery.
  *
  * @param string $user Value sent as IDToken1.
  * @param string $pwd  Value sent as IDToken2.
  * @return array Array with three keys:
  *               'total' - the account span texts in chunks of four, plus
  *                         'total' (sum of only_num() over the third text of
  *                         each chunk) and 'name' (the welcome text without
  *                         its greeting prefix);
  *               'list'  - the texts of the recent-records table cells in
  *                         chunks of five, the first 15 cells skipped;
  *               'other' - the per-chunk amounts that were summed.
  */
 public function ykt_login($user, $pwd)
 {
     $data = array(
         'IDToken0' => '',
         'IDToken1' => $user,
         'IDToken2' => $pwd,
         'IDButton' => 'Submit',
         'goto' => 'aHR0cDovL215aWQuam11LmVkdS5jbi9pZHMvVXNlckNoZWNrLmFzcHg/Z290bz1odHRwOi8vbXlpZC5qbXUuZWR1LmNuL3lrdC9kZWZhdWx0LmFzcHg/ZnJvbUlEUz0x',
         'goto_Url' => 'http://myid.jmu.edu.cn/ids/UserCheck.aspx?goto=http://myid.jmu.edu.cn/ykt/default.aspx?fromIDS=1',
         'encoded' => 'true',
         'inputCode' => '',
         'gx_charset' => 'UTF-8',
     );
     $post = http_build_query($data);
     // Unified login address.
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, "http://id.jmu.edu.cn/amserver/UI/Login");
     curl_setopt($ch, CURLOPT_REFERER, 'http://id.jmu.edu.cn/amserver/UI/Login?goto=http%3a%2f%2fmyid.jmu.edu.cn%2fykt%2fdefault.aspx%3ffromIDS%3d1');
     curl_setopt($ch, CURLOPT_HTTPHEADER, $this->passheader);
     curl_setopt($ch, CURLOPT_HEADER, 1);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
     // Send the form as a POST request.
     curl_setopt($ch, CURLOPT_POST, 1);
     curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
     // An empty cookie file name switches on in-memory cookie handling, so
     // cookies survive the redirects without a jar file on disk. The jar was
     // never read back, and its time()-based name could collide between
     // logins started within the same second.
     curl_setopt($ch, CURLOPT_COOKIEFILE, '');
     $result = curl_exec($ch);
     curl_close($ch);
     Vendor('phpQuery');
     phpQuery::$defaultCharset = 'UTF-8';
     phpQuery::newDocumentHTML($result);
     $username = pq("#welcome")->text();
     $examname = pq("#ctl00_ContentPlaceHolder1_MainCallbackPanel_pageControl_Page0_Overview_AccountsControl_nbAccounts")->find('span');
     $list = array();
     foreach ($examname as $company) {
         $list[] = pq($company)->text();
     }
     // The first two span texts are dropped; the rest forms rows of four.
     $groupclass = array_chunk(array_slice($list, 2), 4);
     $num = 0;
     // Initialised so that the return value is an array even without rows.
     $other = array();
     foreach ($groupclass as $value) {
         // The last chunk can be shorter than four entries.
         $nowjiage = $this->only_num(isset($value[2]) ? $value[2] : null);
         $num = $num + $nowjiage;
         $other[] = $nowjiage;
     }
     // Running total over all rows.
     $groupclass['total'] = $num;
     $groupclass['name'] = str_replace('欢迎您, ', '', $username);
     $xiaofeiinfo = pq("#ctl00_ContentPlaceHolder1_MainCallbackPanel_pageControl_Page0_Overview_navBar_GCTC1_RecentlyView_RecentlyRecGrid_DXMainTable")->find('td');
     $xiaofeilist = array();
     foreach ($xiaofeiinfo as $company) {
         $xiaofeilist[] = pq($company)->text();
     }
     // The first 15 cells are dropped; the rest forms rows of five.
     $groupinfo = array_chunk(array_slice($xiaofeilist, 15), 5);
     return array('total' => $groupclass, 'list' => $groupinfo, 'other' => $other);
 }
 /**
  * Resolve a video page URL to its title, Flash player URL and thumbnail.
  *
  * Handles youku, ku6, tudou, sohu, qq, sina, yinyuetai, iqiyi and bilibili
  * links. The result is cached through S() for one hour under a key built
  * from md5($link).
  *
  * @param string $link URL of the video page.
  * @return array Array with the keys 'title', 'flash_url' and 'img_url'.
  *               For a host that is not recognised all three are derived
  *               from empty strings.
  */
 public function getVideoInfo($link)
 {
     $cacheKey = 'video_info_' . md5($link);
     $return = S($cacheKey);
     if (empty($return)) {
         require_once './ThinkPHP/Library/Vendor/Collection/phpQuery.php';
         // Start from defined values so that an unsupported host or a page
         // lacking the expected markup never reads an undefined variable.
         $return = array();
         $title = '';
         $flash_url = '';
         $img_url = '';
         preg_match('/(youku\.com|ku6\.com|sohu\.com|sina\.com\.cn|qq\.com|tudou\.com|yinyuetai\.com|iqiyi\.com|bilibili\.com)/i', $link, $hosts);
         // The match is case-insensitive, so normalise before comparing.
         $host = isset($hosts[1]) ? strtolower($hosts[1]) : '';
         $content = get_content_by_url($link);
         if ('youku.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $flash_url = pq("#link2")->attr('value');
             // Thumbnail: looked up through the play API by the player's sid.
             preg_match("/sid\\/(.*?)\\//", $flash_url, $id);
             $videoId = isset($id[1]) ? $id[1] : '';
             $json = get_content_by_url('http://play.youku.com/play/get.json?vid=' . $videoId . '&ct=10&ran=1951');
             $json = json_decode($json, true);
             $img_url = isset($json['data']['video']['logo']) ? $json['data']['video']['logo'] : '';
         } elseif ('ku6.com' == $host) {
             \phpQuery::$defaultCharset = 'GBK';
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $flash_url = pq(".ckl_input")->eq(0)->attr('value');
             $title = iconv("GBK", "UTF-8", $title);
             // Thumbnail
             preg_match("/show\\/(.*?).html/", $link, $id);
             $videoId = isset($id[1]) ? $id[1] : '';
             $json = get_content_by_url('http://v.ku6.com/fetch.htm?t=getVideo4Player&vid=' . $videoId);
             $json = json_decode($json, true);
             $img_url = isset($json['data']['bigpicpath']) ? $json['data']['bigpicpath'] : '';
         } elseif ('tudou.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             preg_match('/iid:(.*?)\\s+,icode/s', $content, $program);
             $programId = intval(isset($program[1]) ? $program[1] : 0);
             if (strpos($link, 'www.tudou.com/albumplay') !== false) {
                 preg_match("/albumplay\\/([\\w\\-\\.]+)[\\/|\\.]/", $link, $album);
                 $albumId = isset($album[1]) ? $album[1] : '';
                 $flash_url = 'http://www.tudou.com/a/' . $albumId . '/&iid=' . $programId . '/v.swf';
             } elseif (strpos($link, 'www.tudou.com/programs') !== false) {
                 $flash_url = 'http://www.tudou.com/v/' . $programId . '/v.swf';
             } elseif (strpos($link, 'www.tudou.com/listplay') !== false) {
                 preg_match("/listplay\\/([\\w\\-\\.]+)\\//", $link, $list);
                 $listId = isset($list[1]) ? $list[1] : '';
                 $flash_url = 'http://www.tudou.com/l/' . $listId . '/&iid=' . $programId . '/v.swf';
             }
             // Thumbnail
             $json = get_content_by_url('http://api.tudou.com/v6/video/info?app_key=myKey&format=json&itemCodes=' . $programId);
             $json = json_decode($json, true);
             $img_url = isset($json['results'][0]['bigPicUrl']) ? $json['results'][0]['bigPicUrl'] : '';
         } elseif ('sohu.com' == $host) {
             \phpQuery::$defaultCharset = 'GBK';
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $title = iconv("GBK", "UTF-8", $title);
             $flash_url = pq("[property='og:videosrc']")->attr('content');
             // Thumbnail
             preg_match("/com\\/(.*?)\\/v.swf/", $flash_url, $id);
             $videoId = isset($id[1]) ? $id[1] : '';
             $json = get_content_by_url('http://hot.vrs.sohu.com/vrs_flash.action?vid=' . $videoId);
             $json = json_decode($json, true);
             $img_url = isset($json['data']['coverImg']) ? $json['data']['coverImg'] : '';
         } elseif ('qq.com' == $host) {
             $contentType = 'text/html;charset=gbk';
             \phpQuery::newDocument($content, $contentType);
             // The video id is taken from the page source.
             preg_match("/vid:\"(.*)\"/i", $content, $vidMatch);
             $vid = isset($vidMatch[1]) ? $vidMatch[1] : '';
             $flash_url = 'http://static.video.qq.com/TPout.swf?vid=' . $vid . '&auto=0';
             $title = pq("#" . $vid)->attr('title');
             // Thumbnail
             $img_url = 'http://vpic.video.qq.com/d/' . $vid . '_ori_1.jpg';
         } elseif ('sina.com.cn' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             preg_match("/swfOutsideUrl:\\'(.+?)\\'/i", $content, $flashvar);
             $flash_url = isset($flashvar[1]) ? $flashvar[1] : '';
             // Thumbnail; the capture group is optional and may be absent.
             preg_match("/pic[\\s]*:[\\s]*[\"|\\']?[\\s]*([^'|\"]+)?/", $content, $mch1);
             $img_url = isset($mch1[1]) ? $mch1[1] : '';
         } elseif ('yinyuetai.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $flash_url = pq("[property='og:videosrc']")->attr('content');
             // Thumbnail
             $img_url = pq("[property='og:image']")->attr('content');
         } elseif ('iqiyi.com' == $host) {
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             $obj = pq("#videoArea")->find('div')->eq(0);
             $temp1 = $obj->attr('data-player-videoid');
             preg_match("/iqiyi.com\\/(.*).html/i", $link, $pathMatch);
             $temp2 = isset($pathMatch[1]) ? $pathMatch[1] : '';
             $temp3 = $obj->attr('data-player-albumid');
             $temp4 = $obj->attr('data-player-tvid');
             $flash_url = 'http://player.video.qiyi.com/' . $temp1 . '/0/0/' . $temp2 . '.swf-albumId=' . $temp3 . '-tvId=' . $temp4;
             // iqiyi filters cross-site requests, so a default picture is used.
             $img_url = get_pic_src('/Public/images/iqiyi.jpg');
         } elseif ('bilibili.com' == $host) {
             $content = $this->gzdecode($content);
             \phpQuery::newDocument($content);
             $title = pq("title")->html();
             // Thumbnail
             $img_url = pq("[class='cover_image']")->attr('src');
             // Player URL: cid and aid are read from the first script in #bofqi.
             $url_js = pq('#bofqi')->find('script')->eq(0)->html();
             $url_cid = preg_match('/cid=(\w*)/', $url_js, $cidMatch) ? $cidMatch[1] : '';
             $url_aid = preg_match('/aid=(\w*)/', $url_js, $aidMatch) ? $aidMatch[1] : '';
             $flash_url = "http://static.hdslb.com/play.swf" . "?cid=" . $url_cid . "&aid=" . $url_aid;
         }
         $return['title'] = text($title);
         $return['flash_url'] = urldecode((string) $flash_url);
         $return['img_url'] = urldecode((string) $img_url);
         S($cacheKey, $return, 60 * 60);
     }
     return $return;
 }
Example #8
0
 /**
  * Fetch the user's graded-exam history.
  *
  * Requests the exam history page using the cookie file named in
  * $_SESSION['edu_cookie'] and collects the span texts found in the cells of
  * the result grid.
  *
  * @return array Rows of six span texts each, keyed by the first text of
  *               the row.
  */
 public function getUserexam()
 {
     // Options are applied one by one, in this order.
     $options = array(
         CURLOPT_URL => "http://jwgl3.jmu.edu.cn/Student/RegistExam/MyExamHistory.aspx",
         CURLOPT_REFERER => 'http://jwgl3.jmu.edu.cn/Student/Left.aspx',
         CURLOPT_HTTPHEADER => $this->header,
         CURLOPT_HEADER => 0,
         CURLOPT_RETURNTRANSFER => 1,
         CURLOPT_FOLLOWLOCATION => 1,
         CURLOPT_COOKIEFILE => $_SESSION['edu_cookie'],
     );
     $curl = curl_init();
     foreach ($options as $option => $setting) {
         curl_setopt($curl, $option, $setting);
     }
     $historyHtml = curl_exec($curl);
     curl_close($curl);
     // Parse the exam results out of the returned page.
     Vendor('phpQuery');
     phpQuery::$defaultCharset = 'UTF-8';
     phpQuery::newDocumentHTML($historyHtml);
     $texts = array();
     foreach (pq("#ctl00_ContentPlaceHolder1_GridView1 td")->find('span') as $span) {
         $texts[] = pq($span)->text();
     }
     $rowsByFirstText = array();
     foreach (array_chunk($texts, 6) as $row) {
         $rowsByFirstText[$row[0]] = $row;
     }
     return $rowsByFirstText;
 }
 /**
  * Drop all loaded phpQuery documents and load the given markup instead.
  *
  * @param string $getcontent Markup handed to phpQuery::newDocument().
  * @return void
  */
 public function _setDocument($getcontent)
 {
     phpQuery::$documents = array();
     // Set the default charset before the document is created. Assigning it
     // afterwards cannot affect the document that was just created and would
     // only apply to documents created later.
     phpQuery::$defaultCharset = 'UTF-8';
     phpQuery::newDocument($getcontent);
 }
 /**
  * Render the list of threads scraped from the configured Baidu Tieba forum.
  *
  * The list is served from the 'ADDONS_SnatchTieba' cache entry when it
  * exists. Otherwise the forum page is fetched and parsed, the result is
  * built through havejinghua() or deljinghua() depending on the
  * 'tieba_jinghua' setting, and it is cached when 'tieba_cache' is positive.
  *
  * @return void The list is assigned to the template variable 'Duotw'.
  */
 public function index()
 {
     // Plugin configuration.
     $config = Amango_Addons_Config();
     // Use the cached list when there is one.
     $article = S('ADDONS_SnatchTieba');
     if (empty($article)) {
         Amango_Addons_Import('phpQuery/phpQuery.php');
         \phpQuery::$defaultCharset = 'GBK';
         \phpQuery::newDocumentFile('http://tieba.baidu.com/f?kw=' . urlencode($config['tieba_name']) . '&fr=ala0');
         // Both lists start out defined so they can always be passed on.
         $articlecontent = array('top' => array(), 'other' => array());
         $toptitle = array();
         $withTop = $config['tieba_jinghua'] == 1;
         // Pinned threads are collected first: their titles are needed below
         // to keep them out of the regular list.
         if ($withTop) {
             foreach (\pq(".thread_top") as $li) {
                 // Reply count
                 $tz_commont = iconv('GBK', 'UTF-8', \pq($li)->find('.threadlist_rep_num')->html());
                 // Title
                 $tz_title = iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->html());
                 // Link
                 $tz_link = 'http://tieba.baidu.com' . iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->attr('href'));
                 // Picture of this thread, not a leftover from another one.
                 $tz_pic = iconv('GBK', 'UTF-8', \pq($li)->find('img')->attr('original'));
                 $toptitle[] = $tz_title;
                 $articlecontent['top'][] = array('Title' => "[" . $tz_commont . "]" . $tz_title, 'Description' => '', 'PicUrl' => empty($tz_pic) ? '' : $tz_pic, 'Url' => $tz_link);
             }
         }
         foreach (\pq(".j_thread_list") as $li) {
             // Reply count
             $tz_commont = iconv('GBK', 'UTF-8', \pq($li)->find('.threadlist_rep_num')->html());
             // Title
             $tz_title = iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->html());
             // Abstract, with whitespace and empty comment markers removed
             $tz_content = iconv('GBK', 'UTF-8', \pq($li)->find('.threadlist_abs_onlyline')->html());
             $tz_content = preg_replace('/\s/', '', $tz_content);
             $tz_content = str_replace('<!---->', '', $tz_content);
             // Link
             $tz_link = 'http://tieba.baidu.com' . iconv('GBK', 'UTF-8', \pq($li)->find('a.j_th_tit')->attr('href'));
             // Author
             $tz_author = strip_tags(iconv('GBK', 'UTF-8', \pq($li)->find('span.tb_icon_author a')->html()));
             $tz_author = preg_replace('/\s/', '', $tz_author);
             // Last replier
             $tz_reply = iconv('GBK', 'UTF-8', \pq($li)->find('span.tb_icon_author_rely a')->html());
             // Time of the last reply
             $tz_replytime = \pq($li)->find('span.j_reply_data')->text();
             $tz_replytime = preg_replace('/\s/', '', $tz_replytime);
             // Picture
             $tz_pic = iconv('GBK', 'UTF-8', \pq($li)->find('img')->attr('original'));
             // Threads already listed as pinned are not repeated.
             if (!in_array($tz_title, $toptitle, true)) {
                 $articlecontent['other'][] = array('Title' => 1 == $config['tieba_extra'] ? "[" . $tz_commont . "]" . $tz_title . "\n" . $tz_content . "\n作者:" . $tz_author . "|回复:" . $tz_reply . "-" . $tz_replytime : $tz_title . "\n" . $tz_content, 'Description' => '', 'PicUrl' => empty($tz_pic) ? '' : $tz_pic, 'Url' => $tz_link);
             }
         }
         // The number of entries is limited to the range 1..8.
         $allownums = $config['tieba_nums'] > 8 ? 8 : $config['tieba_nums'];
         $allownums = $allownums >= 1 ? $allownums : 1;
         if ($withTop) {
             $article = self::havejinghua($articlecontent['top'], $articlecontent['other'], $allownums);
         } else {
             $article = self::deljinghua($articlecontent['other'], $allownums);
         }
         \phpQuery::unloadDocuments();
         if ($config['tieba_cache'] > 0 && !empty($article)) {
             S('ADDONS_SnatchTieba', $article, $config['tieba_cache']);
         }
     }
     $this->assign('Duotw', $article);
     $this->display();
 }