/**
 * Crawl the list pages of one category and pass every parsed entry to getinfodetail().
 *
 * Page 1 is the bare category URL; page N (N > 1) appends "index{N}.html" to it.
 * Each page is fetched with up to three attempts, converted from GBK to UTF-8 and
 * parsed with getParseListInfo(). The duplicate check via checkArticleByOname() is
 * disabled (commented out) in this variant, so every parsed entry is processed.
 *
 * Reads the globals $_root (prefix for all relative URLs) and $cid (stored in each
 * entry and used in the name of the error dump file).
 *
 * @param array $_cate Category record; only $_cate['ourl'] is read.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist($_cate)
{
    global $_root, $cid;

    $maxAttempts = 3;

    // Original TODO: derive the page count from "atotal"; it is hard-coded to 2 here.
    for ($i = 1; $i <= 2; $i++) {
        $suf = $i == 1 ? '' : 'index' . $i . '.html';
        $url = $_root . $_cate['ourl'] . $suf;
        echo "\n++++ ", $url, " ++++\n";

        for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
            $html = getHtml($url);
            if ($html) {
                break;
            }
            // Back off before the next try, but do not stall 12 s after the last one.
            if ($attempt < $maxAttempts) {
                sleep(12);
            }
        }

        $html = mb_convert_encoding($html, "UTF-8", "GBK");
        $matchs = getParseListInfo($html);

        if (empty($matchs)) {
            // Keep the unparseable page (tagged with the page number) for inspection.
            $html .= "\r\n\r\n+++++++\r\n {$i}";
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            // The numeric id is taken from the last path segment of the "ovid" value.
            $segments = explode('/', $list['ovid']);
            $lastSegment = array_pop($segments);
            $oid = intval($lastSegment);
            $oname = trim($list['name']);

            $ainfo = array(
                'thum' => $list['thum'],
                'ourl' => $_root . $list['ourl'],
                'purl' => $list['ovid'],
                'actor' => $list['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Crawl the list pages of one category and pass every new entry to getinfodetail().
 *
 * Page 1 is the bare category URL; page N (N > 1) appends "index{N}.html" to it.
 * Each page is converted from GBK to UTF-8 and parsed with getParseListInfo().
 * Entries for which checkArticleByOname() returns a truthy id are skipped.
 *
 * Reads the globals $_root (prefix for relative URLs) and $cid (stored in each entry
 * and used in the name of the error dump file).
 *
 * @param array $cate Category record; only $cate['ourl'] is read. Taken by reference
 *                    but not modified here.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$cate)
{
    global $_root, $cid;

    // Original TODO: derive the page count from "atotal"; it is hard-coded to 2 here.
    for ($i = 1; $i <= 2; $i++) {
        $suf = $i == 1 ? '' : 'index' . $i . '.html';
        $url = $_root . $cate['ourl'] . $suf;
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
        $matchs = getParseListInfo($html);

        if (empty($matchs)) {
            // Keep the unparseable page for inspection before giving up.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            // The id is every digit found in the entry URL, concatenated.
            $oid = preg_replace('#[^\\d]+#', '', $list['ourl']);
            $oname = trim($list['name']);

            // Skip entries that are already stored under the same name.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            // Only relative URLs get the site root prepended; absolute http:// and
            // https:// URLs are used as they are.
            $ourl = $list['ourl'];
            if (!preg_match('#^https?://#i', $ourl)) {
                $ourl = $_root . $ourl;
            }

            $ainfo = array(
                'thum' => $list['thum'],
                'ourl' => $ourl,
                'purl' => '',
                'actor' => $list['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Crawl the list pages of one category URL and pass every entry to getinfodetail().
 *
 * Page 1 is "{$cateurl}.html"; page N (N > 1) is "{$cateurl}_{N}.html". Crawling
 * starts at the global $startPage (set to 1 when it is falsy) and stops after page 2.
 * Each page is converted from GBK to UTF-8 and matched against an inline pattern.
 * The duplicate check via checkArticleByOname() is disabled in this variant.
 *
 * Reads the globals $_root (prefix for entry URLs) and $cid (stored in each entry and
 * used in the name of the error dump file).
 *
 * @param string $cateurl Category URL without the ".html" suffix. Taken by reference
 *                        but not modified here.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$cateurl)
{
    global $startPage, $_root, $cid;

    if (!$startPage) {
        $startPage = 1;
    }

    // Capture groups: 1 thumbnail src, 2 detail URL, 3 title, 4 starring,
    // 5 category (unused below), 6 player URL.
    $pattern = '#<li onmousemove="[^"]+" onmouseout="[^"]+"><a href="[^"]+" class="aimg l" target="_blank"><img src="([^"]+)" alt="[^"]+" /></a>\\s+<h2><a href="(/view/index\\d+\\.html)" target="_blank">([^<]+)</a></h2>\\s+<p>主演:([^<]+)</p>\\s+<p>分类:([^<]+)</p>\\s+<p>人气:\\d+</p>\\s+<p>时间:[^<]+</p>\\s+<p><a href="(/player/index\\d+\\.html\\?\\d+-\\d+-\\d+)" class="btn1" target="_blank">马上观看</a></p></li>#Uis';

    // Original TODO: derive the page count from "atotal"; it is hard-coded to 2 here.
    for ($i = $startPage; $i <= 2; $i++) {
        if ($i == 1) {
            $url = $cateurl . '' . '.html';
        } else {
            $url = $cateurl . '_' . $i . '.html';
        }
        echo "\n++++ ", $url, " ++++\n";

        $html = mb_convert_encoding(getHtml($url), "UTF-8", "GBK");
        preg_match_all($pattern, $html, $entries, PREG_SET_ORDER);

        if (empty($entries)) {
            // Keep the unparseable page for inspection before giving up.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($entries as $entry) {
            $detailPath = $entry[2];

            $ainfo = array(
                'thum' => $entry[1],
                'ourl' => $_root . $detailPath,
                'purl' => $_root . $entry[6],
                // Collapse the ",/," separator in the starring list to a plain comma.
                'actor' => str_replace(',/,', ',', $entry[4]),
                'name' => trim($entry[3]),
                // The id is every digit found in the detail URL, concatenated.
                'oid' => preg_replace('#[^\\d]+#', '', $detailPath),
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Crawl the list pages of one category and pass every entry to getinfodetail().
 *
 * Page 1 is "{root}{ourl}.html"; page N (N > 1) is "{root}{ourl}-{N}.html". Crawling
 * starts at the global $start_page (set to 1 when it is falsy) and continues up to
 * page 2000, or until a page yields no entries. No encoding conversion and no
 * duplicate check are performed in this variant.
 *
 * Reads the globals $_root (prefix for list page URLs) and $cid (stored in each entry
 * and used in the name of the error dump file).
 *
 * @param array $_cate Category record; only $_cate['ourl'] is read. Taken by reference
 *                     but not modified here.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid, $start_page;

    if (!$start_page) {
        $start_page = 1;
    }

    // Original TODO: derive the page count from "atotal"; the cap is hard-coded here.
    for ($i = $start_page; $i <= 2000; $i++) {
        if ($i == 1) {
            $pageSuffix = '';
        } else {
            $pageSuffix = '-' . $i;
        }
        $url = $_root . $_cate['ourl'] . $pageSuffix . '.html';
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);
        $entries = getParseListInfo($html);

        if (empty($entries)) {
            // Keep the unparseable page for inspection before giving up.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($entries as $entry) {
            // The id is every digit found in the entry URL, concatenated, as an int.
            $digits = preg_replace('#[^\\d]+#', '', $entry['ourl']);
            $oid = intval($digits);
            $oname = trim($entry['name']);

            $ourl = getFullPath($entry['ourl']);
            // The player URL is built from the id: /play/{id}-1-1.html
            $purl = getFullPath('/play/' . $oid . '-1-1.html');

            $ainfo = array(
                'thum' => $entry['cover'],
                'ourl' => $ourl,
                'purl' => $purl,
                'actor' => $entry['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
        }
    }

    return 0;
}
/**
 * Crawl the list pages of one category and pass every new entry to getinfodetail().
 *
 * The page URL is built with sprintf() from '%s' followed by the global format string
 * $cate_list_url, filled with $_root, $cate_info['ourl'] and the page number. Only the
 * markup inside <ul class="show-list fn-clear" id="contents"> is given to
 * getParseListInfo(). Entries for which checkArticleByOname() returns a truthy id are
 * skipped. Crawling continues up to page 5000, or until a page yields no entries.
 *
 * Reads the globals $_root, $cate_list_url and $cid ($cid is used in the name of the
 * error dump file).
 *
 * @param array $cate_info Category record; only $cate_info['ourl'] is read. Taken by
 *                         reference but not modified here.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$cate_info)
{
    global $_root, $cid, $cate_list_url;

    // Original TODO: derive the page count from "atotal"; the cap is hard-coded here.
    for ($i = 1; $i <= 5000; $i++) {
        $url = sprintf('%s' . $cate_list_url, $_root, $cate_info['ourl'], $i);
        echo "\n++++ ", $url, " ++++\n";

        $page = (string) getHtml($url);

        // Narrow the page to the list container. When the container is missing the
        // parser gets an empty string instead of reading an undefined offset.
        $html = '';
        if (preg_match('#<ul class="show-list fn-clear" id="contents">(.+)</ul>#Uis', $page, $container)) {
            $html = $container[1];
        }

        $matchs = getParseListInfo($html);

        if (empty($matchs)) {
            // Dump the list container, or the whole page when the container was not
            // found, so the failure can be diagnosed.
            file_put_contents('match_error_list' . $cid . '.html', $html !== '' ? $html : $page);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            $oname = trim($list['name']);

            // Skip entries that are already stored under the same name.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            // The parsed entry is forwarded as-is, with 'oid' forced to 0.
            $list['oid'] = 0;
            getinfodetail($list);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Crawl the list pages of one category and pass every new entry to getinfodetail().
 *
 * Page 1 is "{root}{ourl}.html"; page N (N > 1) is "{root}{ourl}_{N}.html". Crawling
 * starts at the global $startPage, falling back to page 1 when it is unset or falsy,
 * and continues up to page 20 or until a page yields no entries. Each page is
 * converted from GBK to UTF-8 and parsed with getParseListInfo(). Entries for which
 * checkArticleByOname() returns a truthy id are skipped.
 *
 * Reads the globals $_root (prefix for all URLs), $cid (stored in each entry and used
 * in the name of the error dump file) and $startPage (not modified here).
 *
 * @param array $_cate Category record; only $_cate['ourl'] is read. Taken by reference
 *                     but not modified here.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid, $startPage;

    // Without a default, an unset $startPage made the first request go to "..._.html".
    $firstPage = $startPage ? $startPage : 1;

    // Original TODO: derive the page count from "atotal"; the cap is hard-coded here.
    for ($i = $firstPage; $i <= 20; $i++) {
        $suf = $i == 1 ? '' : '_' . $i;
        $url = $_root . $_cate['ourl'] . $suf . '.html';
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);
        $html = mb_convert_encoding($html, "UTF-8", "GBK");

        // Snapshot of the most recently fetched page; overwritten on every iteration.
        file_put_contents('list_match.html', $html);

        $matchs = getParseListInfo($html);

        if (empty($matchs)) {
            // Keep the unparseable page for inspection before giving up.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            // The id is every digit found in the entry URL, concatenated.
            $oid = preg_replace('#[^\\d]+#', '', $list['ourl']);
            $oname = trim($list['title']);

            // Skip entries that are already stored under the same name.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            $ourl = $_root . $list['ourl'];
            // NOTE(review): the "?{id}-0-0" suffix is appended twice; kept as in the
            // original — confirm against the target site's player URL format.
            $purl = $_root . 'player/index' . $oid . '.html?' . $oid . '-0-0?' . $oid . '-0-0';

            $ainfo = array(
                'thum' => $_root . $list['thum'],
                'ourl' => $ourl,
                'purl' => $purl,
                'actor' => '',
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Crawl the list pages of one category and pass every new entry to getinfodetail().
 *
 * The page URL is "{root}/?m=vod-type-id-{ourl}-pg-{page}.htm", where both the
 * category id and the page number are formatted as integers. Crawling continues up to
 * page 2000, or until a page yields no entries. No encoding conversion is performed in
 * this variant. Entries for which checkArticleByOname() returns a truthy id are skipped.
 *
 * Reads the globals $_root (prefix for all URLs) and $cid (stored in each entry and
 * used in the name of the error dump file).
 *
 * @param array $_cate Category record; only $_cate['ourl'] is read. Taken by reference
 *                     but not modified here.
 * @return int 0 when all pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid;

    // Original TODO: derive the page count from "atotal"; the cap is hard-coded here.
    for ($i = 1; $i <= 2000; $i++) {
        $url = sprintf('%s/?m=vod-type-id-%d-pg-%d.htm', $_root, $_cate['ourl'], $i);
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);
        $matchs = getParseListInfo($html);

        if (empty($matchs)) {
            // Keep the unparseable page for inspection before giving up.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            // The id is every digit found in the entry URL, concatenated.
            $oid = preg_replace('#[^\\d]+#', '', $list['url']);
            $oname = trim($list['name']);

            // Skip entries that are already stored under the same name.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            // Unlike the other fields, the parsed 'actor' value is stored under 'keyword'.
            $ainfo = array(
                'thum' => $list['thum'],
                'ourl' => $_root . $list['url'],
                'purl' => $_root . $list['purl'],
                'keyword' => $list['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(5);
        }
    }

    return 0;
}
preg_match('#/(\\d+)\\.html#is', $uv, $mh); $ovid = @$mh[1]; if (!$ovid) { echo "\n==== get ovid failed Ourl: {$uv} Page: {$start_page} ====\n"; continue; } // check local exists $check = $m->checkid97vid($ovid); if ($check) { echo "\n=== {$ovid} already exists! ====\n"; continue; } $infoUrl = sprintf('%svideos/play/mid/%d.html', $_root, $ovid); $title = trim($titlePool[$uk]); $sourceUrl = sprintf('%svideos/resource/id/%d', $_root, $ovid); $data = getinfodetail($sourceUrl); $data['name'] = $title; $data['ourl'] = $ovid; $data['thum'] = $picPool[$uk]; $vinfo = getParseVideoInfo($uv); $data['vols'] = $vinfo; //var_dump($data);exit; if (empty($vinfo)) { echo "\n==== Get Parse Info Failed Ourl: {$infoUrl} Purl: {$sourceUrl} Page: {$start_page} =====\n"; if (stripos($uv, '/resource/id/') !== false) { continue; } exit; } $aid = checkArticleByOname($data['name']); if ($aid) {