/**
 * Crawl the list pages of one category and import every entry that is not
 * stored yet.
 *
 * Page 1 is "{$_root}{$cate['ourl']}"; page N (N > 1) is the same URL with
 * "index{N}.html" appended. Each page is fetched with getHtml(), converted from
 * GBK to UTF-8 and parsed with getParseListInfo(). Entries whose name is
 * already known to checkArticleByOname() are skipped; all others are passed to
 * getinfodetail(), followed by a one second pause.
 *
 * Reads the globals $_root (prefix for relative URLs) and $cid (stored on every
 * entry and used in the error-dump file name).
 *
 * @param array $cate Category record; only $cate['ourl'] is read.
 * @return int 6 when a list page yields no entries (the page is dumped to
 *             "match_error_list{$cid}.html"), 0 once all pages were processed.
 */
function getinfolist(&$cate)
{
    global $_root, $cid;

    // The page limit is hard-coded; the original note says it should be
    // computed from "atotal".
    for ($i = 1; $i <= 2; $i++) {
        $suf = $i == 1 ? '' : 'index' . $i . '.html';
        $url = $_root . $cate['ourl'] . $suf;
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
        $matchs = getParseListInfo($html);

        if (empty($matchs)) {
            // Keep the page that could not be parsed for later inspection.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            // The numeric id is whatever digits appear in the entry URL.
            $oid = preg_replace('#[^\\d]+#', '', $list['ourl']);
            $oname = trim($list['name']);

            // Already imported: report and move on to the next entry.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            // Prefix the site root only for relative links. Both http:// and
            // https:// count as absolute; checking for "http://" alone would
            // prepend $_root to https links and produce a broken URL.
            $ourl = $list['ourl'];
            if (!preg_match('#^https?://#i', $ourl)) {
                $ourl = $_root . $ourl;
            }

            $purl = '';
            $ainfo = array(
                'thum' => $list['thum'],
                'ourl' => $ourl,
                'purl' => $purl,
                'actor' => $list['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Crawl up to 5000 list pages of one category and import every entry that is
 * not stored yet.
 *
 * The page URL is built with sprintf() from '%s' followed by the global format
 * string $cate_list_url, which receives $_root, $cate_info['ourl'] and the page
 * number. Only the markup inside the
 * <ul class="show-list fn-clear" id="contents"> container is handed to
 * getParseListInfo(). Entries whose name is already known to
 * checkArticleByOname() are skipped; all others get 'oid' set to 0 and are
 * passed to getinfodetail(), followed by a one second pause.
 *
 * @param array $cate_info Category record; only $cate_info['ourl'] is read.
 * @return int 6 when a page yields no entries (this is also how crawling stops
 *             before page 5000), 0 if every page produced entries.
 */
function getinfolist(&$cate_info)
{
    global $_root, $cid, $cate_list_url;

    // The page limit is hard-coded; the original note says it should be
    // computed from "atotal".
    for ($i = 1; $i <= 5000; $i++) {
        $url = sprintf('%s' . $cate_list_url, $_root, $cate_info['ourl'], $i);
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);

        // Narrow the page down to the list container. If the container is
        // missing the parser gets an empty string instead of an undefined
        // array offset.
        $listHtml = '';
        if (preg_match('#<ul class="show-list fn-clear" id="contents">(.+)</ul>#Uis', (string) $html, $container)) {
            $listHtml = $container[1];
        }

        $matchs = getParseListInfo($listHtml);
        if (empty($matchs)) {
            // Dump the container when there was one, otherwise the whole page,
            // so the failure can actually be diagnosed from the file.
            file_put_contents('match_error_list' . $cid . '.html', $listHtml !== '' ? $listHtml : $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            $oname = trim($list['name']);

            // Already imported: report and move on to the next entry.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            $list['oid'] = 0;
            getinfodetail($list);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Walk the list pages of one category, from page $startPage up to page 20, and
 * import every entry that is not stored yet.
 *
 * Page 1 is "{$_root}{$_cate['ourl']}.html"; page N is
 * "{$_root}{$_cate['ourl']}_{N}.html". Every fetched page is converted from GBK
 * to UTF-8 and written to "list_match.html" before it is parsed.
 *
 * @param array $_cate Category record; only $_cate['ourl'] is read.
 * @return int 6 when a page yields no entries (the page is also dumped to
 *             "match_error_list{$cid}.html"), 0 once the loop has finished.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid, $startPage;

    for ($page = $startPage; $page <= 20; $page++) {
        // The first page carries no numeric suffix.
        if ($page == 1) {
            $pageSuffix = '';
        } else {
            $pageSuffix = '_' . $page;
        }
        $listUrl = $_root . $_cate['ourl'] . $pageSuffix . '.html';
        echo "\n++++ " . $listUrl . " ++++\n";

        $pageHtml = mb_convert_encoding(getHtml($listUrl), "UTF-8", "GBK");
        file_put_contents('list_match.html', $pageHtml);

        $entries = getParseListInfo($pageHtml);
        if (empty($entries)) {
            file_put_contents('match_error_list' . $cid . '.html', $pageHtml);
            echo 'Cate list Failed ' . $listUrl . "\r\n";
            return 6;
        }

        foreach ($entries as $entry) {
            // The numeric id is whatever digits appear in the entry URL.
            $sourceId = preg_replace('#[^\\d]+#', '', $entry['ourl']);
            $title = trim($entry['title']);

            // Entries that are already stored are reported and skipped.
            $existingId = checkArticleByOname($title);
            if ($existingId) {
                echo "{$existingId}已存在未更新!\r\n";
                continue;
            }

            // The player URL repeats "<id>-0-0" twice in its query part.
            $playerSuffix = $sourceId . '-0-0';

            $detail = array();
            $detail['thum'] = $_root . $entry['thum'];
            $detail['ourl'] = $_root . $entry['ourl'];
            $detail['purl'] = $_root . 'player/index' . $sourceId . '.html?' . $playerSuffix . '?' . $playerSuffix;
            $detail['actor'] = '';
            $detail['name'] = $title;
            $detail['oid'] = $sourceId;
            $detail['cid'] = $cid;

            getinfodetail($detail);
            sleep(1);
        }
    }

    return 0;
}
/**
 * Walk up to 2000 list pages of one category and import every entry that is
 * not stored yet.
 *
 * Page URLs have the form "{$_root}/?m=vod-type-id-{id}-pg-{page}.htm", where
 * the id is $_cate['ourl'] formatted as an integer. Pages are parsed as fetched,
 * without any encoding conversion.
 *
 * @param array $_cate Category record; only $_cate['ourl'] is read.
 * @return int 6 when a page yields no entries (the page is dumped to
 *             "match_error_list{$cid}.html"), 0 if every page produced entries.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid;

    for ($page = 1; $page <= 2000; $page++) {
        $listUrl = sprintf('%s/?m=vod-type-id-%d-pg-%d.htm', $_root, $_cate['ourl'], $page);
        echo "\n++++ " . $listUrl . " ++++\n";

        $pageHtml = getHtml($listUrl);
        $entries = getParseListInfo($pageHtml);

        if (empty($entries)) {
            file_put_contents('match_error_list' . $cid . '.html', $pageHtml);
            echo 'Cate list Failed ' . $listUrl . "\r\n";
            return 6;
        }

        foreach ($entries as $entry) {
            // The numeric id is whatever digits appear in the entry URL.
            $sourceId = preg_replace('#[^\\d]+#', '', $entry['url']);
            $title = trim($entry['name']);

            // Entries that are already stored are reported and skipped.
            $existingId = checkArticleByOname($title);
            if ($existingId) {
                echo "{$existingId}已存在未更新!\r\n";
                continue;
            }

            $detail = array();
            $detail['thum'] = $entry['thum'];
            $detail['ourl'] = $_root . $entry['url'];
            $detail['purl'] = $_root . $entry['purl'];
            // The list's actor field is forwarded under the 'keyword' key.
            $detail['keyword'] = $entry['actor'];
            $detail['name'] = $title;
            $detail['oid'] = $sourceId;
            $detail['cid'] = $cid;

            getinfodetail($detail);
            sleep(5);
        }
    }

    return 0;
}
// Fragment of a per-video import loop body; the enclosing loop and the closing
// braces of the final else branch are outside this excerpt.
// Steps: build the resource URL for $ovid, obtain $data from
// getinfodetail($sourceUrl), fill in name/ourl/thum, and store the result of
// getParseVideoInfo($uv) in $data['vols']. If that result is empty, a failure line
// is printed; the item is skipped when $uv contains "/resource/id/", otherwise the
// script exits. Titles already known to checkArticleByOname() only get their
// volumes updated through addArticleVols(); unknown titles are inserted with
// addArticle(). Both successful paths record $ovid via $m->addid97vid(); a failed
// insert dumps $data and exits.
$sourceUrl = sprintf('%svideos/resource/id/%d', $_root, $ovid); $data = getinfodetail($sourceUrl); $data['name'] = $title; $data['ourl'] = $ovid; $data['thum'] = $picPool[$uk]; $vinfo = getParseVideoInfo($uv); $data['vols'] = $vinfo; //var_dump($data);exit; if (empty($vinfo)) { echo "\n==== Get Parse Info Failed Ourl: {$infoUrl} Purl: {$sourceUrl} Page: {$start_page} =====\n"; if (stripos($uv, '/resource/id/') !== false) { continue; } exit; } $aid = checkArticleByOname($data['name']); if ($aid) { $vdata = array('name' => $data['name'], 'vols' => $data['vols']); $aid = addArticleVols($vdata); echo "{$aid}已存在更新! ovid: {$ovid} Page: {$start_page} \r\n"; $m->addid97vid($ovid); continue; } $aid = addArticle($data); if ($aid) { $m->addid97vid($ovid); echo "\n=== Add Aid: {$aid} ovid: {$ovid} Page: {$start_page} OK ====\n"; } else { var_dump($data); echo "\r\n添加失败! Ourl: {$infoUrl} \r\n"; exit;
/**
 * Fetch one detail page, complete the article record and store it.
 *
 * The page at $data['ourl'] is downloaded and converted from GBK to UTF-8. The
 * introduction is taken from the <div class="introduction"> block, stripped of
 * tags and HTML entities, and cut to 256 characters when it is longer than 300.
 * Play data comes from getArticlePlayData($data['purl']). If an article with the
 * same name already exists only its volumes are updated through
 * addArticleVols(); otherwise the record is inserted with addArticle().
 *
 * $data is modified in place: 'keyword' is reset to '', 'actor' is cleaned,
 * 'utime', 'intro' and 'vols' are set, 'purl' is removed and $_root is stripped
 * from 'ourl'.
 *
 * The script exits when the page cannot be fetched, when the name or play data
 * is missing after a successful play-data fetch, or when addArticle() fails.
 *
 * @param array $data Article record; reads 'ourl', 'purl', 'name' and 'actor'.
 * @return int|null 0 when no play data could be obtained, 6 when an existing
 *                  article was updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $_root;

    echo $data['ourl'], "\n";

    $html = getHtml($data['ourl']);
    if ($html) {
        // Only convert a real response; a failed fetch goes straight to the check below.
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
    }
    if (!$html) {
        echo "获取html失败";
        exit;
    }

    $data['keyword'] = '';

    // The UTF-8 -> UTF-8 round trip is a validity check; iconv() returns false
    // (with a notice, suppressed here) on invalid input. Store '' in that case
    // rather than the boolean.
    $actor = isset($data['actor']) ? (string) $data['actor'] : '';
    $actor = @iconv("UTF-8", "UTF-8//TRANSLIT", $actor);
    $data['actor'] = $actor === false ? '' : $actor;

    $data['utime'] = time();

    $intro = '';
    if (preg_match('#<div class="introduction" itemprop="description"><p>(.+)</p></div>#Uis', $html, $match)) {
        $intro = $match[1];
    }
    $intro = @iconv("UTF-8", "UTF-8//TRANSLIT", $intro);
    $intro = strip_tags((string) $intro);
    // Drop HTML entities such as &nbsp; instead of decoding them.
    $intro = preg_replace('#&\\S+;#Uis', '', $intro);
    // Measure and cut with the same explicit encoding so the result does not
    // depend on mb_internal_encoding(). Limits (300 / 256) are unchanged.
    if (mb_strlen($intro, 'UTF-8') > 300) {
        $intro = mb_substr($intro, 0, 256, 'UTF-8');
    }
    $data['intro'] = trim($intro);

    $playhtml = getArticlePlayData($data['purl']);
    if (empty($playhtml)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }
    $data['vols'] = $playhtml;
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        exit;
    }

    // Store the URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // Existing article: only refresh its volumes.
    $oname = $data['name'];
    $aid = checkArticleByOname($oname);
    if ($aid) {
        $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($vdata);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if (!$aid) {
        var_dump($data);
        echo "\r\n添加失败! {$data['ourl']} \r\n";
        exit;
    }

    echo "添加成功! {$aid} \r\n";
}
/**
 * Fetch one detail page, complete the article record and store it.
 *
 * The page at $data['ourl'] is downloaded and converted from GBK to UTF-8; the
 * introduction is taken from the list that follows the "影片介绍" heading. Play
 * data is loaded with getArticlePlayData($data['purl']) and converted by
 * jsary2phpary(). An article whose name already exists only gets its volumes
 * updated; otherwise the record is inserted with addArticle().
 *
 * $data is modified in place ('keyword', 'utime', 'intro', 'vols' are set,
 * 'purl' is removed, $_root is stripped from 'ourl'). The script exits when the
 * page cannot be fetched or when addArticle() fails.
 *
 * @param array $data Article record; reads 'ourl', 'purl' and 'name'.
 * @return int|false|null 0 when no play data could be obtained, false when the
 *                        name or the volumes are missing, 6 when an existing
 *                        article was updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $model, $_root, $cid, $strreplace, $pregreplace;

    $detailUrl = $data['ourl'];
    echo $detailUrl, "\n";

    $pageHtml = mb_convert_encoding(getHtml($detailUrl), "UTF-8", "GBK");
    if (!$pageHtml) {
        echo "获取html失败";
        exit;
    }

    // Keyword extraction from the <meta> tag is disabled; the field is blanked.
    $data['keyword'] = '';
    $data['utime'] = time();

    $introMatch = array();
    preg_match('#<h3 class="ph3">影片介绍</h3>\\s+<ul>(.+)</ul>\\s+</div>\\s+</div>#Uis', $pageHtml, $introMatch);
    $rawIntro = isset($introMatch[1]) ? $introMatch[1] : '';
    // UTF-8 -> UTF-8 round trip; notices about invalid input are suppressed.
    $rawIntro = @iconv("UTF-8", "UTF-8//TRANSLIT", $rawIntro);
    // Strip tags, then drop HTML entities, then trim.
    $data['intro'] = trim(preg_replace('#&\\S+;#Uis', '', strip_tags($rawIntro)));

    $playData = getArticlePlayData($data['purl']);
    if (empty($playData)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }

    $data['vols'] = jsary2phpary($playData);
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        return false;
    }

    // Store the URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // Existing article: only refresh its volumes.
    $articleName = $data['name'];
    $existingId = checkArticleByOname($articleName);
    if ($existingId) {
        $volumeUpdate = array('name' => $data['name'], 'vols' => $data['vols']);
        $existingId = addArticleVols($volumeUpdate);
        echo "{$existingId}已存在更新!\r\n";
        return 6;
    }

    $newId = addArticle($data);
    if ($newId) {
        echo "添加成功! {$newId} \r\n";
        return;
    }

    var_dump($data);
    echo "\r\n添加失败! {$data['ourl']} \r\n";
    exit;
}
/**
 * Fetch one detail page, complete the article record and store it.
 *
 * The page at $data['ourl'] is downloaded and converted from GBK to UTF-8. The
 * introduction is taken from the block after the "剧情介绍:" heading, cleaned
 * (tags, HTML entities and '?' characters removed, whitespace collapsed) and
 * limited to 300 characters. The player URL is extracted from the first
 * ".../player-0-0.html" link on the page, overriding any 'purl' passed in; its
 * play data is loaded with getArticlePlayData() and parsed by
 * getParseVideoInfo(). An article whose name already exists only gets its
 * volumes updated; otherwise the record is inserted with addArticle().
 *
 * $data is modified in place ('keyword', 'utime', 'intro', 'vols' are set,
 * 'purl' is removed on success, $_root is stripped from 'ourl'). The script
 * exits when the page cannot be fetched or when addArticle() fails.
 *
 * @param array $data Article record; reads 'ourl' and 'name'.
 * @return int|false|null 0 when no play data could be obtained, false when the
 *                        name or the volumes are missing, 6 when an existing
 *                        article was updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $_root;

    echo $data['ourl'], "\n";

    $html = getHtml($data['ourl']);
    if ($html) {
        // Only convert a real response; a failed fetch goes straight to the check below.
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
    }
    if (!$html) {
        echo "获取html失败";
        exit;
    }

    $data['keyword'] = '';
    $data['utime'] = time();

    $intro = '';
    if (preg_match('#剧情介绍:</h2>\\s*</div>\\s*<div style="[^"]+">(.+)<p>#Uis', $html, $match)) {
        $intro = $match[1];
    }
    $intro = strip_tags($intro);
    // Drop HTML entities such as &nbsp; instead of decoding them.
    $intro = preg_replace('#&\\S+;#Uis', '', $intro);
    // Measure and cut with the same explicit encoding so the result does not
    // depend on mb_internal_encoding().
    if (mb_strlen($intro, 'UTF-8') > 300) {
        $intro = mb_substr($intro, 0, 300, 'utf-8');
    }
    // NOTE(review): '?' is presumably removed as residue of characters that
    // could not be converted; this also strips genuine question marks.
    $intro = str_replace('?', '', $intro);
    $intro = trim($intro);
    // Collapse repeated line breaks, then any remaining whitespace runs.
    $intro = preg_replace("#(\r\n)+#is", "\r\n", $intro);
    $intro = preg_replace("#\n+#is", "\n", $intro);
    $intro = preg_replace('#\\s\\s+#is', ' ', $intro);
    // iconv() returns false on invalid input (notice suppressed); the text is
    // dropped in that case, as before.
    $intro = @iconv("UTF-8", "UTF-8//TRANSLIT", $intro);
    $data['intro'] = $intro === false ? '' : str_replace('?', '', $intro);

    // Locate the player page. Without a link there is nothing to fetch, so the
    // play-data request is skipped and the failure is reported below.
    $purl = '';
    if (preg_match('#<li><a title=\'[^\']+\' href=\'(/.+/player-0-0\\.html)\' target="_blank">.+</a></li>#Uis', $html, $match)) {
        $purl = $match[1];
    }
    $data['purl'] = $purl;

    $playhtml = $purl === '' ? '' : getArticlePlayData($purl);
    if (empty($playhtml)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }
    $data['vols'] = getParseVideoInfo($playhtml);
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        return false;
    }

    // Store the URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // Existing article: only refresh its volumes.
    $oname = $data['name'];
    $aid = checkArticleByOname($oname);
    if ($aid) {
        $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($vdata);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if (!$aid) {
        var_dump($data);
        echo "\r\n添加失败! {$data['ourl']} \r\n";
        exit;
    }

    echo "添加成功! {$aid} \r\n";
}
/**
 * Fetch one detail page, complete the article record and store it.
 *
 * The page at $data['ourl'] is downloaded and used as-is (no encoding
 * conversion). The introduction is read from the "txt" attribute of
 * <span class="more">, cleaned (tags, HTML entities and '?' characters removed,
 * repeated line breaks collapsed) and limited to 300 characters. Play data is
 * loaded with getArticlePlayData($data['purl']) and parsed by
 * getParseVideoInfo(). An article whose name already exists only gets its
 * volumes updated; otherwise the record is inserted with addArticle().
 *
 * $data is modified in place ('keyword', 'utime', 'intro', 'vols' are set,
 * 'purl' is removed, $_root is stripped from 'ourl'). The script exits when the
 * page cannot be fetched or when addArticle() fails.
 *
 * @param array $data Article record; reads 'ourl', 'purl' and 'name'.
 * @return int|false|null 0 when no play data could be obtained, false when the
 *                        name or the volumes are missing, 6 when an existing
 *                        article was updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $_root;

    echo $data['ourl'], "\n";

    $html = getHtml($data['ourl']);
    if (!$html) {
        echo "获取html失败";
        exit;
    }

    $data['keyword'] = '';
    $data['utime'] = time();

    $intro = '';
    if (preg_match('#<span class="more" txt="([^"]+)"\\s*>#Uis', $html, $match)) {
        $intro = $match[1];
    }
    $intro = strip_tags($intro);
    // Drop HTML entities such as &nbsp; instead of decoding them.
    $intro = preg_replace('#&\\S+;#Uis', '', $intro);
    // Measure and cut with the same explicit encoding so the result does not
    // depend on mb_internal_encoding().
    if (mb_strlen($intro, 'UTF-8') > 300) {
        $intro = mb_substr($intro, 0, 300, 'utf-8');
    }
    // NOTE(review): '?' is presumably removed as residue of characters that
    // could not be converted; this also strips genuine question marks.
    $intro = str_replace('?', '', $intro);
    $intro = trim($intro);
    // Collapse repeated line breaks.
    $intro = preg_replace("#(\r\n)+#is", "\r\n", $intro);
    $intro = preg_replace("#\n+#is", "\n", $intro);
    $data['intro'] = $intro;

    $playhtml = getArticlePlayData($data['purl']);
    if (empty($playhtml)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }
    $data['vols'] = getParseVideoInfo($playhtml);
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        return false;
    }

    // Store the URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // Existing article: only refresh its volumes.
    $oname = $data['name'];
    $aid = checkArticleByOname($oname);
    if ($aid) {
        $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($vdata);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if (!$aid) {
        var_dump($data);
        echo "\r\n添加失败! {$data['ourl']} \r\n";
        exit;
    }

    echo "添加成功! {$aid} \r\n";
}