Esempio n. 1
0
/**
 * Crawls the first two list pages of a category and passes every new entry to getinfodetail().
 *
 * Page 1 is the bare category URL; page N (N > 1) appends "index{N}.html". Each page is
 * fetched with getHtml(), converted from GBK to UTF-8 and parsed by getParseListInfo().
 * Entries for which checkArticleByOname() returns a truthy id are skipped.
 *
 * Reads the globals $_root (prefix for relative URLs) and $cid (copied onto every entry and
 * used in the name of the debug dump file).
 *
 * @param array $cate Category row; only $cate['ourl'] is read. Taken by reference but not
 *                    modified here.
 * @return int 0 once both pages were processed, 6 as soon as a page yields no entries.
 */
function getinfolist(&$cate)
{
    global $_root, $cid;

    // Original author's note: the page range should be computed from "atotal"; it is fixed at 2 here.
    for ($i = 1; $i <= 2; $i++) {
        $suf = $i == 1 ? '' : 'index' . $i . '.html';
        $url = $_root . $cate['ourl'] . $suf;
        echo "\n++++ ", $url, " ++++\n";

        $html = getHtml($url);
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
        $matchs = getParseListInfo($html);
        if (empty($matchs)) {
            // Keep the page that failed to parse so the list pattern can be debugged offline.
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            // The source id is whatever digits appear in the entry URL.
            $oid = preg_replace('#[^\\d]+#', '', $list['ourl']);
            $oname = trim($list['name']);

            // Skip entries that are already stored (looked up by name).
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            // Only relative links get the site root prepended; absolute http/https links are
            // used as they are.
            $ourl = $list['ourl'];
            if (!preg_match('#^https?://#i', $ourl)) {
                $ourl = $_root . $ourl;
            }

            $ainfo = array(
                'thum' => $list['thum'],
                'ourl' => $ourl,
                'purl' => '',
                'actor' => $list['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            // Pause between detail requests.
            sleep(1);
        }
    }

    return 0;
}
Esempio n. 2
0
/**
 * Crawls up to 5000 list pages of a category and passes every new entry to getinfodetail().
 *
 * The page URL is built with sprintf() from the global $cate_list_url pattern, which receives
 * the category's 'ourl' and the page number after the $_root prefix. Only the markup inside
 * the <ul class="show-list fn-clear" id="contents"> container is given to getParseListInfo().
 * Entries for which checkArticleByOname() returns a truthy id are skipped.
 *
 * Reads the globals $_root, $cid (used in the debug dump file name) and $cate_list_url.
 *
 * @param array $cate_info Category row; only $cate_info['ourl'] is read. Taken by reference
 *                         but not modified here.
 * @return int 0 if every page up to the limit produced entries, 6 as soon as a page yields none.
 */
function getinfolist(&$cate_info)
{
    global $_root, $cid, $cate_list_url;

    // Original author's note: the page range should be computed from "atotal"; it is fixed at 5000 here.
    for ($i = 1; $i <= 5000; $i++) {
        $url = sprintf('%s' . $cate_list_url, $_root, $cate_info['ourl'], $i);
        echo "\n++++ ", $url, " ++++\n";

        $page = (string) getHtml($url);

        // Narrow the page down to the list container before parsing. When the container is
        // missing, keep the whole page for the debug dump instead of reading an undefined
        // capture group.
        $html = $page;
        $matchs = array();
        if (preg_match('#<ul class="show-list fn-clear" id="contents">(.+)</ul>#Uis', $page, $container)) {
            $html = $container[1];
            $matchs = getParseListInfo($html);
        }

        if (empty($matchs)) {
            file_put_contents('match_error_list' . $cid . '.html', $html);
            echo 'Cate list Failed ' . $url . "\r\n";
            return 6;
        }

        foreach ($matchs as $list) {
            $oname = trim($list['name']);

            // Skip entries that are already stored (looked up by name).
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            // This variant stores no source id; the parsed entry is passed on as is.
            $list['oid'] = 0;
            getinfodetail($list);
            // Pause between detail requests.
            sleep(1);
        }
    }

    return 0;
}
Esempio n. 3
0
/**
 * Walks the list pages of one category, from the global $startPage up to page 20, and passes
 * every new entry to getinfodetail().
 *
 * Page 1 is "{ourl}.html"; any other page N is "{ourl}_{N}.html". Each page is converted from
 * GBK to UTF-8, written to list_match.html, and parsed by getParseListInfo(). Entries for
 * which checkArticleByOname() returns a truthy id are skipped.
 *
 * @param array $_cate Category row; only $_cate['ourl'] is read.
 * @return int 0 after the last page, 6 as soon as a page yields no entries.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid, $startPage;

    // Original author's note: the page range should be computed from "atotal"; the upper bound is fixed at 20.
    for ($page = $startPage; $page <= 20; $page++) {
        if ($page == 1) {
            $suffix = '';
        } else {
            $suffix = '_' . $page;
        }
        $pageUrl = $_root . $_cate['ourl'] . $suffix . '.html';
        echo "\n++++ ", $pageUrl, " ++++\n";

        $body = getHtml($pageUrl);
        $body = mb_convert_encoding($body, "UTF-8", "GBK");
        // The most recently fetched list page is always kept on disk.
        file_put_contents('list_match.html', $body);

        $entries = getParseListInfo($body);
        if (empty($entries)) {
            file_put_contents('match_error_list' . $cid . '.html', $body);
            echo 'Cate list Failed ' . $pageUrl . "\r\n";
            return 6;
        }

        foreach ($entries as $entry) {
            // The source id is whatever digits appear in the entry URL.
            $oid = preg_replace('#[^\\d]+#', '', $entry['ourl']);
            $oname = trim($entry['title']);

            // Entries that already exist (by name) are reported and skipped.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            $ainfo = array(
                'thum' => $_root . $entry['thum'],
                'ourl' => $_root . $entry['ourl'],
                'purl' => $_root . 'player/index' . $oid . '.html?' . $oid . '-0-0?' . $oid . '-0-0',
                'actor' => '',
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(1);
        }
    }

    return 0;
}
Esempio n. 4
0
/**
 * Walks up to 2000 list pages of one category and passes every new entry to getinfodetail().
 *
 * Page URLs have the form "{$_root}/?m=vod-type-id-{ourl}-pg-{page}.htm", with both
 * placeholders formatted as integers. The fetched page is parsed as is (no charset
 * conversion) by getParseListInfo(). Entries for which checkArticleByOname() returns a
 * truthy id are skipped.
 *
 * @param array $_cate Category row; only $_cate['ourl'] is read.
 * @return int 0 after the last page, 6 as soon as a page yields no entries.
 */
function getinfolist(&$_cate)
{
    global $_root, $cid;

    // Original author's note: the page range should be computed from "atotal"; it is fixed at 2000 here.
    $page = 1;
    while ($page <= 2000) {
        $pageUrl = sprintf('%s/?m=vod-type-id-%d-pg-%d.htm', $_root, $_cate['ourl'], $page);
        echo "\n++++ ", $pageUrl, " ++++\n";

        $body = getHtml($pageUrl);
        $entries = getParseListInfo($body);
        if (empty($entries)) {
            // Keep the unparsable page for offline inspection.
            file_put_contents('match_error_list' . $cid . '.html', $body);
            echo 'Cate list Failed ' . $pageUrl . "\r\n";
            return 6;
        }

        foreach ($entries as $entry) {
            // The source id is whatever digits appear in the entry URL.
            $oid = preg_replace('#[^\\d]+#', '', $entry['url']);
            $oname = trim($entry['name']);

            // Entries that already exist (by name) are reported and skipped.
            $aid = checkArticleByOname($oname);
            if ($aid) {
                echo "{$aid}已存在未更新!\r\n";
                continue;
            }

            $ainfo = array(
                'thum' => $entry['thum'],
                'ourl' => $_root . $entry['url'],
                'purl' => $_root . $entry['purl'],
                // The parsed 'actor' value is passed on under the 'keyword' key.
                'keyword' => $entry['actor'],
                'name' => $oname,
                'oid' => $oid,
                'cid' => $cid,
            );
            getinfodetail($ainfo);
            sleep(5);
        }

        $page++;
    }

    return 0;
}
Esempio n. 5
0
 // NOTE(review): this is a fragment of a loop body. The enclosing loop, the closing brace of
 // the final else branch, and the definitions of $_root, $ovid, $title, $picPool, $uk, $uv,
 // $infoUrl, $start_page and $m are all outside this excerpt.

 // Build the resource URL for the current video id and pass it to getinfodetail(); the
 // result is used as an array below.
 $sourceUrl = sprintf('%svideos/resource/id/%d', $_root, $ovid);
 $data = getinfodetail($sourceUrl);
 // Overwrite name, source id and thumbnail with values gathered outside this excerpt.
 $data['name'] = $title;
 $data['ourl'] = $ovid;
 $data['thum'] = $picPool[$uk];
 // The volume list comes from parsing $uv, not from $data.
 $vinfo = getParseVideoInfo($uv);
 $data['vols'] = $vinfo;
 //var_dump($data);exit;
 if (empty($vinfo)) {
     echo "\n==== Get Parse Info Failed Ourl: {$infoUrl} Purl: {$sourceUrl}  Page: {$start_page} =====\n";
     // A parse failure is tolerated (the item is skipped) only when $uv contains
     // '/resource/id/'; any other failure stops the script.
     if (stripos($uv, '/resource/id/') !== false) {
         continue;
     }
     exit;
 }
 // If an article with this name already exists, only add the volumes to it.
 $aid = checkArticleByOname($data['name']);
 if ($aid) {
     $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
     $aid = addArticleVols($vdata);
     echo "{$aid}已存在更新! ovid: {$ovid} Page: {$start_page} \r\n";
     // presumably records $ovid as processed — verify against the class of $m
     $m->addid97vid($ovid);
     continue;
 }
 // Otherwise insert a new article; a failed insert dumps the data and stops the script.
 $aid = addArticle($data);
 if ($aid) {
     $m->addid97vid($ovid);
     echo "\n=== Add Aid: {$aid}  ovid: {$ovid} Page: {$start_page} OK ====\n";
 } else {
     var_dump($data);
     echo "\r\n添加失败! Ourl: {$infoUrl} \r\n";
     exit;
Esempio n. 6
0
/**
 * Loads one detail page, completes the article record and stores it.
 *
 * Steps: fetch $data['ourl'] and convert it from GBK to UTF-8, extract the introduction
 * from the <div class="introduction" itemprop="description"> element, load the play data
 * for $data['purl'] via getArticlePlayData(), then either add the volumes to an existing
 * article of the same name (addArticleVols) or insert a new one (addArticle).
 *
 * The script is terminated with exit when the page cannot be fetched, when the name is
 * empty, or when addArticle() fails.
 *
 * @param array $data Entry built by the list crawler. Reads 'ourl', 'purl', 'name' and
 *                    'actor'; sets 'keyword', 'actor', 'ptime', 'utime', 'intro' and 'vols';
 *                    strips $_root from 'ourl' and removes 'purl'.
 * @return int|null 0 when no play data could be loaded, 6 when an existing article was
 *                  updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $_root;

    echo $data['ourl'], "\n";
    $html = getHtml($data['ourl']);
    if ($html) {
        // Convert only a successful fetch, so a failed request is never run through mbstring.
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
    }
    if (!$html) {
        echo "获取html失败";
        exit;
    }

    $data['keyword'] = '';

    // iconv() returns false when it meets an illegal byte sequence. Fall back to mbstring,
    // which substitutes the bad bytes, instead of storing false as the actor.
    $actor = isset($data['actor']) ? (string) $data['actor'] : '';
    $cleanActor = @iconv("UTF-8", "UTF-8//TRANSLIT", $actor);
    $data['actor'] = $cleanActor === false ? mb_convert_encoding($actor, 'UTF-8', 'UTF-8') : $cleanActor;

    $now = time();
    $data['ptime'] = $now;
    $data['utime'] = $now;

    $intro = '';
    if (preg_match('#<div class="introduction" itemprop="description"><p>(.+)</p></div>#Uis', $html, $match)) {
        $intro = $match[1];
    }
    // Same fallback as for the actor: one bad byte must not wipe the whole introduction.
    $cleanIntro = @iconv("UTF-8", "UTF-8//TRANSLIT", $intro);
    if ($cleanIntro === false) {
        $cleanIntro = mb_convert_encoding($intro, 'UTF-8', 'UTF-8');
    }
    $intro = strip_tags($cleanIntro);
    // Drop whatever HTML entities are left after the tags are gone.
    $intro = preg_replace('#&\\S+;#Uis', '', $intro);
    // NOTE(review): texts longer than 300 characters are cut to 256, not 300 — confirm this is intended.
    if (mb_strlen($intro, 'UTF-8') > 300) {
        $intro = mb_substr($intro, 0, 256, 'UTF-8');
    }
    $data['intro'] = trim($intro);

    $playhtml = getArticlePlayData($data['purl']);
    if (empty($playhtml)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }
    $data['vols'] = $playhtml;
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        exit;
    }

    // Store the source URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // An article with the same name only receives the new volumes.
    $aid = checkArticleByOname($data['name']);
    if ($aid) {
        $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($vdata);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if (!$aid) {
        var_dump($data);
        echo "\r\n添加失败! {$data['ourl']} \r\n";
        exit;
    }
    echo "添加成功! {$aid} \r\n";
}
Esempio n. 7
0
/**
 * Loads one detail page, completes the article record and stores it.
 *
 * Steps: fetch $data['ourl'] and convert it from GBK to UTF-8, extract the introduction
 * from the list that follows the <h3 class="ph3"> heading, load the play data for
 * $data['purl'] via getArticlePlayData() and convert it with jsary2phpary(), then either
 * add the volumes to an existing article of the same name (addArticleVols) or insert a new
 * one (addArticle).
 *
 * The script is terminated with exit when the page cannot be fetched or addArticle() fails.
 *
 * @param array $data Entry built by the list crawler. Reads 'ourl', 'purl' and 'name'; sets
 *                    'keyword', 'ptime', 'utime', 'intro' and 'vols'; strips $_root from
 *                    'ourl' and removes 'purl'.
 * @return int|false|null 0 when no play data could be loaded, false when the name or the
 *                        converted volume list is empty, 6 when an existing article was
 *                        updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $_root;

    echo $data['ourl'], "\n";
    $html = getHtml($data['ourl']);
    if ($html) {
        // Convert only a successful fetch, so a failed request is never run through mbstring.
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
    }
    if (!$html) {
        echo "获取html失败";
        exit;
    }

    // Keyword extraction from the <meta name="keywords"> tag is disabled; the keyword is stored empty.
    $data['keyword'] = '';

    $now = time();
    $data['ptime'] = $now;
    $data['utime'] = $now;

    $intro = '';
    if (preg_match('#<h3 class="ph3">影片介绍</h3>\\s+<ul>(.+)</ul>\\s+</div>\\s+</div>#Uis', $html, $match)) {
        $intro = $match[1];
    }
    // iconv() returns false when it meets an illegal byte sequence. Fall back to mbstring,
    // which substitutes the bad bytes, so one bad byte does not wipe the whole introduction.
    $cleanIntro = @iconv("UTF-8", "UTF-8//TRANSLIT", $intro);
    if ($cleanIntro === false) {
        $cleanIntro = mb_convert_encoding($intro, 'UTF-8', 'UTF-8');
    }
    $intro = strip_tags($cleanIntro);
    // Drop whatever HTML entities are left after the tags are gone.
    $intro = preg_replace('#&\\S+;#Uis', '', $intro);
    $data['intro'] = trim($intro);

    $playhtml = getArticlePlayData($data['purl']);
    if (empty($playhtml)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }
    $data['vols'] = jsary2phpary($playhtml);
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        return false;
    }

    // Store the source URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // An article with the same name only receives the new volumes.
    $aid = checkArticleByOname($data['name']);
    if ($aid) {
        $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($vdata);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if (!$aid) {
        var_dump($data);
        echo "\r\n添加失败! {$data['ourl']} \r\n";
        exit;
    }
    echo "添加成功! {$aid} \r\n";
}
Esempio n. 8
0
/**
 * Loads one detail page, completes the article record and stores it.
 *
 * Steps: fetch $data['ourl'] and convert it from GBK to UTF-8, extract and normalise the
 * introduction, find the first ".../player-0-0.html" link on the page and use it as the
 * player URL, load the play data via getArticlePlayData() and parse it with
 * getParseVideoInfo(), then either add the volumes to an existing article of the same name
 * (addArticleVols) or insert a new one (addArticle).
 *
 * The script is terminated with exit when the page cannot be fetched or addArticle() fails.
 *
 * @param array $data Entry built by the list crawler. Reads 'ourl' and 'name'; sets
 *                    'keyword', 'ptime', 'utime', 'intro' and 'vols'; strips $_root from
 *                    'ourl'. 'purl' is overwritten from the page and removed again once the
 *                    play data was loaded.
 * @return int|false|null 0 when no player link or play data was found, false when the name
 *                        or the parsed volume list is empty, 6 when an existing article was
 *                        updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $_root;

    echo $data['ourl'], "\n";
    $html = getHtml($data['ourl']);
    if ($html) {
        // Convert only a successful fetch, so a failed request is never run through mbstring.
        $html = mb_convert_encoding($html, "UTF-8", "GBK");
    }
    if (!$html) {
        echo "获取html失败";
        exit;
    }

    $data['keyword'] = '';

    $now = time();
    $data['ptime'] = $now;
    $data['utime'] = $now;

    $intro = '';
    if (preg_match('#剧情介绍:</h2>\\s*</div>\\s*<div style="[^"]+">(.+)<p>#Uis', $html, $match)) {
        $intro = $match[1];
    }
    $intro = strip_tags($intro);
    // Drop whatever HTML entities are left after the tags are gone.
    $intro = preg_replace('#&\\S+;#Uis', '', $intro);
    if (mb_strlen($intro, 'UTF-8') > 300) {
        $intro = mb_substr($intro, 0, 300, 'UTF-8');
    }
    // Literal question marks are removed; presumably they are conversion substitutes — TODO confirm.
    $intro = str_replace('?', '', $intro);
    $intro = trim($intro);
    // Collapse repeated line breaks, then any remaining whitespace runs.
    $intro = preg_replace("#(\r\n)+#is", "\r\n", $intro);
    $intro = preg_replace("#\n+#is", "\n", $intro);
    $intro = preg_replace('#\\s\\s+#is', ' ', $intro);
    // iconv() returns false when it meets an illegal byte sequence. Fall back to mbstring,
    // which substitutes the bad bytes, so one bad byte does not wipe the whole introduction.
    $cleanIntro = @iconv("UTF-8", "UTF-8//TRANSLIT", $intro);
    if ($cleanIntro === false) {
        $cleanIntro = mb_convert_encoding($intro, 'UTF-8', 'UTF-8');
    }
    $data['intro'] = str_replace('?', '', $cleanIntro);

    // Take the player URL from the page. Without a link there is nothing to request, so the
    // play-data call is skipped and the usual error path below is taken.
    $data['purl'] = null;
    if (preg_match('#<li><a title=\'[^\']+\' href=\'(/.+/player-0-0\\.html)\' target="_blank">.+</a></li>#Uis', $html, $match)) {
        $data['purl'] = $match[1];
    }
    $playhtml = $data['purl'] === null ? null : getArticlePlayData($data['purl']);
    if (empty($playhtml)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }
    $data['vols'] = getParseVideoInfo($playhtml);
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        return false;
    }

    // Store the source URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // An article with the same name only receives the new volumes.
    $aid = checkArticleByOname($data['name']);
    if ($aid) {
        $vdata = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($vdata);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if (!$aid) {
        var_dump($data);
        echo "\r\n添加失败! {$data['ourl']} \r\n";
        exit;
    }
    echo "添加成功! {$aid} \r\n";
}
Esempio n. 9
0
/**
 * Loads one detail page, completes the article record and stores it.
 *
 * The page at $data['ourl'] is used as fetched (no charset conversion). The introduction is
 * read from the txt attribute of the <span class="more"> element; the play data for
 * $data['purl'] is loaded via getArticlePlayData() and parsed with getParseVideoInfo().
 * An existing article of the same name only receives the volumes (addArticleVols);
 * otherwise a new article is inserted (addArticle).
 *
 * The script is terminated with exit when the page cannot be fetched or addArticle() fails.
 *
 * @param array $data Entry built by the list crawler. Reads 'ourl', 'purl' and 'name'; sets
 *                    'keyword', 'ptime', 'utime', 'intro' and 'vols'; strips $_root from
 *                    'ourl' and removes 'purl'.
 * @return int|false|null 0 when no play data could be loaded, false when the name or the
 *                        parsed volume list is empty, 6 when an existing article was
 *                        updated, null after a successful insert.
 */
function getinfodetail(&$data)
{
    global $model, $start_page, $_root, $cid, $strreplace, $pregreplace;

    echo $data['ourl'], "\n";

    $page = getHtml($data['ourl']);
    if (!$page) {
        echo "获取html失败";
        exit;
    }

    $data['keyword'] = '';
    $data['ptime'] = time();
    $data['utime'] = time();

    // Introduction text: txt attribute of the "more" span, or empty when it is absent.
    preg_match('#<span class="more" txt="([^"]+)"\\s*>#Uis', $page, $hit);
    $summary = isset($hit[1]) ? $hit[1] : '';
    $summary = strip_tags($summary);
    $summary = preg_replace('#&\\S+;#Uis', '', $summary);
    if (mb_strlen($summary) > 300) {
        $summary = mb_substr($summary, 0, 300, 'utf-8');
    }
    $summary = trim(str_replace('?', '', $summary));
    // Collapse repeated line breaks.
    $summary = preg_replace("#(\r\n)+#is", "\r\n", $summary);
    $summary = preg_replace("#\n+#is", "\n", $summary);
    $data['intro'] = $summary;

    $playData = getArticlePlayData($data['purl']);
    if (empty($playData)) {
        echo "\n++ Ourl:{$data['ourl']} Purl:{$data['purl']} playdata vols decode error!++\n";
        return 0;
    }

    $data['vols'] = getParseVideoInfo($playData);
    unset($data['purl']);

    if (!$data['name'] || empty($data['vols'])) {
        echo "抓取失败 {$data['ourl']} \r\n";
        return false;
    }

    // Store the source URL relative to the site root.
    $data['ourl'] = str_replace($_root, '', $data['ourl']);

    // An article with the same name only receives the new volumes.
    $aid = checkArticleByOname($data['name']);
    if ($aid) {
        $volumeUpdate = array('name' => $data['name'], 'vols' => $data['vols']);
        $aid = addArticleVols($volumeUpdate);
        echo "{$aid}已存在更新!\r\n";
        return 6;
    }

    $aid = addArticle($data);
    if ($aid) {
        echo "添加成功! {$aid} \r\n";
        return;
    }

    var_dump($data);
    echo "\r\n添加失败! {$data['ourl']} \r\n";
    exit;
}