Example #1
0
/**
 * Crawl a page with the spider and persist its metadata in `kuaso_links`.
 *
 * @param string $url    Address handed to the spider and stored in the row.
 * @param string $action "add" inserts a new row; "update" rewrites the row
 *                       whose url column equals $url. Any other value stores
 *                       nothing (the page is still crawled).
 */
function AddAndUpdateUrl($url, $action)
{
    global $db;

    $crawler = new spider();
    $crawler->url($url);

    // Read the crawl results in a fixed order before assembling the row.
    $pageTitle = $crawler->title;
    $excerpt = $crawler->fulltxt(800);
    $metaKeywords = $crawler->keywords;
    $metaDescription = $crawler->description;
    $size = $crawler->pagesize;

    $record = array(
        'url' => $url,
        'title' => $pageTitle,
        'fulltxt' => $excerpt,
        'pagesize' => $size,
        'keywords' => $metaKeywords,
        'description' => $metaDescription,
        'updatetime' => time(),
    );

    switch ($action) {
        case "add":
            $db->insert("kuaso_links", $record);
            break;
        case "update":
            // NOTE(review): $url goes into the where clause unescaped; confirm
            // whether the $db layer offers an escaping/binding helper.
            $db->update("kuaso_links", $record, "url='{$url}'");
            break;
    }
}
Example #2
0
/**
 * Register a URL in `ve123_links`, or just refresh its timestamp when a row
 * for that URL already exists.
 *
 * A URL that is not yet known is crawled with the spider and its title, text
 * excerpt, page size, keywords and description are stored. A known URL only
 * gets its `updatetime` column bumped; the page is not re-crawled.
 *
 * @param string $url Address to register.
 */
function add_site($url)
{
    global $db;

    // NOTE(review): $url is concatenated into the SQL below without escaping;
    // confirm whether the $db layer offers an escaping/binding helper.
    $row = $db->get_one("select * from ve123_links where url='" . $url . "'");

    if (!empty($row)) {
        $db->update("ve123_links", array("updatetime" => time()), "url='" . $url . "'");
        return;
    }

    require_once PATH . "include/spider/spider_class.php";
    $spider = new spider();
    $spider->url($url);
    $title = $spider->title;
    $fulltxt = $spider->fulltxt(800);
    $keywords = $spider->keywords;
    $description = $spider->description;
    $pagesize = $spider->pagesize;

    $array = array(
        "url" => $url,
        "title" => $title,
        "fulltxt" => $fulltxt,
        "pagesize" => $pagesize,
        "keywords" => $keywords,
        "description" => $description,
        "updatetime" => time(),
    );
    $db->insert("ve123_links", $array);
}
Example #3
0
 /**
  * Publish a batch of spider items one at a time, emitting a progress dialog
  * and flushing output after each item.
  *
  * Items come from $pubArray plus any entries posted in $_POST['pub'], where
  * each posted entry is a "cid|pid|rid|url|title" string.
  *
  * @param array $pubArray Items with the keys sid, url, title, cid, rid, pid.
  */
 function do_mpublish($pubArray = array())
 {
     iPHP::$break = false;
     if (!empty($_POST['pub'])) {
         foreach ((array) $_POST['pub'] as $a) {
             // Pad so an entry with fewer than five fields yields nulls
             // instead of undefined-offset notices.
             list($cid, $pid, $rid, $url, $title) = array_pad(explode('|', $a), 5, null);
             $pubArray[] = array('sid' => 0, 'url' => $url, 'title' => $title, 'cid' => $cid, 'rid' => $rid, 'pid' => $pid);
         }
     }
     if (empty($pubArray)) {
         iPHP::$break = true;
         // NOTE(review): presumably iPHP::alert() ends the request; if it does
         // not, the code below still runs with an empty list. Confirm.
         iPHP::alert('暂无最新内容', 0, 30);
     }
     $_count = count($pubArray);
     ob_start();
     ob_end_flush();
     ob_implicit_flush(1);
     // 1-based position of the current item. The array key is 0-based (and may
     // not be sequential), so comparing it to the count never matched the last
     // item and the timeout was never applied.
     $position = 0;
     foreach ((array) $pubArray as $i => $a) {
         $position++;
         spider::$sid = $a['sid'];
         spider::$cid = $a['cid'];
         spider::$pid = $a['pid'];
         spider::$rid = $a['rid'];
         spider::$url = $a['url'];
         spider::$title = $a['title'];
         $rs = $this->multipublish();
         // The first item opens the dialog; later items update it.
         $updateMsg = $i ? true : false;
         $timeout = $position == $_count ? '3' : false;
         iPHP::dialog($rs['msg'], 'js:' . $rs['js'], $timeout, 0, $updateMsg);
         ob_flush();
         flush();
     }
     iDB::update('spider_project', array('lastupdate' => time()), array('id' => $this->pid));
     iPHP::dialog('success:#:check:#:采集完成!', 0, 3, 0, true);
 }
Example #4
0
    if (empty($site)) {
        $array = array('url' => $url, 'spider_depth' => $config["spider_depth"], 'indexdate' => time(), 'addtime' => time());
        $db->insert("kuaso_sites", $array);
    }
    $site = $db->get_one("select * from kuaso_sites where url='{$url}'");
    if (!empty($site)) {
        $ip = ip();
        //$referer=$_SERVER['HTTP_REFERER'];
        $v = $db->get_one("select * from kuaso_stat_visitor where v_ip='" . $ip . "' and v_time>='" . (time() - 86400 * 1) . "'");
        if (empty($v)) {
            $array = array('v_time' => time(), 'v_ip' => $ip);
            $db->insert("kuaso_stat_visitor", $array);
            $db->query("update kuaso_sites set com_time='" . time() . "',com_count_ip=com_count_ip+1 where url='" . $url . "'");
        }
    }
    $site = $db->get_one("select * from kuaso_sites where url='{$url}'");
    if (!empty($site)) {
        $row = $db->get_one("select * from kuaso_links where url='" . $url . "'");
        if (empty($row)) {
            $spider = new spider();
            $spider->url($url);
            $title = $spider->title;
            $fulltxt = $spider->fulltxt(800);
            $keywords = $spider->keywords;
            $description = $spider->description;
            $pagesize = $spider->pagesize;
            $array = array('url' => $url, 'title' => $title, 'fulltxt' => $fulltxt, 'pagesize' => $pagesize, 'keywords' => $keywords, 'description' => $description, 'updatetime' => time());
            $db->insert("kuaso_links", $array);
        }
    }
}
Example #5
0
 /**
  * Fetch a URL with cURL and return its body after charset conversion.
  *
  * 301/302 responses are followed and failed requests are retried; both draw
  * on the same budget of five extra attempts tracked in $_count. When
  * spider::$dataTest or spider::$ruleTest is set, diagnostic HTML is echoed.
  *
  * @param string $url    Address to fetch; HTML-encoded "&amp;" is decoded first.
  * @param int    $_count Redirects/retries already spent (used by the recursion).
  * @return mixed False on a 404/500 response or an unusable redirect target;
  *               otherwise the result of spiderTools::charsetTrans() on the body.
  */
 public static function remote($url, $_count = 0)
 {
     // URLs scraped from HTML carry "&amp;"; decode it back to a plain "&".
     $url = str_replace('&amp;', '&', $url);

     // Parsed unconditionally: the redirect branch needs the origin even when
     // a referer was already configured.
     $uri = parse_url($url);
     $scheme = isset($uri['scheme']) ? $uri['scheme'] : '';
     $host = isset($uri['host']) ? $uri['host'] : '';
     $origin = $scheme . '://' . $host;
     if (empty(spider::$referer)) {
         spider::$referer = $origin;
     }

     $options = array(
         CURLOPT_URL => $url,
         CURLOPT_ENCODING => spider::$encoding,
         CURLOPT_REFERER => spider::$referer,
         CURLOPT_USERAGENT => spider::$useragent,
         CURLOPT_TIMEOUT => 10,
         CURLOPT_CONNECTTIMEOUT => 10,
         CURLOPT_RETURNTRANSFER => 1,
         CURLOPT_FAILONERROR => 1,
         CURLOPT_HEADER => 0,
         CURLOPT_NOSIGNAL => true,
         CURLOPT_DNS_USE_GLOBAL_CACHE => true,
         CURLOPT_DNS_CACHE_TIMEOUT => 86400,
         // NOTE(review): TLS peer/host verification is disabled here.
         CURLOPT_SSL_VERIFYPEER => false,
         CURLOPT_SSL_VERIFYHOST => false,
     );
     spider::$cookie && ($options[CURLOPT_COOKIE] = spider::$cookie);
     if (spider::$curl_proxy) {
         $proxy = spiderTools::proxy_test();
         $proxy && ($options = spiderTools::proxy($options, $proxy));
     }

     $ch = curl_init();
     curl_setopt_array($ch, $options);
     $responses = curl_exec($ch);
     $info = curl_getinfo($ch);

     if (spider::$dataTest || spider::$ruleTest) {
         echo "<b>{$url} 头信息:</b><pre>";
         print_r($info);
         echo '</pre><hr />';
         if (!empty($_GET['breakinfo'])) {
             exit;
         }
     }

     if (in_array($info['http_code'], array(301, 302)) && $_count < 5) {
         $_count++;
         $newurl = isset($info['redirect_url']) ? $info['redirect_url'] : '';
         if (empty($newurl)) {
             // cURL gave no redirect target: repeat the request with headers
             // included and read the Location header by hand.
             curl_setopt($ch, CURLOPT_HEADER, 1);
             $header = curl_exec($ch);
             $newurl = '';
             if (is_string($header) && preg_match('|Location: (.*)|i', $header, $matches)) {
                 $newurl = ltrim(trim($matches[1]), '/');
             }
             if (empty($newurl)) {
                 curl_close($ch);
                 return false;
             }
             // Only a target without its own http(s) scheme is relative.
             if (!preg_match('#^https?://#i', $newurl)) {
                 $newurl = $origin . '/' . $newurl;
             }
         }
         $newurl = trim($newurl);
         curl_close($ch);
         return spiderTools::remote($newurl, $_count);
     }

     if (in_array($info['http_code'], array(404, 500))) {
         curl_close($ch);
         return false;
     }

     if ((empty($responses) || $info['http_code'] != 200) && $_count < 5) {
         $_count++;
         if (spider::$dataTest || spider::$ruleTest) {
             echo $url . '<br />';
             echo "获取内容失败,重试第{$_count}次...<br />";
         }
         curl_close($ch);
         return spiderTools::remote($url, $_count);
     }

     // Take the charset from the Content-Type header when it declares one;
     // otherwise pass null, as the original's undefined variable did.
     $content_charset = null;
     $content_type = isset($info['content_type']) ? (string) $info['content_type'] : '';
     $pos = stripos($content_type, 'charset=');
     if ($pos !== false) {
         $content_charset = trim(substr($content_type, $pos + 8));
     }
     $responses = spiderTools::charsetTrans($responses, $content_charset, spider::$charset);
     curl_close($ch);

     if (spider::$dataTest || spider::$ruleTest) {
         echo '<pre>';
         print_r(htmlspecialchars(substr($responses, 0, 800)));
         echo '</pre><hr />';
     }

     spider::$url = $url;
     return $responses;
 }
Example #6
0
/**
 * Crawl a page and queue what the spider finds on it.
 *
 * Each entry returned by spider::sites() is added to `ve123_links` and
 * `ve123_sites` when it is not already there and passes is_url(). Each entry
 * returned by spider::links() is added to `ve123_links_temp` under the same
 * conditions. Progress is echoed and flushed per site entry.
 *
 * @param string $url Page to crawl.
 */
function insert_links($url)
{
    global $db, $config;

    $spider = new spider();
    $spider->url($url);
    $links = $spider->links();
    $sites = $spider->sites();

    // NOTE(review): crawled URLs are concatenated into the SQL below without
    // escaping; confirm whether the $db layer offers an escaping/binding helper.
    foreach ($sites as $value) {
        // Resolve the owning site from the entry itself. The original passed
        // an undefined $link here, so the lookup never used the current URL.
        $site_url = GetSiteUrl($value);
        $site = $db->get_one("select * from ve123_sites where url='" . $site_url . "'");
        $site_id = !empty($site) ? $site["site_id"] : null;

        $row = $db->get_one("select * from ve123_links where url='" . $value . "'");
        if (empty($row) && is_url($value)) {
            echo $value . "<br>";
            $array = array('url' => $value, 'site_id' => $site_id, 'level' => '0');
            $db->insert("ve123_links", $array);
        } else {
            echo "已存在:" . $value . "<br>";
        }
        ob_flush();
        flush();

        $row = $db->get_one("select * from ve123_sites where url='" . $value . "'");
        if (empty($row) && is_url($value)) {
            $array = array('url' => $value, 'spider_depth' => $config["spider_depth"], 'addtime' => time());
            $db->insert("ve123_sites", $array);
        }
    }

    foreach ($links as $value) {
        $row = $db->get_one("select * from ve123_links_temp where url='" . $value . "'");
        if (empty($row) && is_url($value)) {
            $array = array('url' => $value);
            $db->insert("ve123_links_temp", $array);
        }
    }
}
Example #7
0
/**
 * Re-crawl a URL, bump its `updatetime` in `ve123_links`, and store the fresh
 * metadata when the page passes the owning site's word filters.
 *
 * The metadata is stored only when all of the following hold:
 *  - the title is non-empty and contains no "?";
 *  - the page size is greater than 1;
 *  - if the site row has `include_word`, at least one listed word occurs in
 *    the page HTML;
 *  - if the site row has `not_include_word`, none of the listed words occurs.
 * A URL whose site has no row in `ve123_sites` is not filtered by words.
 *
 * @param string $url Address to re-crawl.
 * @return bool TRUE when the metadata row was updated, FALSE otherwise.
 */
function Update_link($url)
{
    global $db, $bug_url;

    $is_success = FALSE;

    $spider = new spider();
    $spider->url($url);
    $title = $spider->title;
    $fulltxt = $spider->fulltxt(800);
    $pagesize = $spider->pagesize;
    $keywords = $spider->keywords;
    $htmlcode = (string) $spider->htmlcode;
    $description = $spider->description;

    // NOTE(review): values are concatenated into the SQL below without
    // escaping; confirm whether the $db layer offers an escaping/binding helper.
    $site_url = GetSiteUrl($url);
    $site = $db->get_one("select * from ve123_sites where url='" . $site_url . "'");
    $site_id = !empty($site) ? $site["site_id"] : null;

    echo $title;

    $array = array('title' => $title, 'fulltxt' => $fulltxt, 'pagesize' => $pagesize, 'keywords' => $keywords, 'description' => $description, 'site_id' => $site_id);
    $db->query("update ve123_links set updatetime='" . time() . "' where url='" . $url . "'");

    // NOTE(review): the "?" test presumably rejects titles garbled by a
    // failed charset conversion; confirm.
    if (!empty($title) && $pagesize > 1 && strpos((string) $title, "?") === false) {
        $is_shoulu = TRUE;

        if (!empty($site)) {
            if (!empty($site["include_word"])) {
                // At least one required word must be present. The original
                // never set the flag to TRUE on a match, so any site with
                // include words configured was never stored.
                $include_num = 0;
                foreach (explode(",", $site["include_word"]) as $value) {
                    // Skip empty entries (e.g. a trailing comma).
                    if ($value !== '' && stripos($htmlcode, $value) !== false) {
                        $include_num += 1;
                    }
                }
                $is_shoulu = $include_num > 0;
            }

            if (!empty($site["not_include_word"])) {
                // A single forbidden word vetoes the page.
                $not_include_num = 0;
                foreach (explode(",", $site["not_include_word"]) as $value) {
                    if ($value !== '' && stripos($htmlcode, $value) !== false) {
                        $not_include_num += 1;
                    }
                }
                if ($not_include_num > 0) {
                    $is_shoulu = FALSE;
                }
            }
        }

        if ($is_shoulu) {
            $db->update("ve123_links", $array, "url='" . $url . "'");
            $is_success = TRUE;
        }
    }

    // NOTE(review): the script terminates here whenever the global $bug_url
    // is empty, so callers receive the return value only when it is set.
    if (empty($bug_url)) {
        exit;
    }
    return $is_success;
}