Exemplo n.º 1
0
/**
 * Fetch one page, refresh its stored record, and queue the links found on it.
 *
 * Steps, in order:
 *  1. Fetch $url through Snoopy and take its extracted link list.
 *  2. Filter the links with check_wai() and de-duplicate them.
 *  3. Update the ve123_links row whose url equals $url with the fetched
 *     title, full text, MD5 of the full text, description, keywords,
 *     page size and the current timestamp.
 *  4. Insert into ve123_links_temp every link not already present there
 *     (compared against rows whose url contains the page's domain).
 *  5. Insert into ve123_links every link not already present there and
 *     accepted by check_include().
 *  6. Print a report via printLinksReport() plus an HTML link to $old, then
 *     flush the output.
 *
 * @param string $url              Page to fetch; must pass is_url().
 * @param string $old              URL rendered as a clickable link in the output.
 * @param mixed  $numm             Passed through to check_wai().
 * @param mixed  $ooo              Unused; kept so existing callers keep working.
 * @param mixed  $site_id          Stored as site_id on every inserted row.
 * @param mixed  $include_word     Passed through to check_include().
 * @param mixed  $not_include_word Passed through to check_include().
 *
 * @return false|null false when $url fails is_url(); null in every other case
 *                    (both when no link list was obtained and on completion).
 */
function add_all_url_($url, $old, $numm, $ooo, $site_id, $include_word, $not_include_word)
{
    if (!is_url($url)) {
        return false;
    }
    global $db;

    // Fetch the page with the (third-party) Snoopy client.
    $snoopy = new Snoopy();
    $snoopy->fetchlry($url);
    $links = $snoopy->resulry;
    if (!is_array($links)) {
        return;
    }
    $links = check_wai($links, $numm, $url);
    $links = array_values(array_unique($links));

    $title = $snoopy->title;
    $fulltxt = $snoopy->fulltxt;
    if ($title == "") {
        // No title on the page: fall back to the start of the body text.
        $title = str_cut($fulltxt, 65);
    }

    // NOTE(review): $db exposes no visible escaping/parameter API in this
    // block, so addslashes() is used as a stop-gap against quote injection
    // from crawled URLs. Replace with the driver's own escaping or prepared
    // statements once the wrapper is confirmed to offer them.
    $sql_url = addslashes($url);
    $sql_domain = addslashes(getdomain($url));

    // Refresh the stored content of the page that was just fetched.
    $page = array(
        'lrymd5' => md5($fulltxt),
        'title' => $title,
        'fulltxt' => $fulltxt,
        'description' => $snoopy->description,
        'keywords' => $snoopy->keywords,
        'pagesize' => $snoopy->pagesize,
        'updatetime' => time(),
    );
    $db->update("ve123_links", $page, "url='" . $sql_url . "'");
    $all_num = count($links);

    // Queue in ve123_links_temp every fetched link that is not yet stored
    // there among the rows matching this page's domain.
    $temp_links = array();
    $query = $db->query("select url from ve123_links_temp where url like '%" . $sql_domain . "%'");
    while ($row = $db->fetch_array($query)) {
        $temp_links[] = rtrim($row['url'], "/");
    }
    foreach (array_diff($links, $temp_links) as $value) {
        $db->insert("ve123_links_temp", array('url' => $value, 'site_id' => $site_id));
    }

    // Same comparison against ve123_links. The list must start as an empty
    // array: with no matching rows it would otherwise be undefined and
    // array_diff() would fail instead of treating every link as new.
    $known_links = array();
    $query = $db->query("select url from ve123_links where url like '%" . $sql_domain . "%'");
    while ($row = $db->fetch_array($query)) {
        $known_links[] = rtrim($row['url'], "/");
    }
    $cha_links = array_diff($links, $known_links);
    // Counted before the include/exclude filter, so the report shows links
    // that are new, not links that were actually inserted.
    $cha_num = count($cha_links);
    foreach ($cha_links as $value) {
        if (check_include($value, $include_word, $not_include_word)) {
            $db->insert("ve123_links", array('url' => $value, 'site_id' => $site_id, 'level' => '1'));
        }
    }

    printLinksReport($cha_num, $all_num, 0);

    // Escape only the ASCII markup characters. A single-byte charset is named
    // so the call never rejects bytes of whatever multibyte encoding the
    // crawled URL happens to use.
    $safe_old = htmlspecialchars((string) $old, ENT_QUOTES, 'ISO-8859-1');
    echo '<a href="' . $safe_old . '" target="_blank">' . $safe_old . '</a>';

    // ob_flush() raises a notice when no output buffer is active.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}