/**
 * Fetch every article of the current list page and store the new ones.
 *
 * For each entry of $info['newList'] (starting at the index remembered in the
 * "ns_idx" cookie) the article page is downloaded, its source / keywords /
 * description / body are extracted, and the result is inserted into the
 * "<prefix>news_snatch" table unless a row with the same URL already exists.
 * One log line is emitted per processed entry via snatch_log().
 *
 * @param array $record Template row; it is copied for every article so that
 *                      fields extracted from one article can never leak into
 *                      the next one.
 * @param array $info   Crawl state, passed by reference. Read here:
 *                      'newList' (entries whose indexes 1..4 hold subject,
 *                      URL, date suffix and item_2), 'header' (handed to
 *                      GetRemoteContent), optional 'para'['pre_max'] (maximum
 *                      number of entries handled per call). Written here:
 *                      'counter' (incremented once per log line).
 *
 * @return bool Always true.
 */
function snatchGetList($record, &$info)
{
    global $db, $setting, $req;

    // The cookie is client-controlled: normalise it to a non-negative integer
    // so that it is safe to use as an array index and in arithmetic.
    $idx = $req->getCookie("ns_idx");
    $idx = empty($idx) ? 0 : (int) $idx;
    if ($idx < 0) {
        $idx = 0;
    }

    for ($i = $idx, $m = count($info['newList']); $i < $m; $i++) {
        // Stop once the per-call quota has been used up.
        if (isset($info['para']['pre_max']) && $i - $idx >= $info['para']['pre_max']) {
            break;
        }

        $row = $info['newList'][$i];

        // Start from a fresh copy of the template for every article.
        $item = $record;
        $item['subject'] = $row[1];
        $item['original'] = "搜狐网";
        $item['url'] = $row[2];
        $item['add_date'] = date("Y") . "/" . $row[3];
        $item['item_2'] = $row[4];

        // Only sohu.com pages are handled; other entries are skipped without
        // logging and without touching the resume cookie.
        if (strpos($item['url'], ".sohu.com") === false) {
            continue;
        }

        $content = GetRemoteContent($item['url'], $info['header']);
        if (!$content) {
            snatchSohuLogItem($info, $item, '获取<span class="failed" style="color:red;">失败!</span>');
        } else {
            if (preg_match("/来源:<span.+?>(.+?)<\\/span>/i", $content, $matches)) {
                $item['original'] = strip_tags($matches[1]);
            }
            if (preg_match("/<meta name=\"keywords\" content=\"(.+?)\">/i", $content, $matches)) {
                $item['item_3'] = str_replace(" ", ",", $matches[1]);
            }
            if (preg_match("/<meta name=\"description\" content=\"(.+?)\">/i", $content, $matches)) {
                $item['item_4'] = substrPro(str_replace(" ", ",", $matches[1]), 0, 230);
            }

            $body = snatchSohuExtractBody($content, $item['url'], $info['header']);
            if ($body === false) {
                // None of the known page layouts matched.
                snatchSohuLogItem($info, $item, '获取<span class="failed" style="color:red;">失败!</span>');
            } else {
                $item['content'] = $body;
                $table = $setting['db']['pre'] . "news_snatch";
                if ($db->record($table, "id", array("url", "=", $item['url'])) === false) {
                    $item['content'] = snatchSohuSanitize($item['content']);
                    $item['item_5'] = snatchSohuLeadImage($item['content']);
                    snatchSohuLogItem($info, $item, '获取<span class="succeed" style="color:green;">成功!</span>');
                    $db->insert($table, $item);
                } else {
                    snatchSohuLogItem($info, $item, '<span class="duplicate" style="color:black;">已存在!</span>');
                }
            }
        }

        // NOTE(review): this stores the index that was just processed, not the
        // next one, so a resumed run starts again on this same entry. Confirm
        // whether $i + 1 was intended before changing it; other code may read
        // this cookie.
        $req->setCookie("ns_idx", $i, 86400);
    }

    // The whole list was walked: drop the resume marker.
    if ($i >= $m) {
        $req->setCookie("ns_idx");
    }

    return true;
}

/**
 * Emit one per-article log line and advance $info['counter'].
 *
 * @param array  $info       Crawl state; 'counter' is post-incremented.
 * @param array  $item       Article row providing 'url' and 'subject'.
 * @param string $statusHtml Markup placed after the article link.
 */
function snatchSohuLogItem(&$info, $item, $statusHtml)
{
    snatch_log('<div class="item">' . $info['counter']++ . ' - <a href="' . $item['url'] . '" target="_blank">' . $item['subject'] . '</a> ' . $statusHtml . '</div>');
}

/**
 * Paragraph clean-up shared by the "textcont" and "contentText" layouts.
 *
 * @param string $html Raw body fragment.
 *
 * @return string Fragment with the paragraph patterns collapsed and the
 *                surrounding whitespace removed.
 */
function snatchSohuTidyParagraphs($html)
{
    $html = preg_replace("/<p>.+?<p>/is", "<p>", $html);
    $html = preg_replace("/<\\/p>[\\s\r\n]+?<\\/p>/is", "</p>", $html);
    $html = preg_replace("/^[\r\n\\s]+/is", "", $html);
    $html = preg_replace("/[\r\n\\s]+\$/is", "", $html);

    return $html;
}

/**
 * Extract one page of a paginated ("textcont") article.
 *
 * @param string $html Full HTML of the page.
 *
 * @return string|false The cleaned text, prefixed by the slide image when one
 *                      is present; false when neither text nor image is found.
 */
function snatchSohuSlidePage($html)
{
    $text = false;

    if (preg_match("/<div class\\=\"textcont\" id\\=\"textcont\">(.+?)<\\/div>/is", $html, $matches)) {
        $text = preg_replace("/<p class\\=\"editUsr.+?<\\/p>/is", "", $matches[1]);
        $text = snatchSohuTidyParagraphs($text);
    }

    if (preg_match("/<img id\\=\"slide_pic\" src\\=\"(.+?)\" alt\\=\"(.+?)\".*?>/is", $html, $matches)) {
        // A page may carry an image without a text container; prepend to an
        // empty string in that case instead of reading an undefined value.
        $text = "<p>" . $matches[0] . "</p>\n" . ($text === false ? "" : $text);
    }

    return $text;
}

/**
 * Extract the article body, trying the known page layouts in order.
 *
 * @param string $content HTML of the article's first page.
 * @param string $url     Article URL; used to derive follow-up page URLs.
 * @param mixed  $header  Passed through to GetRemoteContent for extra pages.
 *
 * @return string|false|null Body markup, or false when no layout matched
 *                           (null only if a PCRE call failed).
 */
function snatchSohuExtractBody($content, $url, $header)
{
    // Layout 1: body delimited by the "正文 st" / "正文 end" comments.
    if (preg_match("/<\\!\\-\\- 正文 st \\-\\->[\r\n\\s]+<div.+?>(.+?)<\\/div>[\r\n\\s]+<\\!\\-\\- 正文 end \\-\\->/is", $content, $matches)) {
        $body = $matches[1];
        $body = preg_replace("/<div class\\=\"tagIntg.+?<\\/div>/is", "", $body);
        $body = preg_replace("/<div class\\=\"tagHotg.+?<\\/div>/is", "", $body);
        $body = preg_replace("/<div class\\=\"editer.+?<\\/div>/is", "", $body);

        return $body;
    }

    // Layout 2: "textcont" container, possibly spread over several pages.
    if (preg_match("/<div class\\=\"textcont\" id\\=\"textcont\">(.+?)<\\/div>/is", $content)) {
        $pages = array(0 => snatchSohuSlidePage($content));

        if (preg_match("/<span id\\=\"pageNum\">1\\/(\\d+)<\\/span>/is", $content, $matches)) {
            $total = $matches[1];
            for ($n = 1; $n < $total; $n++) {
                // Follow-up pages insert "_<n>" before the file extension.
                $pageUrl = preg_replace("/(\\.\\w+)\$/i", "_" . $n . "\\1", $url);
                if ($pageHtml = GetRemoteContent($pageUrl, $header)) {
                    $pageBody = snatchSohuSlidePage($pageHtml);
                    if ($pageBody !== false) {
                        $pages[$n] = $pageBody;
                    }
                }
            }
        }

        return implode("<!-- pagebreak -->", $pages);
    }

    // Layout 3: "contentText" container.
    if (preg_match("/<div.+?id\\=\"contentText\">(.+?)<\\/div>/is", $content, $matches)) {
        $body = preg_replace("/<div class\\=\"editer.+?<\\/div>/is", "", $matches[1]);

        return snatchSohuTidyParagraphs($body);
    }

    // Layout 4: everything between the "news_c" and "news_s" containers.
    if (preg_match("/<div id\\=\"news_c\".+?>(.+?)<div id\\=\"news_s\"/is", $content, $matches)) {
        return $matches[1];
    }

    return false;
}

/**
 * Strip scripts, styles, forms, iframes and site-specific boilerplate from an
 * extracted body. The steps run in a fixed order because later patterns
 * operate on the output of earlier ones.
 *
 * @param string $html Extracted body markup.
 *
 * @return string Cleaned markup.
 */
function snatchSohuSanitize($html)
{
    $firstPass = array(
        "/<script.+?<\\/script>/is",
        "/<style.+?<\\/style>/is",
        "/<form.+?<\\/form>/is",
        "/<iframe.+?<\\/iframe>/is",
        "/^[\r\n\\s]+/is",
        "/[\r\n\\s]+\$/is",
        "/延伸阅读.+\$/",
        "/<DIV class\\=\"tvsubject.+\$/",
    );
    foreach ($firstPass as $pattern) {
        $html = preg_replace($pattern, "", $html);
    }

    $html = str_replace("微博推荐", "", $html);
    $html = str_replace("我来纠错", "", $html);
    $html = str_replace('<div class="line"></div>', "", $html);

    $secondPass = array(
        "/<div class\\=\"stockTrends.+?<\\/div>/s",
        "/<div class\\=\"shareIn.+?<\\/div>/s",
        "/[\r\n]+<div class\\=\"muLink.+?<\\/div>[\r\n]+/",
        "/<DIV class\\=\"tvsubject.+\$/s",
    );
    foreach ($secondPass as $pattern) {
        $html = preg_replace($pattern, "", $html);
    }

    return $html;
}

/**
 * Return the URL of the first absolute (http...) image in the body, or an
 * empty string when there is none or it is one of the placeholder images
 * listed below.
 *
 * @param string $html Cleaned body markup.
 *
 * @return string Image URL or "".
 */
function snatchSohuLeadImage($html)
{
    if (!preg_match("/<img.+?src=(.?)(http.+?)\\1.+?>/is", $html, $matches)) {
        return "";
    }

    $placeholders = array(
        "http://images.sohu.com/ccc.gif",
        "http://photo.sohu.com/20040809/Img221437781.gif",
        "http://photocdn.sohu.com/20090828/dot.gif",
    );

    return in_array($matches[2], $placeholders, true) ? "" : $matches[2];
}
if ($info["page"] > $info["page_max"]) { break; } snatch_log('<div class="page" style="font-size:16px;font-weight:bold;">' . sprintf($setting['language']['plugin_news_snatch_info_snatching'], $info["page"]) . '</div>'); if (snatchGetList($record, $info)) { snatch_log('<div class="succeed" style="color:green;">' . sprintf($setting['language']['plugin_news_snatch_info_snatch_list'], $info["page"], $info['page_count']) . '</div>'); } else { snatch_log('<div class="failed" style="color:red;">' . $setting['language']['plugin_news_snatch_info_snatch_failed'] . '</div>'); } snatch_log('<div class="split">-------------------------------</div>'); } snatch_log('<div class="page">' . sprintf($setting['language']['plugin_news_snatch_info_snatch_page'], $info["page"] - 1) . '</div>'); } else { snatch_log('<div class="page">' . $setting['language']['plugin_news_snatch_info_snatch_error'] . '</div>'); } snatch_log('<div class="page">' . date("Y-m-d H:i:s") . '</div>'); $goto_url = $setting['info']['self']; break; case "news_import": ignore_user_abort("on"); set_time_limit(0); //$log_info = $setting['language']['plugin_news_snatch_import']; $news_show = array(); $news_show['news_id'] = 0; $news_show['cat_id'] = 1; $news_show['web_id'] = 1; $news_show['subject'] = ""; $news_show['style'] = ""; $news_show['views'] = 0; $news_show['describe'] = ""; $news_show['original'] = "";