/**
 * Recursively crawls a page and collects every link that contains $domain.
 *
 * Each newly discovered link is appended to the global $domainLinks list,
 * echoed on its own CRLF-terminated line and then crawled in turn. Links
 * already present in $domainLinks are skipped, which is what terminates the
 * recursion on cyclic link graphs.
 *
 * NOTE(review): recursion depth is bounded only by the number of distinct
 * in-domain links; a very large site can exhaust the call stack or the
 * script time limit.
 *
 * @param string $url    Page whose links are fetched.
 * @param string $domain Case-insensitive substring a link must contain.
 *
 * @return void
 */
function getDomainLinks($url, $domain)
{
    global $domainLinks;

    // The caller may never have initialised the global; in_array() needs an array.
    if (!is_array($domainLinks)) {
        $domainLinks = array();
    }

    $snoopy = new Snoopy();
    // Present the request as an ordinary desktop browser.
    $snoopy->agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17";
    $snoopy->rawheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    $snoopy->rawheaders['Accept-Charset'] = 'GBK,utf-8;q=0.7,*;q=0.3';
    $snoopy->rawheaders['Connection'] = 'keep-alive';
    $snoopy->rawheaders['Accept-Language'] = 'zh-CN,zh;q=0.8';
    $snoopy->rawheaders['Cache-Control'] = 'max-age=0';

    if (!$snoopy->fetchlinks($url) || !is_array($snoopy->results)) {
        return;
    }

    foreach ($snoopy->results as $link) {
        // Ignore links that do not mention the wanted domain.
        if (stripos($link, $domain) === false) {
            continue;
        }
        // Already recorded (and therefore already crawled or being crawled).
        if (in_array($link, $domainLinks, true)) {
            continue;
        }

        // Record before recursing so cycles back to this link are cut short.
        $domainLinks[] = $link;
        echo $link . "\r\n";
        getDomainLinks($link, $domain);
    }
}
/**
 * Fetches the links found on a page and keeps those matching a pattern.
 *
 * @param string $s_url URL of the page to scan.
 * @param string $re    PCRE pattern (including delimiters) a link must match.
 *
 * @return array Matching links in page order, indexed from 0. A link is
 *               dropped when isSameLink() reports it against the links kept
 *               so far. Empty when the fetch produced no link list.
 */
function fetchLinks($s_url, $re)
{
    $o_snoopy = new Snoopy();
    $o_snoopy->fetchlinks($s_url);

    $a_links = array();

    // A failed fetch may leave a non-array in $results; there is nothing to filter then.
    if (!is_array($o_snoopy->results)) {
        return $a_links;
    }

    foreach ($o_snoopy->results as $s_link) {
        if (preg_match($re, $s_link) && !isSameLink($a_links, $s_link)) {
            $a_links[] = $s_link;
        }
    }

    return $a_links;
}
// News gathering script (fragment; it continues past the end of this excerpt).
// Creates the model objects and the Snoopy HTTP client used below.
$news = new Newses();
$newstype = new Newstypes();
$snoopy = new Snoopy();
$typeoption = new Typeoption();
$conditions = array();
// Template identifier; presumably consumed by rendering code outside this fragment.
$tpl_file = "news.gather";
// Only run when the gather form was submitted with a non-empty rules field.
if (isset($_POST['gather']) && !empty($_POST['rules'])) {
    // Allow up to 180 seconds for the remote fetches.
    set_time_limit(180);
    $rules = stripslashes($_POST['rules']);
    // The rules are expected as exactly four CRLF-separated lines: the page to
    // scan, a regex selecting which links to keep, a title regex and a content
    // regex (the last two are not used within this visible fragment).
    $tmp_rules = explode("\r\n", $rules);
    if (!empty($tmp_rules) && count($tmp_rules) == 4) {
        list($remote_url, $remote_url_match, $remote_title_match, $remote_content_match) = $tmp_rules;
    } else {
        // NOTE(review): flash() is defined elsewhere. If it does not end the
        // request, $remote_url and the patterns are undefined below - confirm.
        flash();
    }
    // Collect every link on the remote page.
    $snoopy->fetchlinks($remote_url);
    // This initial value is overwritten by the next statement.
    $urls = array();
    $urls = $snoopy->results;
    if (empty($urls)) {
        flash();
    }
    foreach ($urls as $key => $value) {
        // Drop fetched URLs that do not match the link pattern.
        if (!preg_match($remote_url_match, $value)) {
            unset($urls[$key]);
        }
    }
    // array_unique() keeps the original keys, so $urls may be sparse here.
    $urls = array_unique($urls);
    $u = 0;
    // Chained assignment: $content and $title become empty arrays, and an
    // empty array is appended to $sql.
    $sql[] = $title = $content = array();
    foreach ($urls as $key => $value) {
/**
 * Returns the links found on a page with duplicates removed.
 *
 * @param string $url URL of the page to scan.
 *
 * @return mixed The de-duplicated link array (original keys preserved), or
 *               Snoopy's raw $results value when that is not an array.
 */
function get_page_link($url)
{
    $fetcher = new Snoopy();
    $fetcher->fetchlinks($url);

    $found = $fetcher->results;

    // Anything that is not a link list is handed back untouched.
    if (!is_array($found)) {
        return $found;
    }

    return array_unique($found);
}
/**
 * Validates an incoming trackback and routes it according to the plugin options.
 *
 * Comments that are not trackbacks are returned untouched. For trackbacks up
 * to two checks run, each gated by a plugin option:
 *   - Phase 1 (stbv_validateIP): the sender's IP must equal the IP that the
 *     host of the author URL resolves to.
 *   - Phase 2 (stbv_validateURL): the author's page is fetched with Snoopy
 *     and must link to this post's permalink or, in "open" accuracy mode, to
 *     a URL that starts with one of the configured blog URLs.
 * Findings are written to the global $stbv_val (is_spam, snoopy_problem,
 * log_info). Blocked trackbacks are deleted, marked as spam or held for
 * moderation depending on $stbv_opt['stbv_action'].
 *
 * @param array $incomingTB Comment data; reads comment_type, comment_author,
 *                          comment_author_url and comment_post_ID.
 *
 * @return array The comment data; comment_author is prefixed with
 *               "[BLOCKED BY STBV] " when the trackback was blocked and
 *               stbv_addblockinfo is enabled. With action 'delete' the
 *               script is terminated via die() instead of returning.
 */
function stbv_main($incomingTB)
{
    global $stbv_opt, $stbv_val;

    // We only deal with trackbacks.
    if ($incomingTB['comment_type'] != 'trackback') {
        return $incomingTB;
    }

    // Get trackback information.
    $stbv_val['comment_author'] = $incomingTB['comment_author'];
    $stbv_val['comment_author_url'] = $incomingTB['comment_author_url'];
    $stbv_val['comment_post_permalink'] = get_permalink($incomingTB['comment_post_ID']);
    $stbv_val['comment_post_permalink'] = preg_replace('/\\/$/', '', $stbv_val['comment_post_permalink']); // Remove trailing slash
    $stbv_val['comment_post_ID'] = $incomingTB['comment_post_ID'];

    // Plugin options: in "open" accuracy mode a space-separated list of blog
    // URLs may be accepted instead of the exact permalink. null means
    // "match the permalink strictly".
    $stbv_blogurlsArray = null;
    if ($stbv_opt['stbv_accuracy'] == 'open'
        && is_string($stbv_opt['stbv_blogurls'])
        && strlen($stbv_opt['stbv_blogurls']) > 9
    ) {
        // Drop empty entries (e.g. from double spaces): an empty prefix would
        // match every URL and let any trackback through.
        $stbv_blogurlsArray = array_values(array_filter(explode(' ', $stbv_opt['stbv_blogurls']), 'strlen'));
        if (empty($stbv_blogurlsArray)) {
            $stbv_blogurlsArray = null;
        }
    }

    // 'Is Spam' flag is FALSE by default. Below we check several things and
    // this flag becomes true as soon as we have any doubts.
    $stbv_val['is_spam'] = false;

    // Set to TRUE when Snoopy cannot be loaded or reports an error.
    $stbv_val['snoopy_problem'] = false;

    // If the author's URL is not correct, the trackback is considered spam.
    if (substr($stbv_val['comment_author_url'], 0, 4) != 'http') {
        $stbv_val['log_info'][]['warning'] = 'Author\'s URL was found not to be correct';
        $stbv_val['is_spam'] = true;
    }

    // Phase 1 (IP) - verify the sender's IP address.
    if (!$stbv_val['is_spam'] && $stbv_opt['stbv_validateIP'] == '1') {
        $tmpSender_IP = preg_replace('/[^0-9.]/', '', $_SERVER['REMOTE_ADDR']);
        $authDomainname = stbv_get_domainname_from_uri($stbv_val['comment_author_url']);
        $tmpURL_IP = preg_replace('/[^0-9.]/', '', gethostbyname($authDomainname));
        if ($tmpSender_IP != $tmpURL_IP) {
            $stbv_val['log_info'][]['info'] = 'Sender\'s IP address (' . $tmpSender_IP . ') not equal to IP address of host (' . $tmpURL_IP . ').';
            $stbv_val['is_spam'] = true;
        } else {
            $stbv_val['log_info'][]['info'] = 'IP address (' . $tmpSender_IP . ') was found to be valid.';
        }
    } elseif ($stbv_opt['stbv_validateIP'] != '1') {
        $stbv_val['log_info'][]['info'] = 'IP address validation (Phase 1) skipped since it is not enabled in the plugin\'s options.';
    }

    // Phase 2 (URL) - look for a link to us on the author's page.
    if ($stbv_opt['stbv_validateURL'] == '1') {
        // Nothing to do when an earlier check already flagged the trackback;
        // in particular Snoopy must not be instantiated without being loaded.
        if (!$stbv_val['is_spam']) {
            $stbvAuthorUrlArray = null;

            if (!stbv_loadSnoopy()) {
                // Loading Snoopy failed.
                $stbv_val['log_info'][]['warning'] = 'Loading PHP Snoopy class failed. Phase 2 skipped.';
                $stbv_val['snoopy_problem'] = true;
            } else {
                $stbvSnoopy = new Snoopy();
                // Fetch all URLs of the author's web page.
                if (!@$stbvSnoopy->fetchlinks($stbv_val['comment_author_url'])) {
                    // Snoopy couldn't reach the target website or reported an error.
                    $stbv_val['log_info'][]['warning'] = 'Snoopy couldn\'t find something on the source website or Snoopy error occurred. Phase 2 skipped.';
                    $stbv_val['snoopy_problem'] = true;
                } else {
                    $stbvAuthorUrlArray = $stbvSnoopy->results;
                }
            }

            // Check whether the fetched links contain a link to this website.
            if (!$stbv_val['snoopy_problem'] && is_array($stbvAuthorUrlArray)) {
                $loopSuccess = false;
                foreach ($stbvAuthorUrlArray as $loopUrl) {
                    // Remove trailing slash, "/trackback" and "/trackback/".
                    $loopUrl = preg_replace('/(\\/|\\/trackback|\\/trackback\\/)$/', '', $loopUrl);

                    if ($stbv_blogurlsArray !== null) {
                        // Open mode: accept a link starting with any configured blog URL.
                        foreach ($stbv_blogurlsArray as $loopOptionsURL) {
                            if (substr($loopUrl, 0, strlen($loopOptionsURL)) == $loopOptionsURL) {
                                $loopSuccess = true;
                                break 2;
                            }
                        }
                    } elseif ($loopUrl == $stbv_val['comment_post_permalink']) {
                        // Strict mode or no URLs provided: only the permalink counts.
                        $loopSuccess = true;
                        break;
                    }
                }

                if (!$loopSuccess) {
                    $stbv_val['log_info'][]['info'] = 'The target URL was not found on the source website, therefore the trackback is considered to be spam.';
                    $stbv_val['is_spam'] = true;
                } else {
                    $stbv_val['log_info'][]['info'] = 'The trackback is considered to be valid: URL was found on the source website.';
                }
            }
        }
    } else {
        $stbv_val['log_info'][]['info'] = 'URL validation (Phase 2) skipped since it is not enabled in the plugin\'s options.';
    }

    // Now we know whether we have trackback spam or not.

    // A retrieval problem sends the trackback to moderation when so configured.
    if ($stbv_opt['stbv_moderrors'] == '1' && $stbv_val['snoopy_problem']) {
        if ($stbv_opt['stbv_enablelog'] == '1') {
            stbv_log_addentry('Trackback placed into comment moderation due to an occurred problem while retrieving URLs from source website.');
        }
        if ($stbv_opt['stbv_addblockinfo'] == '1') {
            $incomingTB['comment_author'] = '[BLOCKED BY STBV] ' . $incomingTB['comment_author'];
        }
        // Closures replace create_function(), which no longer exists in PHP 8.
        add_filter('pre_comment_approved', function ($a) {
            return '0';
        });
        return $incomingTB;
    }

    // No trackback spam.
    if (!$stbv_val['is_spam']) {
        if ($stbv_opt['stbv_enablelog'] == '1') {
            stbv_log_addentry('Trackback approved.');
        }
        return $incomingTB;
    }

    // It is trackback spam: delete right away, mark as spam, or moderate.
    switch ($stbv_opt['stbv_action']) {
        case 'delete':
            if ($stbv_opt['stbv_enablelog'] == '1') {
                stbv_log_addentry('Trackback discarded.');
            }
            die('Your trackback has been rejected.');

        case 'spam':
            if ($stbv_opt['stbv_enablelog'] == '1') {
                stbv_log_addentry('Trackback marked as spam.');
            }
            if ($stbv_opt['stbv_addblockinfo'] == '1') {
                $incomingTB['comment_author'] = '[BLOCKED BY STBV] ' . $incomingTB['comment_author'];
            }
            add_filter('pre_comment_approved', function ($a) {
                return 'spam';
            });
            return $incomingTB;

        default:
            if ($stbv_opt['stbv_enablelog'] == '1') {
                stbv_log_addentry('Trackback placed into comment moderation.');
            }
            if ($stbv_opt['stbv_addblockinfo'] == '1') {
                $incomingTB['comment_author'] = '[BLOCKED BY STBV] ' . $incomingTB['comment_author'];
            }
            add_filter('pre_comment_approved', function ($a) {
                return '0';
            });
            return $incomingTB;
    }
}
/**
 * Fetches the links on one page and records them for later spidering.
 *
 * Every link returned by check_wai() is stored in ve123_links_temp unless it
 * is already there. Links that also pass check_include() are additionally
 * stored (without trailing slash, level '1') in ve123_links when new and
 * valid according to is_url(). Progress is echoed as HTML and flushed after
 * each accepted link.
 *
 * NOTE(review): the echoed values are not HTML-escaped. Escaping depends on
 * the page encoding, which is not visible here - confirm and escape.
 *
 * @param string $link             Page to fetch links from.
 * @param string $old              Originating page; only shown via GetSiteUrl().
 * @param mixed  $numm             Passed through to check_wai().
 * @param mixed  $ooo              Not used by this function.
 * @param mixed  $site_id          Site id stored with every recorded link.
 * @param string $include_word     Passed through to check_include().
 * @param string $not_include_word Passed through to check_include().
 *
 * @return false|null false when $link fails is_url(); otherwise nothing.
 */
function add_links_insite($link, $old, $numm, $ooo, $site_id, $include_word, $not_include_word)
{
    if (!is_url($link)) {
        return false;
    }

    global $db;

    $url_old = GetSiteUrl($old);
    echo "原始页=" . $url_old . " - - <";
    echo "首层 id=" . $site_id . "> - - <";
    echo "包含字段=" . $include_word . ">";
    echo "<br>";

    // Links are collected with the third-party Snoopy client.
    $snoopy = new Snoopy();
    $snoopy->fetchlinks($link);
    $links = check_wai($snoopy->results, $numm, $link);

    // A failed fetch can yield a non-array; treat that as "no links" rather
    // than passing it to array_unique().
    if (!is_array($links)) {
        $links = array();
    }
    $links = array_values(array_unique($links));

    foreach ($links as $value) {
        // The links come from a remote page and are untrusted, so they are
        // escaped before being placed in a SQL string.
        // NOTE(review): addslashes() is a stop-gap; switch to the DB wrapper's
        // own escaping or prepared statements once its API is confirmed.
        $row = $db->get_one("select * from ve123_links_temp where url='" . addslashes($value) . "'");
        if (empty($row)) {
            $db->insert("ve123_links_temp", array('url' => $value, 'site_id' => $site_id));
        }

        $value = rtrim($value, "/");
        if (!check_include($value, $include_word, $not_include_word)) {
            continue;
        }

        $row = $db->get_one("select * from ve123_links where url='" . addslashes($value) . "'");
        if (empty($row) && is_url($value)) {
            echo "<font color=#C60A00><b>抓取到:</b></font>";
            $db->insert("ve123_links", array('url' => $value, 'site_id' => $site_id, 'level' => '1'));
        } else {
            echo "<b>已存在了:</b>";
        }
        echo "<a href=" . $value . " target=_blank>" . $value . "</a><br>";

        // ob_flush() raises a notice when no output buffer is active.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }
}
/**
 * Classifies an incoming comment, trackback or pingback as spam or ham.
 *
 * Trackbacks and pingbacks are checked, depending on the options, by
 * comparing the sender's IP with the IP of the author URL's host and by
 * fetching the author URL and looking for a link to the post's permalink.
 * Ordinary comments must carry a posted "wphc_value" equal to one of the
 * keys in $options['key'].
 *
 * The spam/ham counters in the options are updated, and for spam a filter is
 * registered that deletes the comment, marks it as spam or holds it for
 * moderation according to $options['moderation']. With logging enabled the
 * reason is appended to comment_content.
 *
 * @param array $comment Comment data; reads comment_type, comment_author_url,
 *                       comment_post_ID and comment_content.
 *
 * @return array The comment data, possibly with log text appended.
 */
function wphc_check_hidden_tag($comment)
{
    // Admins can do what they like.
    if (is_admin()) {
        return $comment;
    }

    $type = isset($comment['comment_type']) ? $comment['comment_type'] : '';
    $options = wphc_option();
    $spam = false;

    if ($type == "trackback" || $type == "pingback") {
        // Check the website's IP against the URL it is sending as a trackback.
        if ($options['validate-ip']) {
            // NOTE(review): X-Forwarded-For is supplied by the client and can
            // be forged to pass this check; only trust it behind a known
            // proxy. Left unchanged because sites behind a proxy rely on it.
            $server_ip = isset($_SERVER['HTTP_X_FORWARDED_FOR']) ? $_SERVER['HTTP_X_FORWARDED_FOR'] : $_SERVER['REMOTE_ADDR'];

            // parse_url() yields null/false when there is no usable host.
            $host = parse_url($comment['comment_author_url'], PHP_URL_HOST);
            $web_ip = is_string($host) ? gethostbyname($host) : '';

            $ipv = $server_ip != $web_ip;
            $spam = $spam || $ipv;
            if ($options['logging'] && $ipv) {
                $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] The comment's server IP (" . $server_ip . ") doesn't match the" . " comment's URL host IP (" . $web_ip . ") and so is spam.";
            }
        }

        // Look for our link in the page itself.
        if (!$spam && $options['validate-url']) {
            if (!class_exists('Snoopy')) {
                require_once ABSPATH . WPINC . '/class-snoopy.php';
            }
            $permalink = get_permalink($comment['comment_post_ID']);
            $permalink = preg_replace('/\\/$/', '', $permalink);

            $snoop = new Snoopy();
            if (@$snoop->fetchlinks($comment['comment_author_url'])) {
                $found = false;
                if (!empty($snoop->results)) {
                    foreach ($snoop->results as $url) {
                        // Ignore a trailing slash, "/trackback" or "/trackback/".
                        $url = preg_replace('/(\\/|\\/trackback|\\/trackback\\/)$/', '', $url);
                        if ($url == $permalink) {
                            $found = true;
                            break;
                        }
                    }
                }
                if ($options['logging'] && !$found) {
                    $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] The comment's actual post text did not contain your blog url (" . $permalink . ") and so is spam.";
                }
                $spam = $spam || !$found;
            } else {
                $spam = true;
                if ($options['logging']) {
                    $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] Snoopy failed to fetch results for the comment blog url (" . $comment['comment_author_url'] . ") with error '" . $snoop->error . "' and so is spam.";
                }
            }
        }
    } else {
        // Check the posted value against the currently valid keys.
        $posted = isset($_POST["wphc_value"]) ? $_POST["wphc_value"] : null;
        $keys = (isset($options['key']) && is_array($options['key'])) ? $options['key'] : array();

        // Compare as strings, strictly: a loose comparison would let values
        // such as "123abc" match the key 123 on older PHP versions.
        $spam = !is_scalar($posted) || !in_array((string) $posted, array_map('strval', $keys), true);

        if ($options['logging'] && $spam) {
            $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] The poster sent us '" . intval($posted) . " which is not a hashcash value.";
        }
    }

    if ($spam) {
        $options['comments-spam'] = (int) $options['comments-spam'] + 1;
        wphc_option($options);

        // Closures replace create_function(), which no longer exists in PHP 8.
        switch ($options['moderation']) {
            case 'delete':
                add_filter('comment_post', function ($id) {
                    wp_delete_comment($id);
                    die('This comment has been deleted by WP Hashcash');
                });
                break;

            case 'akismet':
                add_filter('pre_comment_approved', function ($a) {
                    return 'spam';
                });
                break;

            case 'moderate':
            default:
                add_filter('pre_comment_approved', function ($a) {
                    return 0;
                });
                break;
        }
    } else {
        $options['comments-ham'] = (int) $options['comments-ham'] + 1;
        wphc_option($options);
    }

    return $comment;
}
/**
 * Collects the item.jd.com links found on a page and stores their data.
 *
 * Each link is URL-decoded, and kept when its host is item.jd.com and it has
 * no fragment. The unique links are passed to jd_data(); a non-empty result
 * is handed to jd_inserts().
 *
 * @param string $url Page to scan for product links.
 *
 * @return bool|array true when at least one matching link was found,
 *                    otherwise an empty array.
 */
public function jd_item($url)
{
    $snoopy = new Snoopy();
    $snoopy->fetchlinks($url);
    $link_all = $snoopy->results;

    // No links, or a non-array result from a failed fetch.
    if (empty($link_all) || !is_array($link_all)) {
        return array();
    }

    $link_item = array();
    foreach ($link_all as $value) {
        $link_decode = urldecode($value);
        $link_info = parse_url($link_decode);

        // parse_url() returns false for malformed URLs and omits 'host' for
        // relative or scheme-only links; neither can be a product link.
        if (!is_array($link_info) || !isset($link_info['host'])) {
            continue;
        }

        if ($link_info['host'] == 'item.jd.com' && empty($link_info['fragment'])) {
            $link_item[] = $link_decode;
        }
    }

    if (empty($link_item)) {
        return array();
    }

    $link_item = array_unique($link_item);
    $data = $this->jd_data($link_item);
    if (!empty($data)) {
        $this->jd_inserts($data);
    }

    return true;
}