/**
 * Marks the comment identified by $this->_commentId as non-spam and
 * re-trains the bayesian filter with its contents.
 *
 * EVENT_PRE_MARK_NO_SPAM_COMMENT is fired before anything else. If the
 * status update succeeds, the comment is untrained as spam, trained as
 * non-spam, and EVENT_POST_MARK_NO_SPAM_COMMENT is fired.
 *
 * @return true if the comment status was updated, false otherwise
 * @private
 */
function _markCommentAsNonSpam()
{
    // throw the pre-event
    $this->notifyEvent(EVENT_PRE_MARK_NO_SPAM_COMMENT, array("commentId" => $this->_commentId));

    $this->_view = new AdminArticleCommentsListView($this->_blogInfo, array("article" => $this->_article));

    $comments = new ArticleComments();
    if (!$comments->updateCommentStatus($this->_commentId, COMMENT_STATUS_NONSPAM)) {
        $this->_view->setErrorMessage($this->_locale->tr("error_marking_comment_as_nonspam"));
        $this->setCommonData();

        return false;
    }

    $this->_view->setSuccessMessage($this->_locale->tr("comment_marked_as_nonspam_ok"));
    $this->setCommonData();

    // before exiting, fetch the comment and teach the filter that its
    // contents are NOT spam: first undo the earlier "spam" training, then
    // train the same data as non-spam
    $comment = $comments->getPostComment($this->_articleId, $this->_commentId);

    // the status update already succeeded; if the comment cannot be loaded
    // we skip the retraining instead of calling methods on a non-object
    if (is_object($comment)) {
        $blogId = $this->_blogInfo->getId();
        $topic = $comment->getTopic();
        $text = $comment->getText();
        $userName = $comment->getUserName();
        $userEmail = $comment->getUserEmail();
        $userUrl = $comment->getUserUrl();

        $bayesian = new BayesianFilterCore();
        $bayesian->untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, true);
        $bayesian->train($blogId, $topic, $text, $userName, $userEmail, $userUrl, false);
    }

    // throw the post-event if everything went fine
    $this->notifyEvent(EVENT_POST_MARK_NO_SPAM_COMMENT, array("commentId" => $this->_commentId));

    return true;
}
/**
 * Runs the bayesian spam check on an incoming comment.
 *
 * Nothing is checked when the "bayesian_filter_enabled" setting is off or
 * when the request's "op" value is not "AddComment". Otherwise the comment
 * fields are scored with getSpamProbability() and compared against the
 * configured threshold. The filter is then trained with the message using
 * that same verdict.
 *
 * @return A PipelineResult object: positive if the comment is not
 * considered spam, negative (HIGH_SPAM_PROBABILITY) if it is.
 */
function filter()
{
    $config =& Config::getConfig();

    // filter switched off: let everything through
    if (!$config->getValue("bayesian_filter_enabled")) {
        return new PipelineResult(true);
    }

    $blogInfo = $this->_pipelineRequest->getBlogInfo();
    $request = $this->_pipelineRequest->getHttpRequest();

    // only comment submissions are of interest to this filter
    if ("AddComment" != $request->getValue("op")) {
        return new PipelineResult();
    }

    // collect the fields of the comment being posted
    $text = $request->getValue("commentText");
    $topic = $request->getValue("commentTopic");
    $authorName = $request->getValue("userName");
    $authorEmail = $request->getValue("userEmail");
    $authorUrl = $request->getValue("userUrl");
    $articleId = $request->getValue("articleId");
    $parentId = $request->getValue("parentId");
    $parentId = ($parentId == "") ? 0 : $parentId;

    $spamicity = $this->getSpamProbability($blogInfo->getId(), $topic, $text, $authorName, $authorEmail, $authorUrl);
    $isSpam = ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"));

    if (!$isSpam) {
        $result = new PipelineResult(true);
    } else {
        $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, "You cannot post this message. Anti-spam filter has blocked it.");

        // Depending on the configuration the rejected comment is either
        // stored (flagged as spam) or silently dropped. Storing it from
        // here bends the layering a bit, but it is simpler than telling
        // the AddComment action to add the comment with a spam status.
        if ($config->getValue("bayesian_filter_spam_comments_action") == BAYESIAN_FILTER_KEEP_COMMENT_ACTION) {
            $store = new ArticleComments();
            $store->addComment(
                new UserComment(
                    $articleId,
                    $parentId,
                    $topic,
                    $text,
                    null,
                    $authorName,
                    $authorEmail,
                    $authorUrl,
                    Client::getIp(),
                    0,
                    COMMENT_STATUS_SPAM
                )
            );
        }
    }

    // feed the message back into the filter, whatever the verdict was
    BayesianFilterCore::train($blogInfo->getId(), $topic, $text, $authorName, $authorEmail, $authorUrl, $isSpam);

    return $result;
}
/**
 * Untrains the filter using the data of the given Article object.
 *
 * The article's blog, topic and text are handed to untrain(); user name,
 * e-mail and URL are passed as empty strings and the last argument is
 * false (presumably the "is spam" flag -- confirm against untrain()).
 *
 * @param article An Article object
 * @return whatever untrain() returns
 * @static
 * @see untrain
 */
function untrainWithArticle($article)
{
    $blog = $article->getBlog();
    $topic = $article->getTopic();
    $text = $article->getText();

    $outcome = BayesianFilterCore::untrain($blog, $topic, $text, "", "", "", false);

    return $outcome;
}
/**
 * Carries out the specified action: builds an Article from the data held
 * by this action, saves it and prepares the view shown afterwards.
 *
 * When the article is saved the bayesian filter is trained with it and a
 * notification is added if requested. For published articles, XML-RPC
 * pings and trackbacks are also handled if requested, and the blog cache
 * is reset. Success or failure is reported through messages set on the
 * view.
 *
 * @return always true
 */
function perform()
{
    $this->_fetchCommonData();

    $this->_postId = $this->_request->getValue("postId");
    $this->_previewPost = $this->_request->getValue("previewPost");
    $this->_addPost = $this->_request->getValue("addPost");
    $this->_saveDraft = $this->_request->getValue("isDraft");

    // build the article: intro and extended text are stored together,
    // separated by POST_EXTENDED_TEXT_MODIFIER
    $postText = Textfilter::xhtmlize($this->_postText) . POST_EXTENDED_TEXT_MODIFIER . Textfilter::xhtmlize($this->_postExtendedText);
    $article = new Article(
        $this->_postTopic,
        $postText,
        $this->_postCategories,
        $this->_userInfo->getId(),
        $this->_blogInfo->getId(),
        $this->_postStatus,
        0,
        array(),
        $this->_postSlug
    );

    // set also the date before it's too late
    $article->setDateObject($this->_postTimestamp);
    $article->setCommentsEnabled($this->_commentsEnabled);

    // save the article to the db
    $artId = $this->_savePostData($article);

    if ($artId) {
        $this->_view = new AdminPostsListView($this->_blogInfo);
        $message = $this->_locale->tr("post_added_ok");

        // train the filter
        BayesianFilterCore::trainWithArticle($article);

        // add the article notification if requested to do so
        if ($this->_sendNotification) {
            $artNotifications = new ArticleNotifications();
            $artNotifications->addNotification($artId, $this->_blogInfo->getId(), $this->_userInfo->getId());
            $message .= " " . $this->_locale->tr("send_notifications_ok");
        }

        // we only have to send trackback pings if the article was published
        // otherwise there is no need to...
        $article->setId($artId);
        if ($article->getStatus() == POST_STATUS_PUBLISHED) {
            // get the output from the xmlrpc pings but only if the user decided to do so!
            if ($this->_sendPings) {
                $pingsOutput = $this->sendXmlRpcPings();
                $message .= "<br/><br/>" . $pingsOutput;
            }

            // and now check what to do with the trackbacks
            if ($this->_sendTrackbacks) {
                // get the links from the text of the post
                $postLinks = StringUtils::getLinks(stripslashes($article->getText()));

                // get the real trackback links from trackbackUrls: one URL
                // per line, accepting any line ending. trim() returns the
                // cleaned string instead of modifying its argument, so the
                // result must be assigned before testing for empty lines.
                $trackbackLinks = array();
                foreach (preg_split('/\r\n|\r|\n/', $this->_trackbackUrls) as $host) {
                    $host = trim($host);
                    if ($host !== "") {
                        array_push($trackbackLinks, $host);
                    }
                }

                // if no links, there is nothing to do
                if (count($postLinks) == 0 && count($trackbackLinks) == 0) {
                    $this->_view = new AdminPostsListView($this->_blogInfo);
                    $this->_view->setErrorMessage($this->_locale->tr("error_no_trackback_links_sent"));
                } else {
                    $this->_view = new AdminTemplatedView($this->_blogInfo, "sendtrackbacks");
                    $this->_view->setValue("post", $article);
                    $this->_view->setValue("postLinks", $postLinks);
                    $this->_view->setValue("trackbackLinks", $trackbackLinks);
                }
            }

            $this->_view->setSuccessMessage($message);
            $this->notifyEvent(EVENT_POST_POST_ADD, array("article" => &$article));

            // empty the cache used by this blog
            CacheControl::resetBlogCache($this->_blogInfo->getId());
        } else {
            $this->_view = new AdminPostsListView($this->_blogInfo);
            $this->_view->setSuccessMessage($this->_locale->tr("post_added_not_published"));
            $this->notifyEvent(EVENT_POST_POST_ADD, array("article" => &$article));
        }
    } else {
        $this->_view = new AdminPostsListView($this->_blogInfo);
        $this->_view->setErrorMessage($this->_locale->tr("error_adding_post"));
    }

    $this->setCommonData();

    // better to return true if everything fine
    return true;
}
/**
 * Trains the filter with every comment returned by _getAllComments() for
 * the given blog, presenting each one as non-spam. This is good food for
 * the filter since it then knows which things should be allowed.
 *
 * Only the filter is trained here; no comment status is modified by this
 * method. It reads the comments through $this, so it has to be called on
 * an instance.
 *
 * @param blogId The blog whose comments are used (the original notes
 * describe 0 as meaning "all blogs"; that is decided by _getAllComments())
 * @return Always true
 */
function markCommentsAsNonSpam($blogId = 0)
{
    // gather the rows that will be fed to the filter
    $rows = $this->_getAllComments($blogId);

    $filter = new BayesianFilterCore();

    // every row is trained with the spam flag set to false
    foreach ($rows as $row) {
        $filter->train(
            $row["blog_id"],
            $row["topic"],
            $row["text"],
            $row["user_name"],
            $row["user_email"],
            $row["user_url"],
            false
        );
    }

    return true;
}