/**
 * Marks the current comment as non-spam and retrains the bayesian filter
 * with its contents.
 *
 * EVENT_PRE_MARK_NO_SPAM_COMMENT is fired first. The comment status is then
 * switched to COMMENT_STATUS_NONSPAM; if that fails, an error message is set
 * in the view and nothing else happens. On success the comment is loaded
 * again, untrained and trained in the filter, and
 * EVENT_POST_MARK_NO_SPAM_COMMENT is fired.
 *
 * @return true if the comment status was updated, false otherwise
 * @private
 */
function _markCommentAsNonSpam()
{
    // throw the pre-event
    $this->notifyEvent(EVENT_PRE_MARK_NO_SPAM_COMMENT, array("commentId" => $this->_commentId));

    $this->_view = new AdminArticleCommentsListView($this->_blogInfo, array("article" => $this->_article));
    $comments = new ArticleComments();

    if (!$comments->updateCommentStatus($this->_commentId, COMMENT_STATUS_NONSPAM)) {
        $this->_view->setErrorMessage($this->_locale->tr("error_marking_comment_as_nonspam"));
        $this->setCommonData();

        return false;
    }

    $this->_view->setSuccessMessage($this->_locale->tr("comment_marked_as_nonspam_ok"));
    $this->setCommonData();

    // before exiting, we should get the comment and train the filter
    // to recognize its contents as non-spam
    $comment = $comments->getPostComment($this->_articleId, $this->_commentId);

    // the status has already been updated at this point, so a failed lookup
    // must not abort the request with a fatal "member function on a
    // non-object" error: in that case only the retraining is skipped
    if (is_object($comment)) {
        $blogId    = $this->_blogInfo->getId();
        $topic     = $comment->getTopic();
        $text      = $comment->getText();
        $userName  = $comment->getUserName();
        $userEmail = $comment->getUserEmail();
        $userUrl   = $comment->getUserUrl();

        $bayesian = new BayesianFilterCore();
        // the trailing flag is the spam marker: first undo what was learnt
        // with the flag set to true (presumably the earlier "spam" training --
        // TODO confirm against untrain()), then learn the same data with the
        // flag set to false
        $bayesian->untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, true);
        $bayesian->train($blogId, $topic, $text, $userName, $userEmail, $userUrl, false);
    }

    // throw the post-event if everything went fine
    $this->notifyEvent(EVENT_POST_MARK_NO_SPAM_COMMENT, array("commentId" => $this->_commentId));

    return true;
}
/**
 * Trains the filter with the contents of an article, always flagged as
 * non-spam.
 *
 * Only the article's blog, topic and text are used; the user name, e-mail
 * and URL fields are passed to the filter as empty strings.
 *
 * @param article An Article object
 * @return whatever BayesianFilterCore::train() returns (documented as true
 * by the original author)
 * @static
 */
function trainWithArticle($article)
{
    $blog  = $article->getBlog();
    $topic = $article->getTopic();
    $text  = $article->getText();

    // last argument false: the article is fed to the filter as non-spam
    $trained = BayesianFilterCore::train($blog, $topic, $text, "", "", "", false);

    return $trained;
}
/**
 * Runs the bayesian anti-spam check on the incoming request.
 *
 * The check is skipped when the "bayesian_filter_enabled" setting is off or
 * when the request is not an "AddComment" operation. Otherwise the spam
 * probability of the submitted comment is compared against the configured
 * threshold, the comment is optionally stored as spam, and the filter is
 * trained with the submitted data using the verdict it has just reached.
 *
 * @return A positive PipelineResult object if the comment is not spam or a
 * negative one if it is.
 */
function filter()
{
    $config =& Config::getConfig();

    // filter switched off: let everything through
    if (!$config->getValue("bayesian_filter_enabled")) {
        return new PipelineResult(true);
    }

    // get some info
    $blogInfo = $this->_pipelineRequest->getBlogInfo();
    $request  = $this->_pipelineRequest->getHttpRequest();

    // only comment submissions are of interest to this filter, anything
    // else goes through with a default result
    if ($request->getValue("op") != "AddComment") {
        return new PipelineResult();
    }

    // data of the comment being posted
    $commentText  = $request->getValue("commentText");
    $commentTopic = $request->getValue("commentTopic");
    $userName     = $request->getValue("userName");
    $userEmail    = $request->getValue("userEmail");
    $userUrl      = $request->getValue("userUrl");
    $articleId    = $request->getValue("articleId");
    $parentId     = $request->getValue("parentId");
    // an empty parent id is replaced by 0
    $parentId = ($parentId == "") ? 0 : $parentId;

    $blogId    = $blogInfo->getId();
    $spamicity = $this->getSpamProbability($blogId, $commentTopic, $commentText, $userName, $userEmail, $userUrl);
    $isSpam    = ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"));

    if ($isSpam) {
        $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, "You cannot post this message. Anti-spam filter has blocked it.");

        // Decide whether the rejected comment is kept in the database (marked
        // as spam) or simply thrown away. Strictly speaking this belongs in
        // the AddComment action, but handling it here avoids having to tell
        // that action that a blocked comment should still be stored.
        $keepComment = ($config->getValue("bayesian_filter_spam_comments_action") == BAYESIAN_FILTER_KEEP_COMMENT_ACTION);
        if ($keepComment) {
            $commentStore = new ArticleComments();
            $senderIp     = Client::getIp();
            $spamComment  = new UserComment($articleId, $parentId, $commentTopic, $commentText, null, $userName, $userEmail, $userUrl, $senderIp, 0, COMMENT_STATUS_SPAM);
            $commentStore->addComment($spamComment);
        }
    } else {
        $result = new PipelineResult(true);
    }

    // train the filter with the message, be it spam or not...
    BayesianFilterCore::train($blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, $userUrl, $isSpam);

    return $result;
}
/**
 * Feeds every comment returned by _getAllComments() for the given blog to
 * the bayesian filter as non-spam, so that the filter learns what kind of
 * content should be allowed.
 *
 * According to the original documentation a blog id of 0 selects the
 * comments of all blogs; that selection happens inside _getAllComments().
 *
 * NOTE(review): this method was originally tagged @static, but its body
 * relies on $this -- confirm how callers invoke it.
 *
 * @param blogId The blog
 * @return Always true
 */
function markCommentsAsNonSpam($blogId = 0)
{
    // collect the comments to learn from
    $rows = $this->_getAllComments($blogId);

    // train the filter with each one of them, flagged as non-spam
    $filter = new BayesianFilterCore();
    foreach ($rows as $row) {
        $filter->train(
            $row["blog_id"],
            $row["topic"],
            $row["text"],
            $row["user_name"],
            $row["user_email"],
            $row["user_url"],
            false
        );
    }

    return true;
}