/**
 * Builds the token list for a message and asks BayesianFilter for the
 * resulting probability.
 *
 * The topic, user name, user e-mail and user URL tokens are passed through
 * BayesianTokenizer::addContextMark() with their respective TOKEN_*_MARK
 * constant; the tokens of the body text are used as returned by tokenize().
 *
 * @private
 * @param $blogId    identifier handed to BayesianFilter::_getMostSignificantTokens()
 * @param $topic     topic of the message
 * @param $text      body text of the message
 * @param $userName  name given by the author
 * @param $userEmail e-mail address given by the author
 * @param $userUrl   URL given by the author
 * @return whatever BayesianFilter::_getBayesProbability() returns for the
 *         most significant tokens (presumably the spam probability)
 */
function getSpamProbability($blogId, $topic, $text, $userName, $userEmail, $userUrl)
{
    $tokenizer = new BayesianTokenizer();

    // tokenize each field in turn; all but the body text get a context mark
    $topicTokens = $tokenizer->tokenize($topic);
    $topicTokens = $tokenizer->addContextMark($topicTokens, TOKEN_TOPIC_MARK);

    $textTokens = $tokenizer->tokenize($text);

    $nameTokens = $tokenizer->tokenize($userName);
    $nameTokens = $tokenizer->addContextMark($nameTokens, TOKEN_USER_NAME_MARK);

    $emailTokens = $tokenizer->tokenize($userEmail);
    $emailTokens = $tokenizer->addContextMark($emailTokens, TOKEN_USER_EMAIL_MARK);

    $urlTokens = $tokenizer->tokenize($userUrl);
    $urlTokens = $tokenizer->addContextMark($urlTokens, TOKEN_USER_URL_MARK);

    // merge order: topic, text, user name, user e-mail, user URL
    $allTokens = array_merge($topicTokens, $textTokens, $nameTokens, $emailTokens, $urlTokens);

    // only the most significant tokens take part in the final computation
    $relevantTokens = BayesianFilter::_getMostSignificantTokens($blogId, $allTokens);

    return BayesianFilter::_getBayesProbability($relevantTokens);
}
/**
 * Processes the pipeline, using the request and blogInfo
 * objects as given in the constructor.
 *
 * If the "security_pipeline_enabled" setting is false, a valid
 * PipelineResult is returned right away and no filter is executed.
 *
 * Otherwise every class listed in $_pLogPipelineRegisteredFilters, except
 * BayesianFilter, is instantiated and executed in order. The first result
 * that is not valid stops the processing and is returned. If BayesianFilter
 * is registered, it is executed exactly once, after all of the other
 * filters have accepted the request.
 *
 * @return the result of the last filter that was executed, or a valid
 *         PipelineResult if no filter was executed at all. Except when the
 *         pipeline is disabled, the result is also stored in $this->_result.
 */
function process()
{
    global $_pLogPipelineRegisteredFilters;

    // check if the pipeline is enabled
    $config =& Config::getConfig();
    if ($config->getValue("security_pipeline_enabled") == false) {
        // pipeline is disabled, so everything's fine
        return new PipelineResult(true);
    }

    // start from a valid result, so that an enabled pipeline without any
    // registered filter does not return an undefined value
    $result = new PipelineResult(true);

    // boolean to indicate whether we should run the bayesian filter
    // This is a hack. We don't want to run the bayesian filter on
    // a message that will be rejected by another filter. The only way
    // to know this is to run after all of the other filters have run.
    //
    // Ideally, once a message is rejected, we would want to let all
    // of the filters know about it. This way they could do something
    // interesting (i.e. train the message as spam, report the ip address
    // and urls to dnsbl services)
    // default it to false.
    $runBayesianFilter = false;

    // if enabled, then check all the filters
    foreach ($_pLogPipelineRegisteredFilters as $filterClass) {
        if (strcmp($filterClass, "BayesianFilter") == 0) {
            // only remember that it is registered and skip it here; it is
            // executed after the loop, otherwise it would run twice and
            // before the remaining filters had a chance to reject
            $runBayesianFilter = true;
            continue;
        }

        // create an instance of the filter
        $pipelineRequest = new PipelineRequest($this->_httpRequest, $this->_blogInfo);
        $filter = new $filterClass($pipelineRequest);

        // and execute it...
        $result = $filter->filter();

        // if there was an error, we better say so now
        // and quit, making sure that we're keeping the
        // error code
        if (!$result->isValid()) {
            $this->_result = $result;
            return $result;
        }
    }

    if ($runBayesianFilter) {
        // every other filter accepted the request, so now it is worth
        // running the bayesian filter
        $pipelineRequest = new PipelineRequest($this->_httpRequest, $this->_blogInfo);
        $filter = new BayesianFilter($pipelineRequest);

        // and execute it...
        $result = $filter->filter();
    }

    $this->_result = $result;

    return $result;
}