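/**
  * Tokenizes the topic, text and user fields of a message and returns the
  * value computed by BayesianFilter::_getBayesProbability() for the tokens
  * selected by BayesianFilter::_getMostSignificantTokens().
  *
  * @private
  */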
 function getSpamProbability($blogId, $topic, $text, $userName, $userEmail, $userUrl)
 {
     // A single tokenizer instance is shared by every field of the message.
     $splitter = new BayesianTokenizer();

     // Topic: tokenized, then passed through addContextMark() with the
     // topic mark constant.
     $topicWords = $splitter->tokenize($topic);
     $topicWords = $splitter->addContextMark($topicWords, TOKEN_TOPIC_MARK);

     // Message text: the only field whose tokens get no context mark.
     $bodyWords = $splitter->tokenize($text);

     // Author fields: each one is tokenized and marked with its own constant.
     $nameWords = $splitter->tokenize($userName);
     $nameWords = $splitter->addContextMark($nameWords, TOKEN_USER_NAME_MARK);

     $emailWords = $splitter->tokenize($userEmail);
     $emailWords = $splitter->addContextMark($emailWords, TOKEN_USER_EMAIL_MARK);

     $urlWords = $splitter->tokenize($userUrl);
     $urlWords = $splitter->addContextMark($urlWords, TOKEN_USER_URL_MARK);

     // Merge in the order topic, text, name, e-mail, url.
     $allWords = array_merge($topicWords, $bodyWords, $nameWords, $emailWords, $urlWords);

     // Narrow the merged list down for the given blog and hand the
     // selection to the probability calculation.
     $relevantWords = BayesianFilter::_getMostSignificantTokens($blogId, $allWords);

     return BayesianFilter::_getBayesProbability($relevantWords);
 }
Ejemplo n.º 2
0
 /**
  * Processes the pipeline, using the request and blogInfo
  * objects as given in the constructor.
  */
 function process()
 {
     // List of registered filter class names; it is populated outside of
     // this method.
     global $_pLogPipelineRegisteredFilters;

     // check if the pipeline is enabled
     $config =& Config::getConfig();
     if ($config->getValue("security_pipeline_enabled") == false) {
         // pipeline is disabled, so everything's fine
         return new PipelineResult(true);
     }

     // Treat a registry that was never populated as "no filters" rather
     // than iterating over a non-array value.
     $registeredFilters = is_array($_pLogPipelineRegisteredFilters)
         ? $_pLogPipelineRegisteredFilters
         : array();

     // Start from a positive result so that an empty registry still returns
     // a valid PipelineResult instead of an undefined variable.
     $result = new PipelineResult(true);

     // Whether the bayesian filter is registered and has to be run.
     // This is a hack.  We don't want to run the bayesian filter on
     // a message that will be rejected by another filter.  The only way
     // to know this is to run it after all of the other filters have run.
     //
     // Ideally, once a message is rejected, we would want to let all
     // of the filters know about it.  This way they could do something
     // interesting (i.e. train the message as spam, report the ip address
     // and urls to dnsbl services)
     $runBayesianFilter = false;

     // run every registered filter except the bayesian one
     foreach ($registeredFilters as $filterClass) {
         if (strcmp($filterClass, "BayesianFilter") === 0) {
             // Only remember that it is registered and skip it here; it is
             // executed exactly once, after the loop. Without this skip the
             // filter would run both here and again below.
             $runBayesianFilter = true;
             continue;
         }

         // create an instance of the filter
         $pipelineRequest = new PipelineRequest($this->_httpRequest, $this->_blogInfo);
         $filter = new $filterClass($pipelineRequest);

         // and execute it...
         $result = $filter->filter();

         // if there was an error, we better say so now
         // and quit, making sure that we're keeping the
         // error code
         if (!$result->isValid()) {
             $this->_result = $result;
             return $result;
         }
     }

     // every other filter accepted the message, so the bayesian filter
     // (if registered) gets the final say
     if ($runBayesianFilter) {
         $pipelineRequest = new PipelineRequest($this->_httpRequest, $this->_blogInfo);
         $filter = new BayesianFilter($pipelineRequest);
         $result = $filter->filter();
     }

     $this->_result = $result;
     return $result;
 }