コード例 #1
0
 /**
  * Computes the spam probability for a comment/article.
  *
  * Every field is tokenized; the tokens of all fields except the text body
  * are tagged with a per-field context mark. The merged token list is
  * narrowed down by BayesianFilter::_getMostSignificantTokens() and the
  * result of BayesianFilter::_getBayesProbability() on that subset is returned.
  *
  * @param blogId The blog id, passed on to the significant-token lookup
  * @param topic The topic of the comment/article
  * @param text The text of the comment/article
  * @param userName Name of the user posting this comment/article
  * @param userEmail Email address of the user posting this comment/article
  * @param userUrl URL of the user posting this comment/article
  * @return Whatever BayesianFilter::_getBayesProbability() returns for the
  * most significant tokens
  * @private
  */
 function getSpamProbability($blogId, $topic, $text, $userName, $userEmail, $userUrl)
 {
     $splitter = new BayesianTokenizer();

     // Arguments are evaluated left to right, so the fields are tokenized in
     // the order topic, text, user name, user email, user url.
     $allTokens = array_merge(
         $splitter->addContextMark($splitter->tokenize($topic), TOKEN_TOPIC_MARK),
         // the text body is the only field that carries no context mark
         $splitter->tokenize($text),
         $splitter->addContextMark($splitter->tokenize($userName), TOKEN_USER_NAME_MARK),
         $splitter->addContextMark($splitter->tokenize($userEmail), TOKEN_USER_EMAIL_MARK),
         $splitter->addContextMark($splitter->tokenize($userUrl), TOKEN_USER_URL_MARK)
     );

     return BayesianFilter::_getBayesProbability(
         BayesianFilter::_getMostSignificantTokens($blogId, $allTokens)
     );
 }
コード例 #2
0
 /**
  * Untrains the filter: reverts the effect of a previous training run for
  * the given contents.
  *
  * The contents are tokenized (all fields except the text body get a
  * per-field context mark), then the occurrence counters of those tokens and
  * the blog-wide total are decremented, either on the spam side or on the
  * non-spam side depending on $spam.
  *
  * @param blogId The blog id
  * @param topic The topic of the comment/article that we're using to untrain the filter
  * @param text The text of the comment/article that we're using to untrain the filter
  * @param userName Name of the user posting this comment/article
  * @param userEmail Email address of the user posting this comment/article
  * @param userUrl URL of the user posting this comment/article
  * @param spam Whether we should unmark these contents as spam or not. The content will be
  * unmarked as non-spam by default
  * @return Always true
  * @static
  * @see train
  */
 function untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, $spam = false)
 {
     $splitter = new BayesianTokenizer();

     // Arguments are evaluated left to right, so the fields are tokenized in
     // the order topic, text, user name, user email, user url.
     $allTokens = array_merge(
         $splitter->addContextMark($splitter->tokenize($topic), TOKEN_TOPIC_MARK),
         // the text body is the only field that carries no context mark
         $splitter->tokenize($text),
         $splitter->addContextMark($splitter->tokenize($userName), TOKEN_USER_NAME_MARK),
         $splitter->addContextMark($splitter->tokenize($userEmail), TOKEN_USER_EMAIL_MARK),
         $splitter->addContextMark($splitter->tokenize($userUrl), TOKEN_USER_URL_MARK)
     );

     // Read both totals before anything is decremented; the token update
     // receives the pre-decrement values.
     $filterInfos = new BayesianFilterInfos();
     $blogInfo = $filterInfos->getBlogBayesianFilterInfo($blogId);
     $spamTotal = $blogInfo->getTotalSpam();
     $nonSpamTotal = $blogInfo->getTotalNonSpam();

     $tokenStore = new BayesianTokens();

     if (!$spam) {
         // default path: the contents had been learned as non-spam
         $tokenStore->decNonSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
         $filterInfos->decTotalNonSpam($blogInfo->getId());

         return true;
     }

     // the contents had been learned as spam
     $tokenStore->decSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
     $filterInfos->decTotalSpam($blogInfo->getId());

     return true;
 }