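/**
  * Looks up the Bayesian token objects for the given tokens and returns the
  * one whose probability is farthest from 0.5.
  *
  * @param blogId The blog id
  * @param tokens Array of tokens, passed to BayesianTokens::getBayesianTokensFromArray
  * @return The token object with the largest abs(getProb() - 0.5)
  */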
 function getFarthestToken($blogId, $tokens)
 {
     // Resolve the raw tokens into Bayesian token objects for this blog.
     $bayesianTokens = new BayesianTokens();
     $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens, false);

     // Nothing to choose from: return null explicitly instead of reading
     // an undefined offset of an empty key list.
     if (empty($tokens)) {
         return null;
     }

     // Single pass for the largest distance from the neutral probability 0.5.
     // The winning token itself is remembered, so the result does not depend
     // on how the array returned above happens to be keyed. On ties the
     // first token encountered wins.
     $farthestToken = null;
     $farthestDistance = -1; // distances are never negative, so any token beats this
     foreach ($tokens as $token) {
         $distance = abs($token->getProb() - 0.5);
         if ($distance > $farthestDistance) {
             $farthestDistance = $distance;
             $farthestToken = $token;
         }
     }

     return $farthestToken;
 }
 /**
  * @private
  */
 function _getMostSignificantTokens($blogId, $tokens)
 {
     // Returns the tokens that are both significant and valid, ordered by
     // decreasing distance of their probability from 0.5, and capped at the
     // "bayesian_filter_number_significant_tokens" configuration value.
     $config =& Config::getConfig();

     // Current spam / non-spam totals recorded for this blog.
     $bayesianFilterInfos = new BayesianFilterInfos();
     $bayesianFilterInfo = $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
     $totalSpam = $bayesianFilterInfo->getTotalSpam();
     $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();

     // Update every token with zero new occurrences against the current totals.
     // NOTE(review): presumably this refreshes the stored probabilities
     // before they are read back below - confirm in BayesianTokens.
     $bayesianTokens = new BayesianTokens();
     foreach ($tokens as $token) {
         $bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false);
     }
     $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);

     // Distance from 0.5 for each usable token, stored under the token's
     // OWN key in $tokens. Appending with array_push() would renumber the
     // entries from 0 and, as soon as one token is skipped, the keys would
     // no longer point at the right elements of $tokens.
     $distances = array();
     foreach ($tokens as $key => $token) {
         if ($token->isSignificant() && $token->isValid()) {
             $distances[$key] = abs($token->getProb() - 0.5);
         }
     }

     // Largest distance first; arsort() keeps the key association intact.
     arsort($distances);

     // The limit does not change while looping, so read it once.
     $maxTokens = $config->getValue("bayesian_filter_number_significant_tokens");

     $significantTokens = array();
     $count = 0;
     foreach ($distances as $key => $distance) {
         array_push($significantTokens, $tokens[$key]);
         $count++;
         if ($count == $maxTokens) {
             break;
         }
     }

     return $significantTokens;
 }
 /**
  * untrains the filter
  *
  * @param blogId The blog id
  * @param topic The topic of the comment/article that we're using to untrain the filter
  * @param text The text of the comment/article that we're using to untrain the filter
  * @param userName Name of the user posting this comment/article
  * @param userEmail Email address of the user posting this comment/article
  * @param userUrl URL of the user posting this comment/article
  * @param spam Whether we should unmark these contents as spam or not. The content will be unmarked
  * as non-spam by default
  * @static
  * @see train
  */
 function untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, $spam = false)
 {
     // Tokenize each field. Every field except the body text is tagged with
     // a context mark naming the field it came from.
     $tokenizer = new BayesianTokenizer();

     $topicTokens = $tokenizer->tokenize($topic);
     $topicTokens = $tokenizer->addContextMark($topicTokens, TOKEN_TOPIC_MARK);

     $textTokens = $tokenizer->tokenize($text);

     $nameTokens = $tokenizer->tokenize($userName);
     $nameTokens = $tokenizer->addContextMark($nameTokens, TOKEN_USER_NAME_MARK);

     $emailTokens = $tokenizer->tokenize($userEmail);
     $emailTokens = $tokenizer->addContextMark($emailTokens, TOKEN_USER_EMAIL_MARK);

     $urlTokens = $tokenizer->tokenize($userUrl);
     $urlTokens = $tokenizer->addContextMark($urlTokens, TOKEN_USER_URL_MARK);

     $allTokens = array_merge($topicTokens, $textTokens, $nameTokens, $emailTokens, $urlTokens);

     // Spam / non-spam totals currently recorded for this blog.
     $filterInfos = new BayesianFilterInfos();
     $filterInfo = $filterInfos->getBlogBayesianFilterInfo($blogId);
     $spamTotal = $filterInfo->getTotalSpam();
     $nonSpamTotal = $filterInfo->getTotalNonSpam();

     // Decrement the per-token occurrences first, then the matching total.
     $tokenStore = new BayesianTokens();
     if (!$spam) {
         $tokenStore->decNonSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
         $filterInfos->decTotalNonSpam($filterInfo->getId());
     } else {
         $tokenStore->decSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
         $filterInfos->decTotalSpam($filterInfo->getId());
     }

     // Always reports success.
     return true;
 }