/**
  * untrains the filter
  *
  * @param blogId The blog id
  * @param topic The topic of the comment/article that we're using to untrain the filter
  * @param text The text of the comment/article that we're using to untrain the filter
  * @param userName Name of the user posting this comment/article
  * @param userEmail Email address of the user posting this comment/article
  * @param userUrl URL of the user posting this comment/article
  * @param spam Whether these contents should be removed from the spam statistics (true) or from
  * the non-spam statistics (false). They are removed from the non-spam statistics by default.
  * @static
  * @see train
  */
 function untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, $spam = false)
 {
     // Tokenize every field. All of them except the body text are tagged with
     // the context mark that belongs to the field they come from.
     $splitter = new BayesianTokenizer();

     $fromTopic = $splitter->tokenize($topic);
     $fromTopic = $splitter->addContextMark($fromTopic, TOKEN_TOPIC_MARK);

     $fromText = $splitter->tokenize($text);

     $fromName = $splitter->tokenize($userName);
     $fromName = $splitter->addContextMark($fromName, TOKEN_USER_NAME_MARK);

     $fromEmail = $splitter->tokenize($userEmail);
     $fromEmail = $splitter->addContextMark($fromEmail, TOKEN_USER_EMAIL_MARK);

     $fromUrl = $splitter->tokenize($userUrl);
     $fromUrl = $splitter->addContextMark($fromUrl, TOKEN_USER_URL_MARK);

     $allTokens = array_merge($fromTopic, $fromText, $fromName, $fromEmail, $fromUrl);

     // Read the blog-wide totals as they are before this sample is removed.
     $infoStore = new BayesianFilterInfos();
     $blogInfo = $infoStore->getBlogBayesianFilterInfo($blogId);
     $spamTotal = $blogInfo->getTotalSpam();
     $nonSpamTotal = $blogInfo->getTotalNonSpam();

     $tokenStore = new BayesianTokens();

     if (!$spam) {
         // Default case: take the sample out of the non-spam statistics.
         $tokenStore->decNonSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
         $infoStore->decTotalNonSpam($blogInfo->getId());

         return true;
     }

     // Otherwise take the sample out of the spam statistics.
     $tokenStore->decSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
     $infoStore->decTotalSpam($blogInfo->getId());

     return true;
 }
 /**
  * @private
  */
 function _getMostSignificantTokens($blogId, $tokens)
 {
     // Returns the stored token objects for $tokens that are both significant
     // and valid, ordered by how far their probability lies from the neutral
     // value 0.5 (farthest first). The list is cut once it holds
     // "bayesian_filter_number_significant_tokens" entries; a configured value
     // that the counter never equals (e.g. 0) leaves it uncut.
     $config =& Config::getConfig();
     $maxTokens = $config->getValue("bayesian_filter_number_significant_tokens");

     $bayesianFilterInfos = new BayesianFilterInfos();
     $bayesianFilterInfo = $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
     $totalSpam = $bayesianFilterInfo->getTotalSpam();
     $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();

     $bayesianTokens = new BayesianTokens();

     // Called with zero occurrence deltas; presumably this makes sure every
     // token has an up-to-date stored entry before it is read back below --
     // TODO confirm against BayesianTokens::updateOccurrences.
     foreach ($tokens as $token) {
         $bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false);
     }

     $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);

     // Store each score under the token's own key in $tokens. arsort() keeps
     // keys attached to their values, so the key can be used afterwards to
     // fetch the matching token object even when some tokens were skipped.
     $distances = array();
     foreach ($tokens as $key => $token) {
         if ($token->isSignificant() && $token->isValid()) {
             $distances[$key] = abs($token->getProb() - 0.5);
         }
     }

     arsort($distances);

     $significantTokens = array();
     $count = 0;
     foreach ($distances as $key => $distance) {
         $significantTokens[] = $tokens[$key];
         $count++;
         if ($count == $maxTokens) {
             break;
         }
     }

     return $significantTokens;
 }
 /**
  * Adds a new blog to the database.
  *
  * @param blog A BlogInfo object with the necessary information
  * @see BlogInfo
  * @return The database id of the new blog if successful, or false otherwise. It will also set the
  * database id of the parameter passed by reference in case it is successful.
  */
 function addBlog(&$blog)
 {
     // Inserts $blog into the blogs table and creates its bayesian filter
     // info row. Returns the new blog id, or false when the owner id is not
     // numeric or the INSERT fails. On success the id is also set on $blog.

     // Make the mangled name unique: while a blog with the current mangled
     // name exists, retry with the original name followed by 1, 2, 3...
     $baseMangledName = $blog->getMangledBlog();
     $suffix = 0;
     while ($this->getBlogInfoByName($blog->getMangledBlog())) {
         $suffix++;
         $blog->setMangledBlog($baseMangledName . $suffix);
     }

     $blogSettings = $blog->getSettings();
     if (!$blogSettings) {
         $blogSettings = new BlogSettings();
     }

     // owner_id is written into the query unquoted, so refuse anything that
     // is not a number instead of letting it reach the SQL text.
     $ownerId = $blog->getOwner();
     if (!is_numeric($ownerId)) {
         return false;
     }

     // Every string value goes through Db::qstr(), including the mangled name.
     $query = "INSERT INTO " . $this->getPrefix() . "blogs (blog,owner_id,about,settings,mangled_blog,status)\n                     VALUES ('"
         . Db::qstr($blog->getBlog()) . "',"
         . (int) $ownerId . ",'"
         . Db::qstr($blog->getAbout()) . "', '"
         . Db::qstr(serialize($blogSettings)) . "', '"
         . Db::qstr($blog->getMangledBlog()) . "', '"
         . Db::qstr($blog->getStatus()) . "')";

     $result = $this->Execute($query);
     if (!$result) {
         return false;
     }

     $blogId = $this->_db->Insert_ID();
     $blog->setId($blogId);

     // create the row for the bayesian filter info
     $bayesianFilterInfo = new BayesianFilterInfos();
     $bayesianFilterInfo->insert($blogId);

     // and return the blog identifier
     return $blogId;
 }