/**
 * Reverts a training step of the bayesian filter for one comment/article.
 *
 * The topic, text, user name, user email and user URL are tokenized; every
 * field except the text gets its own context mark. The occurrence counters of
 * the resulting tokens and the blog-wide total are then decremented, either on
 * the spam side or on the non-spam side.
 *
 * @param blogId The blog id
 * @param topic The topic of the comment/article used to untrain the filter
 * @param text The text of the comment/article used to untrain the filter
 * @param userName Name of the user who posted the comment/article
 * @param userEmail Email address of the user who posted the comment/article
 * @param userUrl URL of the user who posted the comment/article
 * @param spam If true, the spam counters are decremented; otherwise (the
 * default) the non-spam counters are decremented
 * @return Always true
 * @static
 * @see train
 */
function untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, $spam = false)
{
    $tokenizer = new BayesianTokenizer();

    // Arguments are evaluated left to right, so the fields are tokenized in
    // the order topic, text, user name, user email, user URL.
    $allTokens = array_merge(
        $tokenizer->addContextMark($tokenizer->tokenize($topic), TOKEN_TOPIC_MARK),
        $tokenizer->tokenize($text),
        $tokenizer->addContextMark($tokenizer->tokenize($userName), TOKEN_USER_NAME_MARK),
        $tokenizer->addContextMark($tokenizer->tokenize($userEmail), TOKEN_USER_EMAIL_MARK),
        $tokenizer->addContextMark($tokenizer->tokenize($userUrl), TOKEN_USER_URL_MARK)
    );

    // Read the blog totals before anything is decremented.
    $filterInfos = new BayesianFilterInfos();
    $filterInfo = $filterInfos->getBlogBayesianFilterInfo($blogId);
    $spamTotal = $filterInfo->getTotalSpam();
    $nonSpamTotal = $filterInfo->getTotalNonSpam();

    $tokenStore = new BayesianTokens();

    if (!$spam) {
        // Default case: the content had been learnt as non-spam.
        $tokenStore->decNonSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
        $filterInfos->decTotalNonSpam($filterInfo->getId());

        return true;
    }

    // The content had been learnt as spam.
    $tokenStore->decSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
    $filterInfos->decTotalSpam($filterInfo->getId());

    return true;
}
/**
 * Selects the tokens whose probability lies furthest from the neutral
 * value 0.5.
 *
 * Only tokens reporting both isSignificant() and isValid() are candidates.
 * They are ordered by decreasing distance |probability - 0.5| and the list is
 * cut once it holds as many entries as the
 * "bayesian_filter_number_significant_tokens" configuration value.
 *
 * @param blogId The blog id
 * @param tokens Array with the tokens to look up
 * @return Array with the token objects returned by
 * BayesianTokens::getBayesianTokensFromArray() that were selected, most
 * significant first
 * @private
 */
function _getMostSignificantTokens($blogId, $tokens)
{
    $config =& Config::getConfig();

    $bayesianFilterInfos = new BayesianFilterInfos();
    $bayesianFilterInfo = $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
    $totalSpam = $bayesianFilterInfo->getTotalSpam();
    $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();

    $bayesianTokens = new BayesianTokens();

    // Called with zero increments for both counters.
    // NOTE(review): presumably this refreshes each stored token against the
    // current totals before it is read back -- confirm in BayesianTokens.
    foreach ($tokens as $token) {
        $bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false);
    }

    $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);

    // Distance from 0.5 for every candidate, stored under the SAME key the
    // token has in $tokens. Appending with array_push() would renumber the
    // entries from 0 and, as soon as one token is filtered out, the keys would
    // no longer point at the matching token objects.
    $distances = array();
    foreach ($tokens as $key => $token) {
        if ($token->isSignificant() && $token->isValid()) {
            $distances[$key] = abs($token->getProb() - 0.5);
        }
    }

    // arsort() orders by value, descending, and keeps the keys.
    arsort($distances);

    $limit = $config->getValue("bayesian_filter_number_significant_tokens");

    $significantTokens = array();
    $count = 0;
    foreach ($distances as $key => $distance) {
        $significantTokens[] = $tokens[$key];
        $count++;

        // The check runs after adding, so a limit that never equals a
        // positive count (e.g. 0 or an unset value) does not cut the list.
        if ($count == $limit) {
            break;
        }
    }

    return $significantTokens;
}
/**
 * Adds a new blog to the database.
 *
 * If the mangled name of the blog is already used by another blog, a numeric
 * suffix (1, 2, 3, ...) is appended to the original mangled name until an
 * unused one is found; the blog object is updated with the name finally used.
 * After the blog row has been inserted, the row with the bayesian filter
 * information for the new blog is created as well.
 *
 * @param blog A BlogInfo object with the necessary information. It is passed
 * by reference: its mangled name may be changed and, on success, its database
 * id is set.
 * @see BlogInfo
 * @return False if the insert query failed, or the database id of the new
 * blog otherwise.
 */
function addBlog(&$blog)
{
    // Remember the name we started with so that every candidate is simply
    // "<base name><counter>", instead of stripping the previous counter off
    // the end of the last candidate.
    $baseMangledName = $blog->getMangledBlog();
    $suffix = 0;

    // Keep trying while a blog with the current mangled name already exists.
    while ($this->getBlogInfoByName($blog->getMangledBlog())) {
        $suffix++;
        $blog->setMangledBlog($baseMangledName . $suffix);
    }

    $blogSettings = $blog->getSettings();
    if (!$blogSettings) {
        $blogSettings = new BlogSettings();
    }

    // Every string value is escaped with Db::qstr(), including the mangled
    // name, which used to be concatenated into the query as-is.
    // NOTE(review): the owner id is inserted unquoted; it is assumed to be a
    // numeric id -- confirm against BlogInfo::getOwner().
    $query = "INSERT INTO " . $this->getPrefix() . "blogs (blog,owner_id,about,settings,mangled_blog,status)\n VALUES ('"
        . Db::qstr($blog->getBlog()) . "',"
        . $blog->getOwner() . ",'"
        . Db::qstr($blog->getAbout()) . "', '"
        . Db::qstr(serialize($blogSettings)) . "', '"
        . Db::qstr($blog->getMangledBlog()) . "', '"
        . Db::qstr($blog->getStatus()) . "')";

    $result = $this->Execute($query);
    if (!$result) {
        return false;
    }

    $blogId = $this->_db->Insert_ID();
    $blog->setId($blogId);

    // Create the row for the bayesian filter info of the new blog.
    $bayesianFilterInfo = new BayesianFilterInfos();
    $bayesianFilterInfo->insert($blogId);

    // Return the blog identifier.
    return $blogId;
}