/**
 * Tokenizes every field of a comment/article, keeps the most significant
 * tokens for the given blog and feeds them to the Bayes probability
 * calculation.
 *
 * The topic, user name, user email and user url tokens get a context mark
 * attached; the tokens coming from the text are used unmarked.
 *
 * @param blogId The blog id
 * @param topic The topic of the comment/article
 * @param text The text of the comment/article
 * @param userName Name of the user posting this comment/article
 * @param userEmail Email address of the user posting this comment/article
 * @param userUrl URL of the user posting this comment/article
 * @return The value returned by BayesianFilter::_getBayesProbability() for
 * the most significant tokens
 * @private
 */
function getSpamProbability($blogId, $topic, $text, $userName, $userEmail, $userUrl)
{
    $tk = new BayesianTokenizer();

    // arguments are evaluated left to right, so the fields are tokenized
    // in the order topic, text, user name, user email, user url
    $allTokens = array_merge(
        $tk->addContextMark($tk->tokenize($topic), TOKEN_TOPIC_MARK),
        $tk->tokenize($text),
        $tk->addContextMark($tk->tokenize($userName), TOKEN_USER_NAME_MARK),
        $tk->addContextMark($tk->tokenize($userEmail), TOKEN_USER_EMAIL_MARK),
        $tk->addContextMark($tk->tokenize($userUrl), TOKEN_USER_URL_MARK)
    );

    return BayesianFilter::_getBayesProbability(
        BayesianFilter::_getMostSignificantTokens($blogId, $allTokens)
    );
}
/**
 * Untrains the filter: decrements the occurrence counters of every token
 * found in the given content, and then the blog's total spam or total
 * non-spam counter.
 *
 * @param blogId The blog id
 * @param topic The topic of the comment/article that we're using to untrain the filter
 * @param text The text of the comment/article that we're using to untrain the filter
 * @param userName Name of the user posting this comment/article
 * @param userEmail Email address of the user posting this comment/article
 * @param userUrl URL of the user posting this comment/article
 * @param spam Whether the content should be unmarked as spam (true) or
 * unmarked as non-spam (false). The content will be unmarked as non-spam
 * by default
 * @return Always true
 * @static
 * @see train
 */
function untrain($blogId, $topic, $text, $userName, $userEmail, $userUrl, $spam = false)
{
    $tk = new BayesianTokenizer();

    // every field but the text gets a context mark; arguments are
    // evaluated left to right: topic, text, user name, user email, user url
    $allTokens = array_merge(
        $tk->addContextMark($tk->tokenize($topic), TOKEN_TOPIC_MARK),
        $tk->tokenize($text),
        $tk->addContextMark($tk->tokenize($userName), TOKEN_USER_NAME_MARK),
        $tk->addContextMark($tk->tokenize($userEmail), TOKEN_USER_EMAIL_MARK),
        $tk->addContextMark($tk->tokenize($userUrl), TOKEN_USER_URL_MARK)
    );

    // read the current totals for this blog before touching any counter
    $infos = new BayesianFilterInfos();
    $info = $infos->getBlogBayesianFilterInfo($blogId);
    $spamTotal = $info->getTotalSpam();
    $nonSpamTotal = $info->getTotalNonSpam();

    $tokenStore = new BayesianTokens();

    if (!$spam) {
        // default case: undo a non-spam training
        $tokenStore->decNonSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
        $infos->decTotalNonSpam($info->getId());

        return true;
    }

    // undo a spam training
    $tokenStore->decSpamOccurrencesFromTokensArray($blogId, $allTokens, $spamTotal, $nonSpamTotal);
    $infos->decTotalSpam($info->getId());

    return true;
}