/**
 * Resolves a list of raw tokens into BayesianToken objects for a blog.
 *
 * Each token is first looked up with getBayesianTokenFromToken(). When that
 * lookup yields a falsy value, the replacement depends on $degenerate:
 *  - true:  the result of getFarthestToken() over the token's degenerations
 *           (BayesianToken::degenerate()) is used;
 *  - false: a fresh BayesianToken is built with 0, 0 and
 *           TOKEN_DEFAULT_PROBABILITY as its trailing constructor arguments.
 *
 * @param mixed   $blogId     Blog identifier, passed through to the lookups.
 * @param array   $tokens     Raw tokens to resolve.
 * @param boolean $degenerate Whether unknown tokens fall back to degenerations.
 *
 * @return array One entry per input token, in input order.
 */
function getBayesianTokensFromArray($blogId, $tokens, $degenerate = true)
{
    $resolved = array();

    foreach ($tokens as $rawToken) {
        $known = $this->getBayesianTokenFromToken($blogId, $rawToken);

        // Known token: keep the looked-up object as is.
        if ($known) {
            $resolved[] = $known;
            continue;
        }

        // Unknown token: substitute according to the $degenerate flag.
        $resolved[] = $degenerate
            ? $this->getFarthestToken($blogId, BayesianToken::degenerate($rawToken))
            : new BayesianToken($blogId, $rawToken, 0, 0, TOKEN_DEFAULT_PROBABILITY);
    }

    return $resolved;
}
/**
 * Builds the list of degenerated forms of a token.
 *
 * The token is processed in rounds. In each round a leading mark (one of the
 * TOKEN_*_MARK constants) is stripped if present, and the basic degenerations
 * of the remainder are collected via BayesianToken::getBasicsDegeneration(),
 * together with those of the remainder with its trailing run of "!" reduced
 * to a single "!" and removed entirely. Rounds continue for as long as a mark
 * was stripped. If the token carried no mark at all, every collected
 * degeneration is additionally emitted once with each mark prepended.
 *
 * @param mixed $token Token to degenerate; when false, $this->getToken() is used.
 *
 * @return array Unique degenerations. array_unique() keeps the original keys,
 *               so the returned keys are not necessarily consecutive.
 */
function degenerate($token = false)
{
    if ($token === false) {
        $token = $this->getToken();
    }

    $marks = array(
        TOKEN_TOPIC_MARK,
        TOKEN_URL_MARK,
        TOKEN_USER_NAME_MARK,
        TOKEN_USER_EMAIL_MARK,
        TOKEN_USER_URL_MARK,
    );

    // ereg() was removed in PHP 7.0, so PCRE is used instead. The marks are
    // prepended to tokens as literal strings below, therefore they are quoted
    // so that any regex metacharacter they contain is matched literally.
    // "D" keeps "$" anchored to the very end of the subject and "s" lets "."
    // match newlines, which is how the POSIX functions behaved.
    // NOTE: PCRE tries alternatives left to right (POSIX preferred the
    // longest); this only matters if one mark is a prefix of another.
    $quotedMarks = array();
    foreach ($marks as $mark) {
        $quotedMarks[] = preg_quote($mark, '/');
    }
    $prefixPattern = '/^(' . implode('|', $quotedMarks) . ')(.*)$/Ds';

    $degenerations = array();
    $curToken = $token;

    do {
        if (preg_match($prefixPattern, $curToken, $regs)) {
            $prefix = $regs[1];
            $curToken = $regs[2];
        } else {
            $prefix = "";
        }

        $degenerations = array_merge(
            $degenerations,
            BayesianToken::getBasicsDegeneration($curToken, $prefix)
        );

        // Trailing "!!..." reduced to a single "!".
        if (preg_match('/([^!]+!)!+$/D', $curToken, $regs)) {
            $degenerations = array_merge(
                $degenerations,
                BayesianToken::getBasicsDegeneration($regs[1], $prefix)
            );
        }

        // Trailing "!..." removed entirely.
        if (preg_match('/([^!]+)!+$/D', $curToken, $regs)) {
            $degenerations = array_merge(
                $degenerations,
                BayesianToken::getBasicsDegeneration($regs[1], $prefix)
            );
        }

        // The token never had a mark: also offer every marked variant.
        if ($prefix == "" && $curToken == $token) {
            // Iterate over a snapshot so the entries appended below are not
            // themselves prefixed again.
            $unmarked = $degenerations;
            foreach ($unmarked as $degeneration) {
                foreach ($marks as $mark) {
                    $degenerations[] = $mark . $degeneration;
                }
            }
        }
    } while ($prefix);

    return array_unique($degenerations);
}
/**
 * Extracts tokens from the attribute values of an HTML tag.
 *
 * Every "name=value" pair found in $tag has its value unquoted with
 * _unquoteToken() and split with _tokenize(); the pieces accepted by
 * BayesianToken::isValid() are returned. When the text in front of the "="
 * contains "href" or "src" (case-insensitive), the resulting tokens are
 * prefixed with TOKEN_URL_MARK.
 *
 * @private
 *
 * @param string $tag Raw tag text to scan.
 *
 * @return array Tokens found in the tag's attribute values.
 */
function _tokenizeHtmlTag($tag)
{
    $tokens = array();

    // $regs[1] holds the text before each "=", $regs[2] the value after it.
    // The first capture is "[^=]+", so it may include more than the bare
    // attribute name (e.g. the tag name); hence the substring test below.
    preg_match_all("/([^=]+)=\\s*([^\\s>]+)/", $tag, $regs);
    $count = count($regs[1]);

    for ($i = 0; $i < $count; $i++) {
        // eregi() was removed in PHP 7.0; a case-insensitive PCRE match is
        // the direct equivalent.
        $prefix = preg_match('/href|src/i', $regs[1][$i]) ? TOKEN_URL_MARK : "";

        $token = $this->_unquoteToken($regs[2][$i]);

        foreach ($this->_tokenize($token) as $tokenTemp) {
            if (BayesianToken::isValid($tokenTemp)) {
                array_push($tokens, $prefix . $tokenTemp);
            }
        }
    }

    return $tokens;
}