/**
 * Trains the Bayes word statistics from a ticket and flags the ticket as
 * manually trained.
 *
 * The training text is the ticket subject (with CamelCaps words split apart)
 * followed by the content of the ticket's first message, capped at
 * self::MAX_BODY_LENGTH bytes.
 *
 * @param mixed $ticket_id ID of the ticket to train from.
 * @param bool  $spam      TRUE to train as spam, FALSE to train as not-spam.
 * @return bool TRUE when the ticket was trained or had already been trained;
 *              FALSE when the ticket or its first message could not be loaded.
 */
private static function _markTicketAs($ticket_id, $spam = true) {
    // [TODO] This is a performance killer
    $ticket = DAO_Ticket::getTicket($ticket_id);

    // Nothing to train from if the ticket can't be loaded
    if (empty($ticket)) {
        return FALSE;
    }

    // Tickets which already carry a training flag are never retrained
    if ($ticket->spam_training != CerberusTicketSpamTraining::BLANK) {
        return TRUE;
    }

    // Pull up the first message of the ticket
    // [TODO] This is a performance killer
    $first_message = DAO_Ticket::getMessage($ticket->first_message_id);

    if (empty($first_message)) {
        return FALSE;
    }

    // Build the text to analyze: subject first, then the message body
    $content = '';

    if (!empty($ticket->subject)) {
        // SplitCamelCapsSubjects: break at every lowercase->uppercase boundary
        $hits = preg_split("{(?<=[a-z])(?=[A-Z])}x", $ticket->subject);

        if (is_array($hits) && !empty($hits)) {
            $content .= implode(' ', $hits);
        }
    }

    $content .= ' ' . $first_message->getContent();

    if (strlen($content) > self::MAX_BODY_LENGTH) {
        $head = substr($content, 0, self::MAX_BODY_LENGTH);
        $cut = strrpos($head, ' ');

        // Prefer cutting at the last space inside the limit so the final word
        // isn't split. strrpos() yields FALSE when there is no space at all,
        // and 0 when the only space is the leading separator; cutting there
        // would leave no text, so fall back to a hard cut at the limit.
        if (false === $cut || 0 === $cut) {
            $content = $head;
        } else {
            $content = substr($head, 0, $cut);
        }
    }

    $words = self::processText($content);

    // Train the words as spam/notspam
    // [TODO] Testing: every word is trained, not only the interesting ones
    self::_trainWords($words, $spam);

    // Increase the global and per-sender spam or notspam total count by 1
    // [TODO] This is a performance killer (could be done in batches)
    if ($spam) {
        DAO_Bayes::addOneToSpamTotal();
        DAO_Address::addOneToSpamTotal($ticket->first_wrote_address_id);
    } else {
        DAO_Bayes::addOneToNonSpamTotal();
        DAO_Address::addOneToNonSpamTotal($ticket->first_wrote_address_id);
    }

    // Forced training should leave a cache of 0.0001 or 0.9999 on the ticket table
    $fields = array(
        'spam_score' => $spam ? 0.9999 : 0.0001,
        'spam_training' => $spam ? CerberusTicketSpamTraining::SPAM : CerberusTicketSpamTraining::NOT_SPAM,
    );
    DAO_Ticket::updateTicket($ticket_id, $fields);

    return TRUE;
}