예제 #1
0
파일: Bayes.php 프로젝트: joegeck/cerb4
 /**
  * Force-trains the Bayesian filter with one ticket, as spam or not-spam.
  *
  * The training text is the ticket subject (split on camelCase boundaries)
  * followed by the content of the ticket's first message, capped at
  * self::MAX_BODY_LENGTH bytes. After training, the global and per-address
  * spam/non-spam counters are incremented and the ticket row is stamped with
  * a forced spam score and its training state.
  *
  * @param mixed   $ticket_id Ticket ID handed to DAO_Ticket::getTicket() and DAO_Ticket::updateTicket()
  * @param boolean $spam      TRUE to train as spam, FALSE to train as not-spam
  * @return boolean TRUE if the ticket was trained now or had already been trained;
  *                 FALSE if the ticket or its first message could not be loaded
  */
 private static function _markTicketAs($ticket_id, $spam = true)
 {
     // [TODO] This is a performance killer
     $ticket = DAO_Ticket::getTicket($ticket_id);
     // Nothing to train on if the ticket could not be loaded
     if (empty($ticket)) {
         return FALSE;
     }
     // Never retrain a ticket that already carries a training decision
     if ($ticket->spam_training != CerberusTicketSpamTraining::BLANK) {
         return TRUE;
     }
     // pull up text of first ticket message
     // [TODO] This is a performance killer
     $first_message = DAO_Ticket::getMessage($ticket->first_message_id);
     if (empty($first_message)) {
         return FALSE;
     }
     // Build the text handed to processText(): subject first, then the message body
     $content = '';
     if (!empty($ticket->subject)) {
         // SplitCamelCapsSubjects: break "FooBar" into "Foo Bar" so each part becomes its own word
         $hits = preg_split("{(?<=[a-z])(?=[A-Z])}x", $ticket->subject);
         if (is_array($hits) && !empty($hits)) {
             $content .= implode(' ', $hits);
         }
     }
     $content .= ' ' . $first_message->getContent();
     if (strlen($content) > self::MAX_BODY_LENGTH) {
         $head = substr($content, 0, self::MAX_BODY_LENGTH);
         $cut = strrpos($head, ' ');
         // Prefer cutting at the last space inside the limit so no word is split.
         // strrpos() yields FALSE when there is no space at all, and 0 when the only
         // space is the leading separator; cutting there would discard everything,
         // so fall back to a hard cut at the limit in those cases.
         $content = ($cut !== false && $cut > 0) ? substr($head, 0, $cut) : $head;
     }
     $words = self::processText($content);
     // [TODO] Testing, train all words
     // Every processed word is trained, not only the "interesting" ones that
     // _calculateSpamProbability() would select.
     self::_trainWords($words, $spam);
     // Increase the bayes_stats spam or notspam total count by 1
     // [TODO] This is a performance killer (could be done in batches)
     if ($spam) {
         DAO_Bayes::addOneToSpamTotal();
         DAO_Address::addOneToSpamTotal($ticket->first_wrote_address_id);
     } else {
         DAO_Bayes::addOneToNonSpamTotal();
         DAO_Address::addOneToNonSpamTotal($ticket->first_wrote_address_id);
     }
     // Forced training should leave a cache of 0.0001 or 0.9999 on the ticket table
     $fields = array(
         'spam_score' => $spam ? 0.9999 : 0.0001,
         'spam_training' => $spam ? CerberusTicketSpamTraining::SPAM : CerberusTicketSpamTraining::NOT_SPAM,
     );
     DAO_Ticket::updateTicket($ticket_id, $fields);
     return TRUE;
 }