コード例 #1
0
ファイル: ServerController.php プロジェクト: omero/camp-gdl
 /**
  * API callback for mollom.checkContent to perform textual analysis.
  *
  * @todo Add support for 'redirect' and 'refresh' values.
  */
 protected function checkContent($data)
 {
     // $data is the associative array of request parameters. Keys read here:
     // 'checks', 'type', 'postTitle', 'postBody', 'authorName', 'authorMail',
     // 'stored', 'unsure' and 'id'.
     //
     // The returned array always contains 'id'. Score/classification keys are
     // only present for the checks that were actually performed. When 'stored'
     // is set, the stored content values are merged into the response as well.
     $response = array();
     // Stays NULL unless the spam filter runs and reaches a definite verdict.
     $qualityScore = NULL;

     // If only a single value for checks is passed, it is a string.
     if (isset($data['checks']) && is_string($data['checks'])) {
         $data['checks'] = array($data['checks']);
     }
     // NULL means "no explicit checks requested".
     $checks = isset($data['checks']) ? $data['checks'] : NULL;

     $header = $this->parser->getRestOAuthHeader();
     $publicKey = $header['oauth_consumer_key'];

     // Fetch the blacklist stored for this public key.
     $blacklist = $this->state()->get(self::KEY_BLACKLIST . '_' . $publicKey, array());

     // Determine content keys to analyze.
     $post_keys = array('postTitle' => 1, 'postBody' => 1);
     // Strict comparison: a loose one would let a non-string value such as
     // integer 0 match 'user' on PHP < 8.
     if (isset($data['type']) && $data['type'] === 'user') {
         $post_keys += array('authorName' => 1, 'authorMail' => 1);
     }
     // Join the fields with a real newline. A single-quoted '\n' is a literal
     // backslash followed by "n", which glues a stray "n" onto the start of the
     // following field.
     $post = implode("\n", array_intersect_key($data, $post_keys));

     $update = isset($data['stored']);

     // Spam filter: Check post_title and post_body for ham, spam, or unsure.
     // Runs when no checks were specified at all or 'spam' was requested, but
     // never for a 'stored' update.
     if (!$update && ($checks === NULL || in_array('spam', $checks, TRUE))) {
         $spam = FALSE;
         $ham = FALSE;
         // 'spam' always has precedence.
         if (strpos($post, 'spam') !== FALSE) {
             $spam = TRUE;
         } elseif (strpos($post, 'ham') !== FALSE) {
             $ham = TRUE;
         } elseif (strpos($post, 'unsure') !== FALSE) {
             if (!isset($data['unsure']) || $data['unsure']) {
                 // Unsure mode enabled (the default): both flags set means
                 // "unsure" below.
                 $spam = TRUE;
                 $ham = TRUE;
             } else {
                 // Unsure mode disabled: treat the content as ham.
                 $spam = FALSE;
                 $ham = TRUE;
             }
         }

         // Check blacklist. A match overrides any keyword-based verdict.
         if ($matches = $this->checkBlacklist($post, $blacklist, 'spam')) {
             $spam = TRUE;
             $ham = FALSE;
             $response['reason'] = 'blacklist';
             $response['blacklistSpam'] = $matches;
         }

         if ($spam && $ham) {
             $response['spamScore'] = 0.5;
             $response['spamClassification'] = 'unsure';
             $qualityScore = 0.5;
         } elseif ($spam) {
             $response['spamScore'] = 1.0;
             $response['spamClassification'] = 'spam';
             $qualityScore = 0.0;
         } elseif ($ham) {
             $response['spamScore'] = 0.0;
             $response['spamClassification'] = 'ham';
             $qualityScore = 1.0;
         } else {
             // No keyword and no blacklist match: unsure, without a quality
             // verdict.
             $response['spamScore'] = 0.5;
             $response['spamClassification'] = 'unsure';
             $qualityScore = NULL;
         }

         // In case a previous spam check was unsure and a CAPTCHA was solved, the
         // result is supposed to be ham - unless the new content is spam.
         if (!empty($data['id']) && $response['spamClassification'] === 'unsure') {
             $content_captchas = $this->state()->get(self::KEY_CONTENT_CAPTCHA, array());
             if (!empty($content_captchas[$data['id']])) {
                 $response['spamScore'] = 0.0;
                 $response['spamClassification'] = 'ham';
             }
         }
     }

     // Quality filter. Falls back to 0 when the spam filter did not produce a
     // quality verdict.
     if ($checks !== NULL && in_array('quality', $checks, TRUE)) {
         $response['qualityScore'] = isset($qualityScore) ? $qualityScore : 0;
     }

     // Profanity filter.
     if ($checks !== NULL && in_array('profanity', $checks, TRUE)) {
         $profanityScore = 0.0;
         if (strpos($post, 'profanity') !== FALSE) {
             $profanityScore = 1.0;
         }
         // Check blacklist.
         if ($matches = $this->checkBlacklist($post, $blacklist, 'profanity')) {
             $profanityScore = 1.0;
             $response['blacklistProfanity'] = $matches;
         }
         $response['profanityScore'] = $profanityScore;
     }

     // Language detection.
     if ($checks !== NULL && in_array('language', $checks, TRUE)) {
         $languages = array();
         // Compare against FALSE explicitly: stripos() returns 0 for a match at
         // the very beginning of the text, which a truthiness test would
         // treat as "not found".
         if (stripos($post, 'ist seit der Mitte') !== FALSE) {
             $languages[] = array('languageCode' => 'de');
         }
         if (stripos($post, 'it is the most populous city') !== FALSE) {
             $languages[] = array('languageCode' => 'en');
         }
         if (count($languages) === 0) {
             $languages[] = array('languageCode' => 'zxx');
         }
         // Every detected language gets an equal share of the score.
         $score = 1 / count($languages);
         // Assign by key instead of iterating by reference, so no dangling
         // reference to the last element ends up in the response.
         foreach (array_keys($languages) as $index) {
             $languages[$index]['languageScore'] = $score;
         }
         if (count($languages) === 1) {
             $response['languages']['language'] = reset($languages);
         } else {
             $response['languages'] = array($languages);
         }
     }

     // Persist the submitted data, merging it over any previously stored
     // values for the same content ID.
     $storage = $this->state()->get(self::KEY_CONTENT, array());
     $contentId = !empty($data['id']) ? $data['id'] : md5(mt_rand());
     if (isset($storage[$contentId])) {
         $storage[$contentId] = array_merge($storage[$contentId], $data);
     } else {
         $storage[$contentId] = $data;
     }
     if ($update) {
         // For 'stored' updates the response echoes the stored content, with
         // freshly computed values taking precedence.
         $response = array_merge($storage[$contentId], $response);
     }
     $response['id'] = $contentId;
     $this->state()->set(self::KEY_CONTENT, $storage);

     return $response;
 }