/**
 * API callback for mollom.checkContent to perform textual analysis.
 *
 * The analysis is keyword-driven: the submitted text is scanned for the
 * literal words 'spam', 'ham', 'unsure' and 'profanity', for two fixed
 * German/English marker phrases, and is passed to checkBlacklist() together
 * with the blacklist stored for the OAuth consumer key of the request.
 *
 * @param array $data
 *   The request parameters. Keys read by this method:
 *   - checks: (optional) A check name or a list of check names out of 'spam',
 *     'quality', 'profanity', 'language'. If omitted, only the spam check is
 *     performed.
 *   - type: (optional) If 'user', authorName and authorMail are analyzed in
 *     addition to postTitle and postBody.
 *   - postTitle, postBody, authorName, authorMail: (optional) Text to analyze.
 *   - unsure: (optional) If set and falsy, 'unsure' content is treated as ham.
 *   - id: (optional) ID of previously stored content to update.
 *   - stored: (optional) If set, the request is treated as an update; the spam
 *     check is skipped and the stored content is merged into the response.
 *
 * @return array
 *   The response values. Always contains 'id'. Depending on the executed
 *   checks it contains 'spamScore', 'spamClassification', 'reason',
 *   'blacklistSpam', 'qualityScore', 'profanityScore', 'blacklistProfanity',
 *   and 'languages'.
 *
 * @todo Add support for 'redirect' and 'refresh' values.
 */
protected function checkContent($data) {
  $response = array();
  // Stays NULL unless the spam check determines a definite quality.
  $qualityScore = NULL;

  // If only a single value for checks is passed, it is a string. The
  // normalized list is written back to $data, since $data is stored below.
  if (isset($data['checks']) && is_string($data['checks'])) {
    $data['checks'] = array($data['checks']);
  }
  // NULL means that the client did not request specific checks.
  $checks = isset($data['checks']) ? $data['checks'] : NULL;

  $header = $this->parser->getRestOAuthHeader();
  $publicKey = $header['oauth_consumer_key'];

  // Fetch blacklist.
  $blacklist = $this->state()->get(self::KEY_BLACKLIST . '_' . $publicKey, array());

  // Determine content keys to analyze.
  $post_keys = array('postTitle' => 1, 'postBody' => 1);
  if (isset($data['type']) && $data['type'] === 'user') {
    $post_keys += array('authorName' => 1, 'authorMail' => 1);
  }
  // Join the fields with a real newline, so that no printable character is
  // injected between them that could take part in a keyword match.
  $post = implode("\n", array_intersect_key($data, $post_keys));

  $update = isset($data['stored']);

  // Spam filter: Check post_title and post_body for ham, spam, or unsure.
  if (!$update && ($checks === NULL || in_array('spam', $checks, TRUE))) {
    $spam = FALSE;
    $ham = FALSE;
    // 'spam' always has precedence.
    if (strpos($post, 'spam') !== FALSE) {
      $spam = TRUE;
    }
    elseif (strpos($post, 'ham') !== FALSE) {
      $ham = TRUE;
    }
    elseif (strpos($post, 'unsure') !== FALSE) {
      // Unsure mode is enabled unless the client explicitly disabled it; with
      // unsure mode disabled, the content counts as plain ham.
      $spam = !isset($data['unsure']) || $data['unsure'];
      $ham = TRUE;
    }

    // Check blacklist. A blacklist match overrides any keyword result.
    $matches = $this->checkBlacklist($post, $blacklist, 'spam');
    if ($matches) {
      $spam = TRUE;
      $ham = FALSE;
      $response['reason'] = 'blacklist';
      $response['blacklistSpam'] = $matches;
    }

    if ($spam && $ham) {
      $response['spamScore'] = 0.5;
      $response['spamClassification'] = 'unsure';
      $qualityScore = 0.5;
    }
    elseif ($spam) {
      $response['spamScore'] = 1.0;
      $response['spamClassification'] = 'spam';
      $qualityScore = 0.0;
    }
    elseif ($ham) {
      $response['spamScore'] = 0.0;
      $response['spamClassification'] = 'ham';
      $qualityScore = 1.0;
    }
    else {
      // No keyword found at all: unsure, without a quality score.
      $response['spamScore'] = 0.5;
      $response['spamClassification'] = 'unsure';
    }

    // In case a previous spam check was unsure and a CAPTCHA was solved, the
    // result is supposed to be ham - unless the new content is spam.
    if (!empty($data['id']) && $response['spamClassification'] == 'unsure') {
      $content_captchas = $this->state()->get(self::KEY_CONTENT_CAPTCHA, array());
      if (!empty($content_captchas[$data['id']])) {
        $response['spamScore'] = 0.0;
        $response['spamClassification'] = 'ham';
      }
    }
  }

  // Quality filter. Falls back to 0 if the spam check did not run or did not
  // yield a quality score.
  if ($checks !== NULL && in_array('quality', $checks, TRUE)) {
    $response['qualityScore'] = isset($qualityScore) ? $qualityScore : 0;
  }

  // Profanity filter.
  if ($checks !== NULL && in_array('profanity', $checks, TRUE)) {
    $profanityScore = 0.0;
    if (strpos($post, 'profanity') !== FALSE) {
      $profanityScore = 1.0;
    }
    // Check blacklist.
    $matches = $this->checkBlacklist($post, $blacklist, 'profanity');
    if ($matches) {
      $profanityScore = 1.0;
      $response['blacklistProfanity'] = $matches;
    }
    $response['profanityScore'] = $profanityScore;
  }

  // Language detection.
  if ($checks !== NULL && in_array('language', $checks, TRUE)) {
    $languages = array();
    // stripos() returns 0 for a match at the very start of the text, so the
    // result has to be compared against FALSE explicitly.
    if (stripos($post, 'ist seit der Mitte') !== FALSE) {
      $languages[] = array('languageCode' => 'de');
    }
    if (stripos($post, 'it is the most populous city') !== FALSE) {
      $languages[] = array('languageCode' => 'en');
    }
    if (count($languages) == 0) {
      $languages[] = array('languageCode' => 'zxx');
    }
    // All detected languages share the score equally. Assign by key instead
    // of by reference, so that no reference into $languages is left behind.
    $score = 1 / count($languages);
    foreach (array_keys($languages) as $index) {
      $languages[$index]['languageScore'] = $score;
    }
    if (count($languages) === 1) {
      $response['languages']['language'] = reset($languages);
    }
    else {
      $response['languages'] = array($languages);
    }
  }

  // Store the submitted values; updates are merged into the existing record.
  $storage = $this->state()->get(self::KEY_CONTENT, array());
  $contentId = !empty($data['id']) ? $data['id'] : md5((string) mt_rand());
  if (isset($storage[$contentId])) {
    $storage[$contentId] = array_merge($storage[$contentId], $data);
  }
  else {
    $storage[$contentId] = $data;
  }
  // For updates, return the stored content along with the fresh results;
  // fresh results take precedence over stored values.
  if ($update) {
    $response = array_merge($storage[$contentId], $response);
  }
  $response['id'] = $contentId;
  $this->state()->set(self::KEY_CONTENT, $storage);

  return $response;
}