/**
 * Checks that Validator::isAlpha() accepts strings made up solely of the letters a-z/A-Z
 * and rejects strings that contain digits or punctuation.
 *
 * @since 1.0
 */
public function testIsAlpha()
{
    // lower, mixed and upper case words must all be accepted
    $accepted = array('test', 'Test', 'TEST');

    foreach ($accepted as $candidate) {
        $this->assertTrue(Validator::isAlpha($candidate));
    }

    // a trailing digit, pure punctuation and a pure number must all be refused
    $refused = array('number5', '!-++#', '100');

    foreach ($refused as $candidate) {
        $this->assertFalse(Validator::isAlpha($candidate));
    }
}
/**
 * Splits the passed content into words, optionally filters (removes) the stop words listed in the
 * configured stopwords-<size>.ini file, and returns an array of Tag instances (one per distinct word).
 *
 * The stopwords file is looked up under [app.root]config/ first and then under [app.root]Alpha/.
 * Only tokens accepted by Validator::isAlpha() become tags; each tag's content is lower-cased.
 *
 * @param string $content        The text to split into tags (split on whitespace, commas, full stops, colons and hyphens).
 * @param        $taggedClass    Optionally provide a BO class name (with namespace)
 * @param        $taggedOID      Optionally provide a BO instance OID
 * @param        $applyStopwords Defaults true, set to false if you want to ignore the stopwords.
 *
 * @return array The Tag instances generated, in order of first appearance in the content.
 *
 * @throws Alpha\Exception\AlphaException If the stopwords file cannot be found or cannot be read.
 *
 * @since 1.0
 */
public static function tokenize($content, $taggedClass = '', $taggedOID = '', $applyStopwords = true)
{
    if (self::$logger == null) {
        self::$logger = new Logger('Tag');
    }

    $config = ConfigProvider::getInstance();

    // break the content into candidate words and lower-case them
    $lowerWords = preg_split("/[\\s,.:-]+/", $content);
    array_walk($lowerWords, 'Alpha\\Model\\Tag::lowercaseArrayElement');

    // apply stop words
    if ($applyStopwords) {
        // build the file name and root once rather than on every existence check
        $appRoot = $config->get('app.root');
        $stopwordsFile = 'stopwords-' . $config->get('search.stop.words.size') . '.ini';

        if (file_exists($appRoot . 'config/' . $stopwordsFile)) {
            $stopwordsPath = $appRoot . 'config/' . $stopwordsFile;
        } elseif (file_exists($appRoot . 'Alpha/' . $stopwordsFile)) {
            $stopwordsPath = $appRoot . 'Alpha/' . $stopwordsFile;
        } else {
            throw new AlphaException('Unable to find a ' . $stopwordsFile . ' file in the application!');
        }

        $stopwords = file($stopwordsPath, FILE_IGNORE_NEW_LINES);

        // file() returns false when the file exists but cannot be read; fail with the documented
        // exception type instead of letting false flow into array_walk()/array_diff()
        if ($stopwords === false) {
            throw new AlphaException('Unable to read the stopwords file [' . $stopwordsPath . ']');
        }

        array_walk($stopwords, 'Alpha\\Model\\Tag::lowercaseArrayElement');
        $filtered = array_diff($lowerWords, $stopwords);
    } else {
        $filtered = $lowerWords;
    }

    $tagObjects = array();
    $tagContents = array();
    // set of tag contents already emitted, keyed by content for constant-time duplicate checks
    $seen = array();

    foreach ($filtered as $tagContent) {
        // we only want to create word tags
        if (empty($tagContent) || !Validator::isAlpha($tagContent)) {
            continue;
        }

        // de-duplicate on the exact value that is stored on the tag
        $normalised = trim(mb_strtolower($tagContent));

        // just making sure that we haven't added this one in already
        if (isset($seen[$normalised])) {
            continue;
        }
        $seen[$normalised] = true;

        $tag = new self();
        $tag->set('content', $normalised);

        if (!empty($taggedClass)) {
            $tag->set('taggedClass', $taggedClass);
        }

        if (!empty($taggedOID)) {
            $tag->set('taggedOID', $taggedOID);
        }

        $tagObjects[] = $tag;
        $tagContents[] = $normalised;
    }

    self::$logger->debug('Tags generated: [' . var_export($tagContents, true) . ']');

    return $tagObjects;
}