/**
 * Returns the number of syllables in the word.
 * Based in part on Greg Fast's Perl module Lingua::EN::Syllables
 *
 * Steps:
 *  1. Return 0 if the trimmed word contains no letters
 *     (as reported by Text::letterCount).
 *  2. Strip everything except ASCII letters and lower-case the result.
 *  3. If the word, or its singular form, is a key of self::$arrProblemWords,
 *     return the value stored there.
 *  4. Strip known affixes (self::$arrAffix, self::$arrDoubleAffix,
 *     self::$arrTripleAffix), counting each removal as 1, 2 or 3 syllables.
 *  5. Count the vowel groups (runs of a, e, i, o, u, y) in what remains.
 *  6. Subtract one for every self::$arrSubSyllables pattern that matches and
 *     add one for every self::$arrAddSyllables pattern that matches.
 *  7. Never report fewer than one syllable for a word that reached step 4.
 *
 * @param  string $strWord     Word to be measured
 * @param  string $strEncoding Encoding of text
 * @return int
 */
public static function syllableCount($strWord, $strEncoding = '')
{
    // Trim whitespace
    $strWord = trim($strWord);

    // Check we have some letters
    if (Text::letterCount($strWord, $strEncoding) == 0) {
        return 0;
    }

    // Should be no non-alpha characters and lower case
    $strWord = preg_replace('`[^A-Za-z]`', '', $strWord);
    $strWord = Text::lowerCase($strWord, $strEncoding);

    // Check for problem words
    if (isset(self::$arrProblemWords[$strWord])) {
        return self::$arrProblemWords[$strWord];
    }

    // Try the singular form against the problem word list as well
    $singularWord = Pluralise::getSingular($strWord);
    if ($singularWord != $strWord && isset(self::$arrProblemWords[$singularWord])) {
        return self::$arrProblemWords[$singularWord];
    }

    // Remove prefixes and suffixes and count how many were taken
    $strWord = preg_replace(self::$arrAffix, '', $strWord, -1, $intAffixCount);
    $strWord = preg_replace(self::$arrDoubleAffix, '', $strWord, -1, $intDoubleAffixCount);
    $strWord = preg_replace(self::$arrTripleAffix, '', $strWord, -1, $intTripleAffixCount);

    // Split on runs of non-vowels; every non-empty piece left over is one
    // vowel group, i.e. one candidate syllable.
    $arrWordParts = preg_split('`[^aeiouy]+`', $strWord, -1, PREG_SPLIT_NO_EMPTY);
    $intWordPartCount = count($arrWordParts);

    // Some syllables do not follow normal rules - check for them
    // Thanks to Joe Kovar for correcting a bug in the following lines
    $intSyllableCount = $intWordPartCount
        + $intAffixCount
        + 2 * $intDoubleAffixCount
        + 3 * $intTripleAffixCount;

    // Each matching pattern removes one over-counted syllable
    foreach (self::$arrSubSyllables as $strSyllable) {
        $intSyllableCount -= preg_match('`' . $strSyllable . '`', $strWord);
    }

    // Each matching pattern adds one under-counted syllable
    foreach (self::$arrAddSyllables as $strSyllable) {
        $intSyllableCount += preg_match('`' . $strSyllable . '`', $strWord);
    }

    // A word with letters has at least one syllable. Clamp rather than test
    // for exactly 0: several subtraction patterns can match the same word
    // and push the running total below zero.
    return max(1, $intSyllableCount);
}
/**
 * Returns the number of unique words NOT on the Spache easy word list.
 *
 * The text is reduced to ASCII letters, apostrophes and spaces, lower-cased
 * and split on single spaces. A word is "difficult" when neither its plural
 * form (Pluralise::getPlural) nor its singular form (Pluralise::getSingular)
 * appears in the list returned by Resource::fetchSpacheWordList(). Words are
 * de-duplicated by singular form, and tokens shorter than two characters
 * are always treated as easy.
 *
 * @param  boolean|string $strText Text to be measured
 * @return int
 */
public function spacheDifficultWordCount($strText = false)
{
    $strText = $this->setText($strText);

    // Keep only letters, apostrophes and spaces. The range has to be written
    // as A-Za-z: "A-z" also spans the ASCII characters between 'Z' and 'a'
    // ([ \ ] ^ _ and the backtick), which would then leak into the words.
    $strCleaned = strtolower(preg_replace('`[^A-Za-z\' ]`', '', $strText));
    $arrWords = explode(' ', $strCleaned);

    // Get the Spache word list
    // NOTE(review): strict comparison below assumes this is a flat list of
    // strings - confirm against Resource::fetchSpacheWordList().
    $arrSpache = Resource::fetchSpacheWordList();

    // Singular forms already counted as difficult, stored as array keys so
    // the duplicate check is a hash lookup rather than a linear scan.
    $wordsCounted = array();

    foreach ($arrWords as $strWord) {
        // Single letters (and the empty tokens produced by consecutive
        // spaces) are counted as easy
        if (strlen($strWord) < 2) {
            continue;
        }

        $singularWord = Pluralise::getSingular($strWord);
        if (isset($wordsCounted[$singularWord])) {
            continue;
        }

        $blnIsEasy = in_array(Pluralise::getPlural($strWord), $arrSpache, true)
            || in_array($singularWord, $arrSpache, true);

        if (!$blnIsEasy) {
            $wordsCounted[$singularWord] = true;
        }
    }

    return count($wordsCounted);
}