/**
 * Build the meta keyword string for an article from its text.
 *
 * The text is stripped of entities, {plugin} calls and non-letter characters,
 * lowercased and split into words. Words are ranked by frequency, with
 * white-listed words/phrases injected using their configured weighting and
 * black-listed words removed. Optionally the keywords wrapped in {...} in the
 * existing meta keywords, the author and the section/category are appended.
 *
 * All tuning comes from $this->addkeyParams (preserveKeys, oldphp,
 * multiWordWhiteList, multiWordWeighting, unsetMultiWord, whiteList,
 * whiteWordWeighting, whiteListOnly, blackList, keyCount, minLength,
 * addAuthor, addSectCat).
 *
 * @param string  $oldKeys        Existing meta keywords; only read when preserveKeys == 1.
 * @param string  $text           Article text to extract keywords from.
 * @param mixed   $article        Article passed through to addAuthor() / addSectCat().
 * @param boolean $processGlobals When truthy, $this->akProcessKeys is set to 1.
 * @param boolean $author         Allow appending the author (still requires addAuthor == 1).
 * @param boolean $sectcat        Allow appending section/category (still requires addSectCat).
 *
 * @return string Lowercased, trimmed keyword list.
 */
private function generateKeywords($oldKeys, $text, $article, $processGlobals, $author = true, $sectcat = true)
{
    $params = $this->addkeyParams;

    // Keywords to preserve: whatever sits between the first "{" and the last "}"
    // of the existing meta keywords. Stays null when there is nothing to keep.
    $savedKeys = null;
    if ($params->preserveKeys == 1) {
        $oldKeys = html_entity_decode($oldKeys, ENT_QUOTES, "UTF-8");
        if (preg_match('#{([\\s\\S]*)}#u', $oldKeys, $matches)) {
            $savedKeys = $matches[1];
        }
    }

    $text = html_entity_decode($text, ENT_QUOTES, "UTF-8");

    // Legacy clean-up kept for pre-PHP 5.2 installs: blank out typographic
    // quotes/dashes and the two misspelt entities html_entity_decode() leaves behind.
    // NOTE(review): the first and last entries look like entities that were
    // decoded in transit (presumably &nbsp; and &quot;) - confirm against upstream.
    if ($params->oldphp) {
        $replace = array(' ', '„', '”', '’', '&Idquo;', '&Isquo;', '–', '"');
        $text = JString::str_ireplace($replace, " ", $text);
    }

    // Strip {plugin} calls, including a matching {/plugin} closer when one follows.
    $text = preg_replace('#{[^}]*?}(?(?=[^{]*?{\\/[^}]*?})[^{]*?{\\/[^}]*?})#u', '', $text);

    // Replace digits/punctuation/newlines with spaces so they cannot leak into the output.
    if ($params->oldphp) {
        $text = preg_replace('#[\\d\\W]#u', ' ', $text);
    } else {
        // Unicode-aware: keeps any letter, in any script (not available before PHP 5.2).
        $text = preg_replace("#\\P{L}#u", " ", $text);
    }

    // Collapse whitespace runs, then lowercase the whole string once
    // (cheaper than lowercasing every token later).
    $text = preg_replace('#[\\s]{2,}#u', ' ', $text);
    $text = JString::strtolower($text);

    // Multi-word white list: phrase => occurrences * weighting.
    $multiWordWhiteToAddArray = array();
    if (isset($params->multiWordWhiteList)) {
        // The text is lowercase, so the list must be lowercased too or
        // mixed-case entries can never match.
        $phraseList = JString::strtolower($params->multiWordWhiteList);
        foreach (explode(",", $phraseList) as $phrase) {
            $phrase = JString::trim($phrase);
            if ($phrase === '') {
                continue;
            }
            $occurrences = substr_count($text, $phrase);
            if (!$occurrences) {
                continue;
            }
            $multiWordWhiteToAddArray[$phrase] = $occurrences * $params->multiWordWeighting;
            if ($params->unsetMultiWord) {
                // Remove the phrase so its component words are not counted again.
                // A space is used as replacement so neighbouring words are not glued together.
                $text = JString::str_ireplace($phrase, ' ', $text);
            }
        }
    }

    // Single-word white list: word => occurrences * weighting.
    // The word is intentionally left in $text: the array_merge() below replaces
    // its plain count with the weighted one, and removing substrings here would
    // only shred longer words that happen to contain the entry.
    $whiteToAddArray = array();
    if (isset($params->whiteList)) {
        $wordList = JString::strtolower($params->whiteList);
        foreach (explode(",", $wordList) as $whiteWord) {
            $whiteWord = JString::trim($whiteWord);
            if ($whiteWord === '') {
                continue;
            }
            $occurrences = substr_count($text, $whiteWord);
            if ($occurrences) {
                $whiteToAddArray[$whiteWord] = $occurrences * $params->whiteWordWeighting;
            }
        }
    }

    if ($params->whiteListOnly) {
        $textArray = array();
    } else {
        // Tokenise on spaces, dropping the empty tokens produced by leading,
        // trailing or doubled spaces, then count occurrences per word.
        $tokens = array_filter(explode(" ", $text), 'strlen');
        $textArray = array_count_values($tokens);

        // Remove black-listed words (lowercased for the same reason as the white lists).
        $blackList = isset($params->blackList) ? JString::strtolower($params->blackList) : '';
        foreach (explode(",", $blackList) as $blackWord) {
            unset($textArray[JString::trim($blackWord)]);
        }
    }

    // White-list entries are merged after black-listing, so they always win
    // and their weighted counts override the plain counts.
    $textArray = array_merge($textArray, $whiteToAddArray, $multiWordWhiteToAddArray);

    // Most frequent first; take at most keyCount words that are long enough.
    arsort($textArray);
    $keywordsIn = array();
    foreach ($textArray as $word => $instances) {
        if (count($keywordsIn) >= $params->keyCount) {
            break;
        }
        $word = JString::trim($word);
        // Character count, not byte count, so multibyte words are measured correctly.
        if (JString::strlen($word) >= $params->minLength) {
            $keywordsIn[] = $word;
        }
    }
    $keywords = implode(",", $keywordsIn);

    // Append the preserved meta keywords, without leaving a stray separator
    // when either side is empty.
    if ($savedKeys !== null && JString::trim($savedKeys) !== '') {
        $keywords = ($keywords === '') ? $savedKeys : $keywords . ", " . $savedKeys;
    }

    // Add the author or author alias as a keyword if desired.
    if ($author && $params->addAuthor == 1) {
        $keywords = plgSystemAddKeywords::addAuthor($article, $keywords);
    }

    // Add section/category if set.
    if ($sectcat && $params->addSectCat) {
        $keywords = plgSystemAddKeywords::addSectCat($article, $keywords, $params->addSectCat);
    }

    if ($processGlobals) {
        $this->akProcessKeys = 1;
    }

    return JString::trim(JString::strtolower($keywords));
}