Пример #1
0
 /**
  * Builds a comma-separated, lower-cased keyword string from article text.
  *
  * The text is stripped of entities, plugin tags and non-letter characters,
  * split into words and ranked by frequency. Whitelisted words and phrases
  * are weighted, blacklisted words are dropped, and optionally the preserved
  * meta keywords, the author and the section/category are appended.
  *
  * @param string $oldKeys        Existing meta keywords; when the preserveKeys
  *                               option is 1, the part enclosed in {...} is kept.
  * @param string $text           Article text to extract keywords from.
  * @param mixed  $article        Article passed through to addAuthor()/addSectCat().
  * @param mixed  $processGlobals When truthy, $this->akProcessKeys is set to 1.
  * @param bool   $author         Allow adding the author (still needs the addAuthor option).
  * @param bool   $sectcat        Allow adding section/category (still needs the addSectCat option).
  *
  * @return string Trimmed, lower-cased keyword list.
  */
 private function generateKeywords($oldKeys, $text, $article, $processGlobals, $author = true, $sectcat = true)
 {
     // Keywords to preserve: whatever sits between the outermost braces of the old keywords
     $savedKeys = null;
     if ($this->addkeyParams->preserveKeys == 1) {
         $oldKeys = html_entity_decode($oldKeys, ENT_QUOTES, "UTF-8");
         if (preg_match('#{([\\s\\S]*)}#u', $oldKeys, $matches)) {
             $savedKeys = $matches[1];
         }
     }

     $text = html_entity_decode($text, ENT_QUOTES, "UTF-8");

     // Get rid of &nbsp; and friends - deprecated but kept for pre-PHP5.2 support,
     // where html_entity_decode() may leave these entities untouched
     if ($this->addkeyParams->oldphp) {
         $replace = array("&nbsp;", "&bdquo;", "&rdquo;", "&rsquo;", "&ldquo;", "&lsquo;", "&ndash;", "&quot;");
         $text = JString::str_ireplace($replace, " ", $text);
     }

     // Cleans up plugin calls ({tag} and {tag}...{/tag})
     $text = preg_replace('#{[^}]*?}(?(?=[^{]*?{\\/[^}]*?})[^{]*?{\\/[^}]*?})#u', '', $text);

     // Cleans any numbers or punctuation/newlines etc which were causing blanks/dashes etc in the final output
     if ($this->addkeyParams->oldphp) {
         $text = preg_replace('#[\\d\\W]#u', ' ', $text);
     } else {
         // New syntax more forgiving for hyphenated words but may still break them and does not work with PHP <5.2
         // Non-English character safe!
         $text = preg_replace("#\\P{L}#u", " ", $text);
     }

     // More efficient to change entire string to lower case here than via array_map
     $text = preg_replace('#[\\s]{2,}#u', ' ', $text);
     $text = JString::strtolower($text);

     $whiteToAddArray = array();
     $multiWordWhiteToAddArray = array();

     // Multi-word whitelist: weighted phrase counts, optionally removed from the text
     // so that their component words are not counted again below.
     if (isset($this->addkeyParams->multiWordWhiteList)) {
         // The text is lower-cased, so the list must be too (the result has to be kept)
         $multiWordWhiteList = JString::strtolower($this->addkeyParams->multiWordWhiteList);
         foreach (explode(",", $multiWordWhiteList) as $multiWordWhiteWord) {
             $multiWordWhiteWord = JString::trim($multiWordWhiteWord);
             if (!$multiWordWhiteWord) {
                 continue;
             }
             // NOTE(review): substr_count matches substrings, not whole words
             $multiWordCount = substr_count($text, $multiWordWhiteWord);
             if (!$multiWordCount) {
                 continue;
             }
             $multiWordWhiteToAddArray[$multiWordWhiteWord] = $multiWordCount * $this->addkeyParams->multiWordWeighting;
             if ($this->addkeyParams->unsetMultiWord) {
                 $text = JString::str_ireplace($multiWordWhiteWord, '', $text);
             }
         }
     }

     // Single-word whitelist: weighted counts, always removed from the text afterwards
     if (isset($this->addkeyParams->whiteList)) {
         $whiteList = JString::strtolower($this->addkeyParams->whiteList);
         foreach (explode(",", $whiteList) as $whiteWord) {
             $whiteWord = JString::trim($whiteWord);
             if (!$whiteWord) {
                 continue;
             }
             $whiteWordCount = substr_count($text, $whiteWord);
             if (!$whiteWordCount) {
                 continue;
             }
             $whiteToAddArray[$whiteWord] = $whiteWordCount * $this->addkeyParams->whiteWordWeighting;
             $text = JString::str_ireplace($whiteWord, '', $text);
         }
     }

     if ($this->addkeyParams->whiteListOnly) {
         $textArray = array();
     } else {
         // Split on whitespace and drop empty tokens left by leading/trailing
         // spaces or by the whitelist removals above.
         $tokens = preg_split('#\\s+#u', $text, -1, PREG_SPLIT_NO_EMPTY);
         if ($tokens === false) {
             $tokens = array();
         }
         $textArray = array_count_values($tokens);

         // Remove blacklisted words
         if (isset($this->addkeyParams->blackList)) {
             $blackList = JString::strtolower($this->addkeyParams->blackList);
             foreach (explode(",", $blackList) as $blackWord) {
                 $blackWord = JString::trim($blackWord);
                 if (isset($textArray[$blackWord])) {
                     unset($textArray[$blackWord]);
                 }
             }
         }
     }

     // Whitelist weights override plain counts for the same word
     $textArray = array_merge($textArray, $whiteToAddArray, $multiWordWhiteToAddArray);

     // Sort by frequency
     arsort($textArray);

     // Take the most frequent words that satisfy the minimum length, up to keyCount
     $keywordsIn = array();
     $i = 1;
     foreach ($textArray as $word => $instances) {
         if ($i > $this->addkeyParams->keyCount) {
             break;
         }
         $word = JString::trim((string) $word);
         // Character count, not byte count, so multibyte words are measured correctly
         if (JString::strlen($word) >= $this->addkeyParams->minLength) {
             $keywordsIn[] = $word;
             $i++;
         }
     }
     $keywords = implode(",", $keywordsIn);

     // Add in the preserved meta keywords, without a dangling separator
     if ($savedKeys !== null && JString::trim($savedKeys) !== '') {
         if ($keywords === '') {
             $keywords = $savedKeys;
         } else {
             $keywords .= ", " . $savedKeys;
         }
     }

     // add the author or author alias as a keyword if desired
     if ($author && $this->addkeyParams->addAuthor == 1) {
         $keywords = plgSystemAddKeywords::addAuthor($article, $keywords);
     }

     // add section/category if set
     if ($sectcat && $this->addkeyParams->addSectCat) {
         $keywords = plgSystemAddKeywords::addSectCat($article, $keywords, $this->addkeyParams->addSectCat);
     }

     if ($processGlobals) {
         $this->akProcessKeys = 1;
     }

     return JString::trim(JString::strtolower($keywords));
 }