Пример #1
0
 /**
  * Find posts related to a text, based on weighted keywords extracted from it.
  *
  * Keywords come from AnalyzeText::analysisKeyword_GetKeywordsFromText(). The
  * ten highest scored keywords, plus up to ten keywords that also appear in
  * the cleaned list of category/tag names (counted double), are boosted by
  * their occurrences in the optional weighted texts and then handed to
  * find_related_posts().
  *
  * The result is cached per text/options combination. Every returned post is
  * also remembered in PepVN_Data::$cacheData, and those remembered IDs are
  * added to the exclusion list of later calls.
  *
  * @param mixed       $input_text    Text to analyse (passed to the keyword extractor as 'contents').
  * @param array|false $input_options Optional settings:
  *                                   - 'limit' (int, 0 when absent)
  *                                   - 'exclude_posts_ids' (array of post IDs)
  *                                   - 'key_cache' (array; only contributes to the cache key)
  *                                   - 'group_text_weight' (list of array('text' => ..., 'weight' => ...))
  *                                   - 'post_id_less_than' (forwarded as-is, null when absent)
  *
  * @return mixed Whatever find_related_posts() returned, or the cached copy of it.
  */
 public function search_post_by_text($input_text, $input_options = false)
 {
     $classMethodKey = crc32(__CLASS__ . __METHOD__);
     if (!$input_options) {
         $input_options = array();
     }

     // Every option is optional (the default call passes none), so read each
     // one through isset() instead of indexing a possibly missing key.
     $input_options['limit'] = isset($input_options['limit']) ? (int) $input_options['limit'] : 0;
     $input_options['exclude_posts_ids'] = isset($input_options['exclude_posts_ids']) ? (array) $input_options['exclude_posts_ids'] : array();
     // Kept out of $input_options on purpose so the cache key below is not
     // altered for callers that never pass this option.
     $postIdLessThan = isset($input_options['post_id_less_than']) ? $input_options['post_id_less_than'] : null;

     // Posts already returned by earlier calls are excluded as well.
     $alreadyReturned = isset(PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added'])
         ? PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added']
         : array();
     if ($alreadyReturned && !PepVN_Data::isEmptyArray($alreadyReturned)) {
         $input_options['exclude_posts_ids'] = array_merge($input_options['exclude_posts_ids'], array_keys($alreadyReturned));
     }
     unset($alreadyReturned);

     // Normalise the exclusion list (unique, descending, re-indexed) so equal
     // sets always produce the same cache key.
     $input_options['exclude_posts_ids'] = array_unique($input_options['exclude_posts_ids']);
     arsort($input_options['exclude_posts_ids']);
     $input_options['exclude_posts_ids'] = array_values($input_options['exclude_posts_ids']);

     $input_options['key_cache'] = isset($input_options['key_cache']) ? (array) $input_options['key_cache'] : array();

     $keyCacheProcessText = Utils::hashKey(array($classMethodKey, $input_text, $input_options, 'search_post_by_text'));
     $tmp = TempDataAndCacheFile::get_cache($keyCacheProcessText, true);
     if (null !== $tmp) {
         return $tmp;
     }

     $wpExtend = $this->di->getShared('wpExtend');
     $analyzeText = $this->di->getShared('analyzeText');

     // Cleaned names of all categories and tags, cached separately because
     // they do not depend on the text being analysed.
     $keyCacheGroupNameOfTagsAndCategories = Utils::hashKey(array($classMethodKey, 'groupNameOfTagsAndCategories'));
     $groupNameOfTagsAndCategories = TempDataAndCacheFile::get_cache($keyCacheGroupNameOfTagsAndCategories);
     if (null === $groupNameOfTagsAndCategories) {
         $groupNameOfTagsAndCategories = array_merge(
             array_keys($wpExtend->getAndParseCategories()),
             array_keys($wpExtend->getAndParseTags())
         );
         $groupNameOfTagsAndCategories = $this->_clean_terms($groupNameOfTagsAndCategories);
         TempDataAndCacheFile::set_cache($keyCacheGroupNameOfTagsAndCategories, $groupNameOfTagsAndCategories);
     }

     // Build a string-keyed lookup once instead of scanning the term list
     // for every keyword.
     $termLookup = array();
     if (is_array($groupNameOfTagsAndCategories)) {
         foreach ($groupNameOfTagsAndCategories as $termName) {
             if (is_string($termName) || is_int($termName)) {
                 $termLookup[(string) $termName] = true;
             }
         }
     }
     unset($groupNameOfTagsAndCategories);

     // Score each keyword: occurrences * (number of words * 1.5), rounded up,
     // so longer phrases outrank single words with the same frequency.
     $rsGetKeywordsFromText = AnalyzeText::analysisKeyword_GetKeywordsFromText(array('contents' => $input_text, 'min_word' => 1, 'max_word' => 6, 'min_occur' => 2, 'min_char_each_word' => 4));
     $groupKeywordsFromText = array();
     if (isset($rsGetKeywordsFromText['data']) && is_array($rsGetKeywordsFromText['data'])) {
         foreach ($rsGetKeywordsFromText['data'] as $keywordGroup) {
             if (!$keywordGroup || !is_array($keywordGroup)) {
                 continue;
             }
             foreach ($keywordGroup as $keyword => $occurrences) {
                 if ($keyword) {
                     $groupKeywordsFromText[$keyword] = ceil((int) $occurrences * (PepVN_Data::countWords($keyword) * 1.5));
                 }
             }
         }
     }
     unset($rsGetKeywordsFromText);
     arsort($groupKeywordsFromText);

     // preserve_keys = true: PHP stores numeric keywords such as "2015" as
     // integer keys, and array_slice() would otherwise renumber them to 0..9.
     $topKeywords = array_slice($groupKeywordsFromText, 0, 10, true);

     // Keywords that are also category/tag names.
     $termKeywords = array();
     foreach ($groupKeywordsFromText as $keyword => $score) {
         if (isset($termLookup[(string) $keyword])) {
             $termKeywords[$keyword] = $score;
         }
     }
     unset($groupKeywordsFromText, $termLookup);
     arsort($termKeywords);
     $termKeywords = array_slice($termKeywords, 0, 10, true);

     // Final weights: top keywords count once, term matches count double.
     $groupKeywordsFromText4 = array();
     foreach ($topKeywords as $keyword => $score) {
         $groupKeywordsFromText4[$keyword] = (int) $score;
     }
     foreach ($termKeywords as $keyword => $score) {
         if (!isset($groupKeywordsFromText4[$keyword])) {
             $groupKeywordsFromText4[$keyword] = 0;
         }
         $groupKeywordsFromText4[$keyword] += (int) $score * 2;
     }
     unset($topKeywords, $termKeywords);

     if (isset($input_options['group_text_weight']) && !PepVN_Data::isEmptyArray($input_options['group_text_weight'])) {
         // Matches a word whose first character is not a lowercase ASCII
         // letter, a digit or one of the listed punctuation characters.
         // Loop-invariant, so it is built once.
         $importantWordPattern = '#^[^a-z0-9' . preg_quote('`~!@#$%^&*()-_=+{}[]\\\\|;:\'",.<>/?+', '#') . ']+#';

         foreach ($input_options['group_text_weight'] as $weightedText) {
             if (empty($weightedText['text'])) {
                 continue;
             }
             $cleanText = AnalyzeText::cleanRawTextForProcessSearch($weightedText['text']);
             if (!$cleanText) {
                 continue;
             }
             // An entry without a weight contributes nothing.
             $baseWeight = isset($weightedText['weight']) ? $weightedText['weight'] : 0;

             foreach (array_keys($groupKeywordsFromText4) as $keyword) {
                 $keywordText = (string) $keyword;
                 $occurrences = substr_count($cleanText, PepVN_Data::strtolower($keywordText));
                 if (!$occurrences) {
                     continue;
                 }

                 // Each word matching the pattern multiplies the weight, and
                 // runs of consecutive matching words are boosted further.
                 $weight1 = $baseWeight;
                 $numberImportantOccurrenceConsecutive = 1;
                 foreach (explode(' ', $keywordText) as $word) {
                     if (preg_match($importantWordPattern, $word)) {
                         $weight1 = $weight1 * 1.6 * ($numberImportantOccurrenceConsecutive * 1.11);
                         $numberImportantOccurrenceConsecutive++;
                     } else {
                         $numberImportantOccurrenceConsecutive = 1;
                     }
                 }

                 $groupKeywordsFromText4[$keyword] += (int) $occurrences * $weight1;
             }
         }
     }

     $rsSearchPosts = $analyzeText->find_related_posts(array(
         'keywords' => $groupKeywordsFromText4,
         'limit' => $input_options['limit'],
         'exclude_posts_ids' => $input_options['exclude_posts_ids'],
         'post_id_less_than' => $postIdLessThan,
     ));

     // Remember the returned posts so subsequent calls exclude them.
     if (!empty($rsSearchPosts) && (is_array($rsSearchPosts) || $rsSearchPosts instanceof \Traversable)) {
         foreach ($rsSearchPosts as $foundPost) {
             if (isset($foundPost['post_id']) && $foundPost['post_id']) {
                 PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added'][$foundPost['post_id']] = $foundPost;
             }
         }
     }

     TempDataAndCacheFile::set_cache($keyCacheProcessText, $rsSearchPosts, true);
     return $rsSearchPosts;
 }
Пример #2
0
 /**
  * Normalise raw text so it can be compared against search keywords.
  *
  * The input (a string or an array of fragments) is joined with spaces and
  * then passed, in this order, through decodeText(), strip_tags(),
  * PepVN_Data::strtolower(), analysisKeyword_RemovePunctuations(),
  * Text::replaceSpecialChar() and PepVN_Data::reduceSpace().
  *
  * @param string|array $input_text Raw text, or a list of text fragments.
  *
  * @return mixed The result of the final PepVN_Data::reduceSpace() call
  *               (presumably a string; confirm against that helper).
  */
 public static function cleanRawTextForProcessSearch($input_text)
 {
     // A scalar becomes a one-element list, so both input shapes share one path.
     $joined = implode(' ', (array) $input_text);

     $withoutMarkup = strip_tags(self::decodeText($joined));
     $lowered = PepVN_Data::strtolower($withoutMarkup);
     $withoutPunctuation = self::analysisKeyword_RemovePunctuations($lowered);
     $withoutSpecialChars = Text::replaceSpecialChar($withoutPunctuation);

     return PepVN_Data::reduceSpace($withoutSpecialChars);
 }
Пример #3
0
 /**
  * Turn occurrences of the configured keywords inside an HTML text into links.
  *
  * Steps:
  *  1. Return the cached result for this text/options pair if there is one.
  *  2. Replace markup that must not be touched (existing links, scripts,
  *     optionally headings, and finally all remaining tags) with
  *     placeholders.
  *  3. For each custom keyword found in the text, most frequent first, pick
  *     a target: one of its configured links that has not been used yet, or
  *     else a post/page found through search_posts(). Link the first
  *     occurrence of the keyword to that target.
  *  4. Restore the placeholders, run process_attributes_links() and cache
  *     the result.
  *
  * Links and keywords that were used are tracked in PepVN_Data::$cacheData,
  * so the same target is not linked twice.
  *
  * NOTE(review): the result depends on the global $post (self-link rules),
  * but the cache key is built only from the text and the options. Confirm
  * that identical text in two different posts may share a cached result.
  *
  * @param string $text HTML fragment to process.
  *
  * @return mixed The processed text, or the cached value for it.
  */
 public function process_text($text)
 {
     $options = self::getOption();
     $classMethodKey = Hash::crc32b(__CLASS__ . '_' . __METHOD__);
     $textChecksum = Utils::hashKey($text);
     $keyCacheProcessText = Utils::hashKey(array($classMethodKey, $textChecksum, 'keyCacheProcessText', $options));
     $tmp = TempDataAndCacheFile::get_cache($keyCacheProcessText, true);
     if (null !== $tmp) {
         return $tmp;
     }

     global $post;
     $analyzeText = $this->di->getShared('analyzeText');

     // Options are read defensively: a missing key means "off" / "no limit".
     $autolinks_case_sensitive = isset($options['autolinks_case_sensitive']) && 'on' === $options['autolinks_case_sensitive'];
     $openInNewWindow = isset($options['autolinks_new_window']) && 'on' === $options['autolinks_new_window'];
     $maxlinks = isset($options['maxlinks']) ? max(0, (int) $options['maxlinks']) : 0; // 0 = unlimited

     // $post may be unset (no current post), so never dereference it
     // without isset().
     $currentPostId = (isset($post->ID) && $post->ID) ? (int) $post->ID : 0;
     $currentPostType = (isset($post->post_type) && $post->post_type) ? $post->post_type : '';

     // Whether a search result pointing at the current post itself may be used.
     $selfLinkOption = '';
     if ('post' === $currentPostType) {
         $selfLinkOption = 'link_to_postself';
     } elseif ('page' === $currentPostType) {
         $selfLinkOption = 'link_to_pageself';
     }
     $allowSelfLink = '' !== $selfLinkOption && isset($options[$selfLinkOption]) && 'on' === $options[$selfLinkOption];

     // Protect markup that must never receive links by swapping it for
     // placeholders; the accumulated map is used to restore it at the end.
     $patternsEscaped = array();
     $rsOne = PepVN_Data::escapeHtmlTagsAndContents($text, 'a;pre;script;style;link;meta;input;textarea;iframe;video;audio;object');
     $text = $rsOne['content'];
     $patternsEscaped = $this->_process_text_merge_patterns($patternsEscaped, $rsOne);

     if (isset($options['exclude_heading']) && 'on' === $options['exclude_heading']) {
         // Escape a and h1 -> h6.
         $rsOne = PepVN_Data::escapeHtmlTagsAndContents($text, 'a;h1;h2;h3;h4;h5;h6');
         $text = $rsOne['content'];
         $patternsEscaped = $this->_process_text_merge_patterns($patternsEscaped, $rsOne);
     }

     $rsOne = PepVN_Data::escapeHtmlTags($text);
     $text = $rsOne['content'];
     $patternsEscaped = $this->_process_text_merge_patterns($patternsEscaped, $rsOne);
     unset($rsOne);

     // The keyword pattern requires a separator on both sides, so pad the
     // text to let keywords at the very start or end match too.
     $text = ' ' . trim($text) . ' ';

     $group_keywords1 = $this->_get_data_custom_keywords();
     $numberTotalLinksAdded = 0;
     $targetPostTypesForSearch = array('post', 'page');

     // Rank the keywords by how often they appear in the text.
     $group_keywords2 = array();
     if (is_array($group_keywords1) && !empty($group_keywords1)) {
         $keywordList = implode(';', array_keys($group_keywords1));
         if ($autolinks_case_sensitive) {
             $group_keywords2 = $analyzeText->frequencyOfAppearanceKeywordsInText($keywordList, $text);
         } else {
             $group_keywords2 = $analyzeText->frequencyOfAppearanceKeywordsInText(PepVN_Data::strtolower($keywordList), PepVN_Data::strtolower($text));
         }
     }

     if (is_array($group_keywords2) && !empty($group_keywords2)) {
         arsort($group_keywords2);

         // Link URLs and titles go into a preg_replace() replacement string,
         // where "\n" and "$n" are backreferences; escape them so that a
         // title like "Save $5" is inserted literally.
         $replacementEscapes = array('\\' => '\\\\', '$' => '\\$');

         foreach (array_keys($group_keywords2) as $key1) {
             if ($maxlinks > 0 && $numberTotalLinksAdded >= $maxlinks) {
                 break;
             }
             // NOTE(review): in case-insensitive mode the ranked keys were
             // lower-cased, so this lookup presumably relies on
             // _get_data_custom_keywords() returning lower-case keys; confirm.
             if (!isset($group_keywords1[$key1])) {
                 continue;
             }

             $targetLink2 = false;
             $targetLinkTitle2 = false;

             // 1) Prefer one of the keyword's configured links, in random
             //    order, that has not been used yet.
             $targetLinks1 = $group_keywords1[$key1];
             if (is_array($targetLinks1) && !empty($targetLinks1)) {
                 shuffle($targetLinks1);
                 foreach ($targetLinks1 as $value2) {
                     $value2 = trim($value2);
                     if ($value2 && !isset(PepVN_Data::$cacheData[$classMethodKey]['linksAdded'][$value2])) {
                         $targetLink2 = $value2;
                         $targetLinkTitle2 = $key1;
                         break;
                     }
                 }
             }
             unset($targetLinks1);

             // 2) Otherwise fall back to the first suitable post or page.
             if (!$targetLink2) {
                 $rsTwo = $analyzeText->search_posts(array('keyword' => $key1, 'post_types' => $targetPostTypesForSearch));
                 if (is_array($rsTwo) || $rsTwo instanceof \Traversable) {
                     foreach ($rsTwo as $valueTwo) {
                         if (!isset($valueTwo['post_id'], $valueTwo['post_link']) || !$valueTwo['post_link']) {
                             continue;
                         }
                         if ((int) $valueTwo['post_id'] === $currentPostId && !$allowSelfLink) {
                             continue;
                         }
                         if (isset(PepVN_Data::$cacheData[$classMethodKey]['linksAdded'][$valueTwo['post_link']])) {
                             continue;
                         }
                         $targetLink2 = $valueTwo['post_link'];
                         $targetLinkTitle2 = isset($valueTwo['post_title']) ? $valueTwo['post_title'] : false;
                         break;
                     }
                 }
                 unset($rsTwo);
             }

             if (!$targetLink2) {
                 continue;
             }

             $patterns2 = '#([\\s ,;\\.\\t]+)(' . Utils::preg_quote($key1) . ')([\\s ,;\\.\\t]+)#';
             if (!$autolinks_case_sensitive) {
                 $patterns2 .= 'i';
             }

             if ($targetLinkTitle2) {
                 $targetLinkTitle2 = PepVN_Data::cleanKeyword($targetLinkTitle2);
             }
             // Without a usable title, fall back to the matched keyword (\2).
             // NOTE(review): link and title are not HTML-escaped here; this
             // assumes cleanKeyword() and the link sources yield
             // attribute-safe values. Confirm.
             $titleReplacement = $targetLinkTitle2 ? strtr((string) $targetLinkTitle2, $replacementEscapes) : '\\2';
             $replace2 = '\\1<a href="' . strtr((string) $targetLink2, $replacementEscapes) . '" '
                 . ($openInNewWindow ? ' target="_blank" ' : '')
                 . ' title="' . $titleReplacement . '">'
                 . '<strong>\\2</strong></a>\\3';

             // Only the first occurrence of each keyword is linked.
             $count2 = 0;
             $replacedText = preg_replace($patterns2, $replace2, $text, 1, $count2);
             if (null === $replacedText) {
                 // PCRE error (e.g. malformed pattern): keep the text intact
                 // and move on to the next keyword.
                 continue;
             }
             $text = $replacedText;
             unset($replacedText);

             $count2 = (int) $count2;
             if ($count2 > 0) {
                 $targetKeywordClean = PepVN_Data::strtolower(PepVN_Data::cleanKeyword($key1));
                 PepVN_Data::$cacheData[$classMethodKey]['linksAdded'][$targetLink2] = 1;
                 PepVN_Data::$cacheData[$classMethodKey]['keywordsAdded'][$targetKeywordClean] = 1;

                 // Hide the new link so later keywords cannot match inside it.
                 $rsTwo = PepVN_Data::escapeHtmlTagsAndContents($text, 'a;strong');
                 $text = $rsTwo['content'];
                 $patternsEscaped = $this->_process_text_merge_patterns($patternsEscaped, $rsTwo);
                 unset($rsTwo);

                 $numberTotalLinksAdded += $count2;
             }
         }
     }

     // Restore everything that was replaced by placeholders.
     if (!empty($patternsEscaped)) {
         $text = str_replace(array_values($patternsEscaped), array_keys($patternsEscaped), $text);
     }
     unset($patternsEscaped);

     $text = $this->process_attributes_links($text);
     $text = trim($text);
     TempDataAndCacheFile::set_cache($keyCacheProcessText, $text, true);
     return $text;
 }

 /**
  * Add the 'patterns' of an escape*() result to the accumulated placeholder map.
  *
  * @param array $patternsEscaped Placeholder map collected so far.
  * @param mixed $escapeResult    Result of an escapeHtmlTags*() call.
  *
  * @return array The merged placeholder map.
  */
 private function _process_text_merge_patterns(array $patternsEscaped, $escapeResult)
 {
     if (!empty($escapeResult['patterns'])) {
         $patternsEscaped = array_merge($patternsEscaped, $escapeResult['patterns']);
     }
     return $patternsEscaped;
 }