/**
 * Searches for posts related to a piece of text.
 *
 * Keywords are extracted from $input_text and scored; keywords that are also
 * present in the cleaned list of category/tag keys are counted again with a
 * double weight. Optional weighted texts can boost keywords that occur in
 * them. The scored keywords are handed to the "analyzeText" service's
 * find_related_posts(), and the result is cached.
 *
 * Side effect: every returned post that has a 'post_id' is recorded in
 * PepVN_Data::$cacheData, and recorded ids are added to the exclusion list
 * of later calls.
 *
 * Recognised keys of $input_options (all optional):
 *  - 'limit'              cast to int, defaults to 0; forwarded to find_related_posts()
 *  - 'exclude_posts_ids'  post ids to exclude
 *  - 'post_id_less_than'  forwarded unchanged (null when absent)
 *  - 'key_cache'          extra data that only influences the cache key
 *  - 'group_text_weight'  list of array('text' => ..., 'weight' => ...)
 *
 * @param mixed       $input_text    Text to analyse.
 * @param array|false $input_options Search options; any non-array value means "no options".
 *
 * @return mixed Whatever find_related_posts() returned (possibly served from cache).
 */
public function search_post_by_text($input_text, $input_options = false) {
    $classMethodKey = crc32(__CLASS__ . __METHOD__);

    // Anything that is not an array (false, null, ...) is treated as "no options".
    if (!is_array($input_options)) {
        $input_options = array();
    }

    // isset() guards avoid undefined-index warnings when a caller omits an option.
    $input_options['limit'] = isset($input_options['limit']) ? (int) $input_options['limit'] : 0;
    $input_options['exclude_posts_ids'] = isset($input_options['exclude_posts_ids'])
        ? (array) $input_options['exclude_posts_ids']
        : array();

    // Posts returned by earlier calls are excluded as well.
    if (
        isset(PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added'])
        && PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added']
        && !PepVN_Data::isEmptyArray(PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added'])
    ) {
        $input_options['exclude_posts_ids'] = array_merge(
            $input_options['exclude_posts_ids'],
            array_keys(PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added'])
        );
    }

    // Normalise the exclusion list so that equal sets produce the same cache key.
    $input_options['exclude_posts_ids'] = array_unique($input_options['exclude_posts_ids']);
    arsort($input_options['exclude_posts_ids']);
    $input_options['exclude_posts_ids'] = array_values($input_options['exclude_posts_ids']);

    $input_options['key_cache'] = isset($input_options['key_cache'])
        ? (array) $input_options['key_cache']
        : array();

    // Read separately so that no new key is added to the options that feed the cache key.
    $postIdLessThan = isset($input_options['post_id_less_than']) ? $input_options['post_id_less_than'] : null;

    $keyCacheProcessText = Utils::hashKey(array($classMethodKey, $input_text, $input_options, 'search_post_by_text'));

    $tmp = TempDataAndCacheFile::get_cache($keyCacheProcessText, true);
    if (null !== $tmp) {
        return $tmp;
    }

    $wpExtend = $this->di->getShared('wpExtend');
    $analyzeText = $this->di->getShared('analyzeText');

    // Cleaned list of category and tag keys, cached independently of the input text.
    $keyCacheGroupNameOfTagsAndCategories = Utils::hashKey(array($classMethodKey, 'groupNameOfTagsAndCategories'));
    $groupNameOfTagsAndCategories = TempDataAndCacheFile::get_cache($keyCacheGroupNameOfTagsAndCategories);
    if (null === $groupNameOfTagsAndCategories) {
        $categoryKeys = array_keys($wpExtend->getAndParseCategories());
        $tagKeys = array_keys($wpExtend->getAndParseTags());
        $groupNameOfTagsAndCategories = $this->_clean_terms(array_values(array_merge($categoryKeys, $tagKeys)));
        TempDataAndCacheFile::set_cache($keyCacheGroupNameOfTagsAndCategories, $groupNameOfTagsAndCategories);
    }

    $rsGetKeywordsFromText = AnalyzeText::analysisKeyword_GetKeywordsFromText(array(
        'contents' => $input_text,
        'min_word' => 1,
        'max_word' => 6,
        'min_occur' => 2,
        'min_char_each_word' => 4,
    ));

    // Score = occurrences * (number of words * 1.5), rounded up.
    $keywordScores = array();
    if (isset($rsGetKeywordsFromText['data']) && is_array($rsGetKeywordsFromText['data'])) {
        foreach ($rsGetKeywordsFromText['data'] as $keywordGroup) {
            if (!$keywordGroup || !is_array($keywordGroup)) {
                continue;
            }
            foreach ($keywordGroup as $keyword => $occurrences) {
                if ($keyword) {
                    $keywordScores[$keyword] = ceil(((int) $occurrences) * (PepVN_Data::countWords($keyword) * 1.5));
                }
            }
        }
    }
    arsort($keywordScores);

    // preserve_keys = true: without it array_slice() renumbers integer keys,
    // which would replace purely numeric keywords by 0, 1, 2, ...
    $topKeywords = array_slice($keywordScores, 0, 10, true);

    // Keywords that are also category/tag keys.
    $taxonomyKeywords = array();
    foreach ($keywordScores as $keyword => $score) {
        if (in_array($keyword, $groupNameOfTagsAndCategories)) {
            $taxonomyKeywords[$keyword] = $score;
        }
    }
    unset($keywordScores, $groupNameOfTagsAndCategories);
    arsort($taxonomyKeywords);
    $taxonomyKeywords = array_slice($taxonomyKeywords, 0, 10, true);

    // Combine both lists; a category/tag match adds twice its score.
    $combinedScores = array();
    foreach ($topKeywords as $keyword => $score) {
        $combinedScores[$keyword] = (int) $score;
    }
    foreach ($taxonomyKeywords as $keyword => $score) {
        if (!isset($combinedScores[$keyword])) {
            $combinedScores[$keyword] = 0;
        }
        $combinedScores[$keyword] += (int) $score * 2;
    }
    unset($topKeywords, $taxonomyKeywords);

    if (isset($input_options['group_text_weight']) && !PepVN_Data::isEmptyArray($input_options['group_text_weight'])) {
        // Loop-invariant: matches a word that starts with a character other than
        // a-z, 0-9 or the listed punctuation.
        $importantWordPattern = '#^[^a-z0-9' . preg_quote('`~!@#$%^&*()-_=+{}[]\\\\|;:\'",.<>/?+', '#') . ']+#';

        foreach ($input_options['group_text_weight'] as $weightedText) {
            if (!isset($weightedText['text']) || !$weightedText['text']) {
                continue;
            }
            $cleanText = AnalyzeText::cleanRawTextForProcessSearch($weightedText['text']);
            if (!$cleanText) {
                continue;
            }
            // A missing weight counts as 0, which is what the unguarded read evaluated to.
            $baseWeight = isset($weightedText['weight']) ? $weightedText['weight'] : 0;

            foreach ($combinedScores as $keyword => $score) {
                $needle = (string) PepVN_Data::strtolower($keyword);
                if ('' === $needle) {
                    // substr_count() rejects an empty needle.
                    continue;
                }
                $occurrences = substr_count($cleanText, $needle);
                if (!$occurrences) {
                    continue;
                }

                // Each word matching the pattern multiplies the weight; runs of
                // consecutive matching words are boosted progressively.
                $weight = $baseWeight;
                $consecutiveMatches = 1;
                foreach (explode(' ', (string) $keyword) as $word) {
                    if (preg_match($importantWordPattern, $word)) {
                        $weight = $weight * 1.6 * ($consecutiveMatches * 1.11);
                        $consecutiveMatches++;
                    } else {
                        $consecutiveMatches = 1;
                    }
                }

                $combinedScores[$keyword] += $occurrences * $weight;
            }
        }
    }

    $rsSearchPosts = $analyzeText->find_related_posts(array(
        'keywords' => $combinedScores,
        'limit' => $input_options['limit'],
        'exclude_posts_ids' => $input_options['exclude_posts_ids'],
        'post_id_less_than' => $postIdLessThan,
    ));

    // Remember the returned posts so that later calls exclude them.
    if (!empty($rsSearchPosts) && is_array($rsSearchPosts)) {
        foreach ($rsSearchPosts as $foundPost) {
            if (isset($foundPost['post_id']) && $foundPost['post_id']) {
                PepVN_Data::$cacheData[$classMethodKey]['posts_ids_added'][$foundPost['post_id']] = $foundPost;
            }
        }
    }

    TempDataAndCacheFile::set_cache($keyCacheProcessText, $rsSearchPosts, true);

    return $rsSearchPosts;
}
/**
 * Normalises raw text into the form used when searching for keywords.
 *
 * The input is cast to an array and joined with single spaces, then passed
 * in order through: self::decodeText(), strip_tags(), PepVN_Data::strtolower(),
 * self::analysisKeyword_RemovePunctuations(), Text::replaceSpecialChar() and
 * PepVN_Data::reduceSpace().
 *
 * @param string|array $input_text Raw text, or a list of text fragments.
 *
 * @return mixed Result of PepVN_Data::reduceSpace() (presumably a string).
 */
public static function cleanRawTextForProcessSearch($input_text) {
    $joined = implode(' ', (array) $input_text);

    $lowered = PepVN_Data::strtolower(strip_tags(self::decodeText($joined)));
    $withoutPunctuation = self::analysisKeyword_RemovePunctuations($lowered);

    return PepVN_Data::reduceSpace(Text::replaceSpecialChar($withoutPunctuation));
}
/**
 * Inserts automatic keyword links into a piece of HTML.
 *
 * Existing links, several other elements (pre, script, style, ...), optionally
 * headings, and all remaining tags are swapped for placeholders first, so only
 * plain text is matched. For every custom keyword, most frequent first, the
 * first occurrence delimited by whitespace, comma, semicolon or period is
 * wrapped in <a><strong>…</strong></a>. The link target is one of the
 * keyword's configured links that has not been used yet, or otherwise a
 * post/page returned by the "analyzeText" service's search_posts(). The
 * placeholders are then restored, the result is passed through
 * process_attributes_links(), trimmed and cached.
 *
 * Side effect: used link targets and keywords are recorded in
 * PepVN_Data::$cacheData, so a target is not used twice by later calls.
 *
 * Options read from self::getOption(): 'autolinks_case_sensitive', 'maxlinks'
 * (0 = unlimited), 'exclude_heading', 'link_to_postself', 'link_to_pageself',
 * 'autolinks_new_window'. Flags are enabled by the string 'on'.
 *
 * @param string $text HTML to process.
 *
 * @return mixed The processed text (possibly served from cache).
 */
public function process_text($text) {
    $options = self::getOption();
    $classMethodKey = Hash::crc32b(__CLASS__ . '_' . __METHOD__);
    $textChecksum = Utils::hashKey($text);
    $keyCacheProcessText = Utils::hashKey(array($classMethodKey, $textChecksum, 'keyCacheProcessText', $options));

    $tmp = TempDataAndCacheFile::get_cache($keyCacheProcessText, true);
    if (null !== $tmp) {
        return $tmp;
    }

    global $post;

    $analyzeText = $this->di->getShared('analyzeText');

    // Every option is read through isset() so a missing key cannot raise a warning.
    $autolinks_case_sensitive = isset($options['autolinks_case_sensitive']) && 'on' === $options['autolinks_case_sensitive'];
    $linkToPostSelf = isset($options['link_to_postself']) && 'on' === $options['link_to_postself'];
    $linkToPageSelf = isset($options['link_to_pageself']) && 'on' === $options['link_to_pageself'];
    $openInNewWindow = isset($options['autolinks_new_window']) && 'on' === $options['autolinks_new_window'];

    $maxlinks = isset($options['maxlinks']) ? (int) $options['maxlinks'] : 0;
    if ($maxlinks < 0) {
        $maxlinks = 0;
    }

    $currentPostId = 0;
    if (isset($post->ID) && $post->ID) {
        $currentPostId = (int) $post->ID;
    }

    $currentPostType = '';
    if (isset($post->post_type) && $post->post_type) {
        $currentPostType = $post->post_type;
    }

    // Placeholder bookkeeping, restored at the end. The existing str_replace()
    // usage implies an "original => placeholder" map; entries are appended in
    // the order the escapes happen.
    $patternsEscaped = array();

    $rsOne = PepVN_Data::escapeHtmlTagsAndContents($text, 'a;pre;script;style;link;meta;input;textarea;iframe;video;audio;object');
    $text = $rsOne['content'];
    if (!empty($rsOne['patterns'])) {
        $patternsEscaped = array_merge($patternsEscaped, $rsOne['patterns']);
    }
    unset($rsOne);

    if (isset($options['exclude_heading']) && 'on' === $options['exclude_heading']) {
        // Escape a and h1 -> h6.
        $rsOne = PepVN_Data::escapeHtmlTagsAndContents($text, 'a;h1;h2;h3;h4;h5;h6');
        $text = $rsOne['content'];
        if (!empty($rsOne['patterns'])) {
            $patternsEscaped = array_merge($patternsEscaped, $rsOne['patterns']);
        }
        unset($rsOne);
    }

    $rsOne = PepVN_Data::escapeHtmlTags($text);
    $text = $rsOne['content'];
    if (!empty($rsOne['patterns'])) {
        $patternsEscaped = array_merge($patternsEscaped, $rsOne['patterns']);
    }
    unset($rsOne);

    // Padding lets a keyword at the very start or end satisfy the delimiter groups of the pattern.
    $text = ' ' . trim($text) . ' ';

    $group_keywords1 = $this->_get_data_custom_keywords();
    $numberTotalLinksAdded = 0;
    $targetPostTypesForSearch = array('post', 'page');

    if (!empty($group_keywords1) && is_array($group_keywords1)) {
        $keywordsJoined = implode(';', array_keys($group_keywords1));

        // Lookup table keyword => configured links.
        $linksByKeyword = $group_keywords1;

        // Calculate weights of keywords.
        if (!$autolinks_case_sensitive) {
            // The keywords are lower-cased before the frequency analysis, so its
            // result is presumably keyed by the lower-cased form. Alias every
            // keyword under its lower-cased spelling so mixed-case custom
            // keywords can still be resolved. Existing keys are never overwritten.
            foreach ($group_keywords1 as $customKeyword => $customLinks) {
                $lowerKeyword = PepVN_Data::strtolower($customKeyword);
                if (is_string($lowerKeyword) && '' !== $lowerKeyword && !isset($linksByKeyword[$lowerKeyword])) {
                    $linksByKeyword[$lowerKeyword] = $customLinks;
                }
            }

            $group_keywords2 = $analyzeText->frequencyOfAppearanceKeywordsInText(
                PepVN_Data::strtolower($keywordsJoined),
                PepVN_Data::strtolower($text)
            );
        } else {
            $group_keywords2 = $analyzeText->frequencyOfAppearanceKeywordsInText($keywordsJoined, $text);
        }

        if (!empty($group_keywords2) && is_array($group_keywords2)) {
            arsort($group_keywords2);

            foreach ($group_keywords2 as $key1 => $value1) {
                if ($maxlinks > 0 && $numberTotalLinksAdded >= $maxlinks) {
                    break;
                }

                if (!isset($linksByKeyword[$key1])) {
                    continue;
                }

                $targetLink2 = false;
                $targetLinkTitle2 = false;

                // 1) Prefer a configured link that has not been used yet, in random order.
                $targetLinks1 = (array) $linksByKeyword[$key1];
                if (!empty($targetLinks1)) {
                    shuffle($targetLinks1);
                    foreach ($targetLinks1 as $value2) {
                        $value2 = trim($value2);
                        if ($value2 && !isset(PepVN_Data::$cacheData[$classMethodKey]['linksAdded'][$value2])) {
                            $targetLink2 = $value2;
                            $targetLinkTitle2 = $key1;
                            break;
                        }
                    }
                }

                // 2) Otherwise fall back to a post/page found for the keyword.
                if (!$targetLink2) {
                    $rsTwo = $analyzeText->search_posts(array('keyword' => $key1, 'post_types' => $targetPostTypesForSearch));
                    if (is_array($rsTwo)) {
                        foreach ($rsTwo as $valueTwo) {
                            if (!isset($valueTwo['post_link']) || !$valueTwo['post_link']) {
                                // No URL: try the next candidate instead of giving up.
                                continue;
                            }

                            $candidatePostId = isset($valueTwo['post_id']) ? $valueTwo['post_id'] : 0;

                            if ($candidatePostId != $currentPostId) {
                                $checkStatus2 = true;
                            } elseif ('post' === $currentPostType) {
                                // Linking a post to itself must be enabled explicitly.
                                $checkStatus2 = $linkToPostSelf;
                            } elseif ('page' === $currentPostType) {
                                $checkStatus2 = $linkToPageSelf;
                            } else {
                                $checkStatus2 = false;
                            }

                            if ($checkStatus2 && !isset(PepVN_Data::$cacheData[$classMethodKey]['linksAdded'][$valueTwo['post_link']])) {
                                $targetLink2 = $valueTwo['post_link'];
                                $targetLinkTitle2 = isset($valueTwo['post_title']) ? $valueTwo['post_title'] : false;
                                break;
                            }
                        }
                    }
                    unset($rsTwo);
                }

                if (!$targetLink2) {
                    continue;
                }

                $patterns2 = '#([\\s ,;\\.\\t]+)(' . Utils::preg_quote($key1) . ')([\\s ,;\\.\\t]+)#';
                if (!$autolinks_case_sensitive) {
                    $patterns2 .= 'i';
                }

                if ($targetLinkTitle2) {
                    $targetLinkTitle2 = PepVN_Data::cleanKeyword($targetLinkTitle2);
                }

                // "\" and "$" are special in a preg_replace() replacement string
                // (e.g. "$10" in a title would be read as a backreference), so
                // they are escaped in the dynamic parts.
                $replacementEscapes = array('\\' => '\\\\', '$' => '\\$');

                $replace2 = '\\1<a href="' . strtr((string) $targetLink2, $replacementEscapes) . '" '
                    . ($openInNewWindow ? ' target="_blank" ' : '')
                    . ' title="';
                if ($targetLinkTitle2) {
                    $replace2 .= strtr((string) $targetLinkTitle2, $replacementEscapes) . '">';
                } else {
                    $replace2 .= '\\2">';
                }
                $replace2 .= '<strong>\\2</strong></a>\\3';

                $count2 = 0;
                $replacedText = preg_replace($patterns2, $replace2, $text, 1, $count2);
                if (null === $replacedText) {
                    // PCRE error: keep the current text instead of replacing it with null.
                    continue;
                }
                $text = $replacedText;

                $count2 = (int) $count2;
                if ($count2 > 0) {
                    $targetKeywordClean = PepVN_Data::strtolower(PepVN_Data::cleanKeyword($key1));

                    PepVN_Data::$cacheData[$classMethodKey]['linksAdded'][$targetLink2] = 1;
                    PepVN_Data::$cacheData[$classMethodKey]['keywordsAdded'][$targetKeywordClean] = 1;

                    // Hide the new link so later keywords cannot match inside it.
                    $rsTwo = PepVN_Data::escapeHtmlTagsAndContents($text, 'a;strong');
                    $text = $rsTwo['content'];
                    if (!empty($rsTwo['patterns'])) {
                        $patternsEscaped = array_merge($patternsEscaped, $rsTwo['patterns']);
                    }
                    unset($rsTwo);

                    // The limit is enforced by the check at the top of the loop.
                    $numberTotalLinksAdded += $count2;
                }
            }
        }
    }

    if (!empty($patternsEscaped)) {
        // Restore newest-first: a fragment escaped later may contain a placeholder
        // created earlier (e.g. a heading that contains a link), and that inner
        // placeholder only becomes visible once the outer one has been expanded.
        $patternsEscaped = array_reverse($patternsEscaped, true);
        $text = str_replace(array_values($patternsEscaped), array_keys($patternsEscaped), $text);
    }
    unset($patternsEscaped);

    $text = $this->process_attributes_links($text);
    $text = trim($text);

    TempDataAndCacheFile::set_cache($keyCacheProcessText, $text, true);

    return $text;
}