/**
  * Expand a search term with the synonyms configured for it.
  *
  * Synonym definitions are read from the `{$this->prefix}settings` option. Each
  * definition is expected to provide a `term`, a list of `synonyms` and an
  * optional `replace` flag; when `replace` is truthy the source term is removed
  * from the result so that only its synonyms are searched.
  *
  * @param array|string $processed_term Term(s) as already processed by the caller; merged in front of the result
  * @param mixed        $engine         Not used by this method; kept so the signature stays unchanged
  * @param array|string $term           The original term(s)
  *
  * @return array|string $term untouched when SearchWP is missing or too old, otherwise a
  *                      de-duplicated, sanitized, lowercased list of terms
  */
 function find_synonyms($processed_term, $engine, $term)
 {
     global $searchwp;
     // bail out (handing the term back untouched) when SearchWP is missing or older than we support
     if (!class_exists('SearchWP') || version_compare($searchwp->version, $this->min_searchwp_version, '<')) {
         return $term;
     }
     $synonyms = get_option($this->prefix . 'settings');
     if (!is_array($synonyms)) {
         // nothing stored yet (or an unexpected value): treat as "no synonyms defined"
         $synonyms = array();
     }
     // convert everything to lowercase
     foreach ($synonyms as $synonym_id => $synonym) {
         if (!empty($synonym['term'])) {
             $synonyms[$synonym_id]['term'] = strtolower($synonym['term']);
         }
         if (!empty($synonym['synonyms']) && is_array($synonym['synonyms'])) {
             // array_map() returns a new array, so the result has to be stored back
             $synonyms[$synonym_id]['synonyms'] = array_map('strtolower', $synonym['synonyms']);
         }
     }
     // we expect $term to be an array
     if (is_string($term)) {
         $term = array($term);
     }
     // we need to know whether stemming was enabled, we can deduce that based on whether the processed term is
     // different than the actual term, so we'll check that
     $stemming_enabled = false;
     if (is_array($processed_term) && is_array($term)) {
         foreach ($processed_term as $maybe_stemmed_term) {
             if (!in_array($maybe_stemmed_term, $term)) {
                 $stemming_enabled = true;
                 break;
             }
         }
     }
     if (is_array($term)) {
         foreach ($synonyms as $synonym) {
             // skip malformed definitions instead of raising notices / merging non-arrays
             if (!isset($synonym['term']) || empty($synonym['synonyms']) || !is_array($synonym['synonyms'])) {
                 continue;
             }
             // $term grows as we go, so a later definition may match a synonym added by an earlier one
             if (!in_array($synonym['term'], $term)) {
                 continue;
             }
             // there is a match: multi-word synonyms are kept as-is AND additionally broken into single words
             $expanded = $synonym['synonyms'];
             foreach ($synonym['synonyms'] as $maybe_synonym) {
                 if (false !== strpos($maybe_synonym, ' ')) {
                     $expanded = array_merge($expanded, explode(' ', $maybe_synonym));
                 }
             }
             // if the term was stemmed that means stemming is enabled so we need to stem the synonym(s) too...
             if ($stemming_enabled && class_exists('SearchWPStemmer')) {
                 $stemmer = new SearchWPStemmer();
                 foreach ($expanded as $key => $unstemmed) {
                     $maybe_stemmed = apply_filters('searchwp_custom_stemmer', $unstemmed);
                     // if the term was stemmed via the filter use it, else generate our own
                     $expanded[$key] = $unstemmed == $maybe_stemmed ? $stemmer->stem($unstemmed) : $maybe_stemmed;
                 }
             }
             // merge everything together
             $term = array_merge($term, $expanded);
         }
     }
     // the processed term(s) always lead the resulting list
     if (!is_array($processed_term)) {
         $processed_term = array($processed_term);
     }
     if (is_array($term)) {
         $term = array_merge($processed_term, $term);
     } else {
         $term = $processed_term;
     }
     // handle any replacements
     foreach ($synonyms as $synonym) {
         if (empty($synonym['replace']) || !isset($synonym['term'])) {
             continue;
         }
         // remove the source term
         foreach ($term as $key => $term_term) {
             if ($term_term == $synonym['term']) {
                 unset($term[$key]);
             }
         }
     }
     $term = array_values(array_unique($term));
     $term = array_map('sanitize_text_field', $term);
     $term = array_map('strtolower', $term);
     return $term;
 }
	/**
	 * Insert an array of terms into the terms table and retrieve all term IDs from submitted terms
	 *
	 * Each entry of $termsArray is expected to carry its term string under the 'term' key and to be
	 * keyed by '_' . md5( term ). For every term found in the terms table after the insert, the
	 * matching entry receives an 'id' element holding the term's row ID.
	 *
	 * If the INSERT still fails after all retries, the current post ($this->post) is flagged as
	 * problematic via post meta and execution is halted with die().
	 *
	 * @since 1.0
	 *
	 * @param array $termsArray
	 *
	 * @return array $termsArray with term IDs appended, or an empty array when nothing was submitted
	 */
	function pre_process_terms( $termsArray = array() ) {
		global $wpdb;

		if ( ! is_array( $termsArray ) || empty( $termsArray ) ) {
			return array();
		}

		// get our database vars prepped
		$termsTable = $wpdb->prefix . SEARCHWP_DBPREFIX . 'terms';

		$stemmer = new SearchWPStemmer();

		$terms = $newTerms = $newTermsSQL = array();

		foreach ( $termsArray as $counts ) {
			// WordPress 4.2 added emoji support which caused problems for the array storage
			// of terms and their term counts since the terms themselves were array keys
			// and PHP doesn't allow emoji in array keys so the array keys were switched to
			// an underscore-prefixed md5 value and the term stored within that
			$termToAdd = (string) $counts['term'];

			// generate the reverse (UTF-8)
			preg_match_all( '/./us', $termToAdd, $contentr );
			$revTerm = join( '', array_reverse( $contentr[0] ) );

			// find the stem
			$maybeStemmed = apply_filters( 'searchwp_custom_stemmer', $termToAdd );

			// if the term was stemmed via the filter use it, else generate our own
			$stem = ( $termToAdd == $maybeStemmed ) ? $stemmer->stem( $termToAdd ) : $maybeStemmed;

			// store the record (one placeholder group plus its three values per term)
			$terms[]       = $wpdb->prepare( '%s', $termToAdd );
			$newTermsSQL[] = '(%s,%s,%s)';
			$newTerms[]    = $termToAdd;
			$newTerms[]    = $revTerm;
			$newTerms[]    = $stem;
		}

		// insert all of the terms into the terms table so each gets an ID
		$attemptCount = 1;
		$maxAttempts = absint( apply_filters( 'searchwp_indexer_max_attempts', 4 ) ) + 1;  // try to recover 5 times
		$insert_sql = $wpdb->prepare( "INSERT IGNORE INTO {$termsTable} (term,reverse,stem) VALUES " . implode( ',', $newTermsSQL ), $newTerms );
		$insert_result = $wpdb->query( $insert_sql );
		while ( ( is_wp_error( $insert_result ) || false === $insert_result ) && $attemptCount < $maxAttempts ) {
			// sometimes a deadlock can happen, wait a few seconds then try again
			do_action( 'searchwp_log', 'INSERT Deadlock ' . $attemptCount . '/' . $maxAttempts );
			sleep( 3 );
			$attemptCount++;

			// actually retry the INSERT, otherwise the loop can never recover
			$insert_result = $wpdb->query( $insert_sql );
		}

		// deadlocking could be a red herring, there's a remote chance the database table
		// doesn't even exist, so we need to handle that
		if ( ( is_wp_error( $insert_result ) || false === $insert_result ) ) {
			do_action( 'searchwp_log', 'Post failed indexing, flagging ' . $this->post->ID );

			// this will call out this post as problematic in the WP admin
			update_post_meta( $this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', absint( $this->maxAttemptsToIndex ) + 1 );
			update_post_meta( $this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true );
			delete_post_meta( $this->post->ID, '_' . SEARCHWP_PREFIX . 'last_index' );

			die(); // this is only an issue if there was a catastrophic problem (e.g. database tables didn't exist)

		} elseif ( $attemptCount > 1 ) {
			do_action( 'searchwp_log', 'Recovered from Deadlock at ' . $attemptCount . '/' . $maxAttempts );
		}

		// retrieve IDs for all terms
		$terms_sql = "-- noinspection SqlDialectInspection
					SELECT id, term FROM {$termsTable} WHERE term IN( " . implode( ',', $terms ) . ' )';  // already prepared earlier in this method
		$termIDs = $wpdb->get_results( $terms_sql, 'OBJECT_K' );

		// match term IDs to original terms with counts
		if ( is_array( $termIDs ) ) {
			foreach ( $termIDs as $termIDMeta ) {
				// append the term ID to the original $termsArray
				foreach ( $termsArray as $counts ) {
					if ( (string) $counts['term'] == $termIDMeta->term ) {
						// entries are keyed by the underscore-prefixed md5 of the term
						$term_id = '_' . md5( $termIDMeta->term );
						if ( isset( $termIDMeta->id ) ) {
							$termsArray[ $term_id ]['id'] = absint( $termIDMeta->id );
						}
						break;
					}
				}
			}
		}

		return $termsArray;
	}
// Example no. 3
// 0
 /**
  * Insert an array of terms into the terms table and retrieve all term IDs from submitted terms
  *
  * The keys of $termsArray are the terms themselves. For every term found in the terms
  * table after the insert, the matching entry receives an 'id' element holding the
  * term's row ID.
  *
  * @param array $termsArray
  *
  * @return array $termsArray with term IDs appended, or an empty array when nothing was submitted
  * @since 1.0
  */
 function preProcessTerms($termsArray = array())
 {
     global $wpdb;
     if (!is_array($termsArray) || empty($termsArray)) {
         return array();
     }
     // get our database vars prepped
     $termsTable = $wpdb->prefix . SEARCHWP_DBPREFIX . 'terms';
     $stemmer = new SearchWPStemmer();
     $terms = $newTerms = $newTermsSQL = array();
     foreach (array_keys($termsArray) as $termKey) {
         // numeric-looking keys come back as integers, so normalise to string
         $termToAdd = (string) $termKey;
         // generate the reverse (UTF-8)
         preg_match_all('/./us', $termToAdd, $contentr);
         $revTerm = join('', array_reverse($contentr[0]));
         // find the stem
         $maybeStemmed = apply_filters('searchwp_custom_stemmer', $termToAdd);
         // if the term was stemmed via the filter use it, else generate our own
         $stem = $termToAdd == $maybeStemmed ? $stemmer->stem($termToAdd) : $maybeStemmed;
         // store the record (one placeholder group plus its three values per term)
         $terms[] = $wpdb->prepare('%s', $termToAdd);
         $newTermsSQL[] = "(%s,%s,%s)";
         $newTerms[] = $termToAdd;
         $newTerms[] = $revTerm;
         $newTerms[] = $stem;
     }
     // insert all of the terms into the terms table so each gets an ID
     $attemptCount = 1;
     // try to recover 5 times
     $maxAttempts = absint(apply_filters('searchwp_indexer_max_attempts', 4)) + 1;
     $insert_sql = $wpdb->prepare("INSERT IGNORE INTO {$termsTable} (term,reverse,stem) VALUES " . implode(',', $newTermsSQL), $newTerms);
     $insert_result = $wpdb->query($insert_sql);
     while ((is_wp_error($insert_result) || false === $insert_result) && $attemptCount < $maxAttempts) {
         // sometimes a deadlock can happen, wait a few seconds then try again
         do_action('searchwp_log', 'INSERT Deadlock ' . $attemptCount . '/' . $maxAttempts);
         sleep(3);
         $attemptCount++;
         // actually retry the INSERT, otherwise the loop can never recover
         $insert_result = $wpdb->query($insert_sql);
     }
     // only report a recovery when a retry was needed AND the insert finally went through
     $insert_failed = is_wp_error($insert_result) || false === $insert_result;
     if ($attemptCount > 1 && !$insert_failed) {
         do_action('searchwp_log', 'Recovered from Deadlock at ' . $attemptCount . '/' . $maxAttempts);
     }
     // retrieve IDs for all terms (each value was already prepared above)
     $terms_sql = "SELECT id, term FROM {$termsTable} WHERE term IN( " . implode(',', $terms) . " )";
     $termIDs = $wpdb->get_results($terms_sql, 'OBJECT_K');
     // match term IDs to original terms with counts
     if (is_array($termIDs)) {
         foreach ($termIDs as $termIDMeta) {
             // append the term ID to the original $termsArray
             foreach (array_keys($termsArray) as $termKey) {
                 $termsArrayTerm = (string) $termKey;
                 if ($termsArrayTerm == $termIDMeta->term) {
                     if (isset($termIDMeta->id)) {
                         $termsArray[$termsArrayTerm]['id'] = absint($termIDMeta->id);
                     }
                     break;
                 }
             }
         }
     }
     return $termsArray;
 }
 /**
  * Prepare (tokenize) terms
  *
  * Builds the list of terms to work with from the submitted search query: the query is
  * split on spaces, terms matching SearchWP's pattern whitelist are kept intact, stems
  * (and every indexed term sharing one of those stems) are appended when stemming is
  * enabled for the current engine, and the `searchwp_term_in` filter gets a chance to
  * contribute additional terms.
  *
  * @param array|string $terms The search query; may be overridden via the `searchwp_th_query` filter
  *
  * @return array Sanitized, de-duplicated terms with the original query first (keys are not re-indexed)
  */
 function prep_terms($terms)
 {
     global $wpdb;
     $searchwp = SWP();
     $original_terms = $terms;
     $whitelisted_terms = array();
     // allow developers to manually define which variable should be used for the search term
     $terms = apply_filters('searchwp_th_query', $terms);
     if (empty($terms)) {
         $terms = get_search_query();
     }
     // make sure it's a string
     if (is_array($terms)) {
         $terms = implode(' ', $terms);
     } else {
         $terms = (string) $terms;
     }
     // check against the regex pattern whitelist
     $terms = ' ' . $terms . ' ';
     if (method_exists($searchwp, 'extract_terms_using_pattern_whitelist')) {
         // added in SearchWP 1.9.5
         // extract terms based on whitelist pattern, allowing for approved indexing of terms with punctuation
         $whitelisted_terms = $searchwp->extract_terms_using_pattern_whitelist($terms);
         // add the buffer so we can whole-word replace
         $terms = str_replace(' ', '  ', $terms);
         // remove the matches
         if (!empty($whitelisted_terms)) {
             $terms = str_ireplace($whitelisted_terms, '', $terms);
         }
         // clean up the double space flag we used
         $terms = str_replace('  ', ' ', $terms);
     }
     // rebuild our terms array (explode() always hands back an array here)
     $terms = explode(' ', $terms);
     // maybe append our whitelist
     if (is_array($whitelisted_terms) && !empty($whitelisted_terms)) {
         $whitelisted_terms = array_map('trim', $whitelisted_terms);
         $terms = array_merge($terms, $whitelisted_terms);
     }
     // stemming counts as enabled as soon as one post type of the current engine has it switched on
     $engine = isset($this->search_args['engine']) ? $this->search_args['engine'] : '';
     $stemming_enabled = false;
     if (!empty($searchwp->settings['engines'][$engine])) {
         foreach ($searchwp->settings['engines'][$engine] as $post_type => $post_type_settings) {
             if (!empty($post_type_settings['options']['stem'])) {
                 $stemming_enabled = true;
                 break;
             }
         }
     }
     // drop the empty strings left behind by explode()
     $terms = array_filter($terms, 'strlen');
     // if stemming is enabled, append the stems of all terms
     $stems = array();
     if ($stemming_enabled && class_exists('SearchWPStemmer')) {
         $stemmer = new SearchWPStemmer();
         foreach ($terms as $term) {
             $maybe_stemmed = apply_filters('searchwp_custom_stemmer', $term);
             // if the term was stemmed via the filter use it, else generate our own
             $stems[] = $term === $maybe_stemmed ? $stemmer->stem($term) : $maybe_stemmed;
         }
         $terms = array_merge($terms, $stems);
         $terms = array_unique($terms);
         // we also need the inverse (grab all of the source terms that have the same stem)
         if (!empty($stems)) {
             $prefix = $wpdb->prefix . SEARCHWP_DBPREFIX;
             // one placeholder per stem; this must be an array, because `$string[] = ...`
             // is a fatal error ("[] operator not supported for strings") as of PHP 7.1
             $prepare = array_fill(0, count($stems), '%s');
             $sql = "SELECT term\n\t\t\t\t\tFROM {$prefix}terms\n\t\t\t\t\tWHERE stem IN ( " . implode(',', $prepare) . " )";
             $prepared = $wpdb->prepare($sql, $stems);
             $source_terms = $wpdb->get_col($prepared);
             $terms = array_merge($terms, $source_terms);
             $terms = array_unique($terms);
         }
     }
     // make sure the search query has priority so it's processed first
     if (!is_array($original_terms)) {
         $original_terms = array($original_terms);
     }
     $terms = array_merge($original_terms, $terms);
     $terms = array_unique($terms);
     // TODO: BEGIN REFACTOR002
     // apply the same term processing that SearchWP core would
     // (which requires the search query be formatted as an array)
     foreach ($terms as $term) {
         // the loop walks the list as it was before any filter additions; additions are appended
         $these_terms = apply_filters('searchwp_term_in', array($term), 'searchwp_term_highlight', $term);
         if (!empty($these_terms)) {
             // a filter callback may hand back a bare string, so normalise before merging
             $terms = array_merge($terms, (array) $these_terms);
         }
     }
     // drop any duplicates the filter may have introduced
     $terms = array_unique($terms);
     // END REFACTOR002
     // sanitize
     $terms = array_map('sanitize_text_field', $terms);
     return $terms;
 }