/**
 * Retrieve synonyms
 *
 * Expands the submitted term(s) with the synonyms stored in this extension's
 * settings option, stems the synonyms when stemming appears to be active, and
 * removes source terms whose synonym entry is flagged as a replacement.
 *
 * @param array|string $processed_term Term(s) as already processed (possibly stemmed) by the caller
 * @param mixed        $engine         Unused; accepted only to keep the callback signature
 * @param array|string $term           The original term(s)
 *
 * @return array|string $term untouched when SearchWP is missing or too old,
 *                      otherwise a de-duplicated, sanitized, lowercased list of terms
 */
function find_synonyms($processed_term, $engine, $term)
{
    global $searchwp;

    // $engine is never read; discard it explicitly so the intent is obvious
    unset($engine);

    if (!class_exists('SearchWP') || version_compare($searchwp->version, $this->min_searchwp_version, '<')) {
        return $term;
    }

    $synonyms = get_option($this->prefix . 'settings');
    if (!is_array($synonyms)) {
        // nothing stored (or an unexpected value): behave as if there are no synonyms
        $synonyms = array();
    }

    // convert everything to lowercase
    foreach ($synonyms as $synonym_id => $synonym) {
        if (!empty($synonym['term'])) {
            $synonyms[$synonym_id]['term'] = strtolower($synonym['term']);
        }
        if (!empty($synonym['synonyms']) && is_array($synonym['synonyms'])) {
            // array_map() returns a new array, the result has to be stored
            $synonyms[$synonym_id]['synonyms'] = array_map('strtolower', $synonym['synonyms']);
        }
    }

    // we expect $term to be an array
    if (is_string($term)) {
        $term = array($term);
    }

    // we need to know whether stemming was enabled, we can deduce that based on whether
    // the processed term is different than the actual term
    $stemming_enabled = false;
    if (is_array($processed_term) && is_array($term)) {
        foreach ($processed_term as $maybe_stemmed_term) {
            if (!in_array($maybe_stemmed_term, $term)) {
                $stemming_enabled = true;
                break;
            }
        }
    }

    if (is_array($term)) {
        $stemmer = null;

        foreach ($synonyms as $synonym) {
            if (!isset($synonym['term']) || !in_array($synonym['term'], $term)) {
                continue;
            }

            // there is a match, handle it
            $expansions = (isset($synonym['synonyms']) && is_array($synonym['synonyms']))
                ? $synonym['synonyms']
                : array();

            // multi-word synonyms additionally contribute each of their words
            $words = array();
            foreach ($expansions as $maybe_synonym) {
                if (false !== strpos($maybe_synonym, ' ')) {
                    $words = array_merge($words, explode(' ', $maybe_synonym));
                }
            }
            $expansions = array_merge($expansions, $words);

            // if the term was stemmed that means stemming is enabled so we need to stem the synonym(s) too
            if ($stemming_enabled && class_exists('SearchWPStemmer')) {
                if (null === $stemmer) {
                    // one stemmer instance is enough for the whole run
                    $stemmer = new SearchWPStemmer();
                }

                foreach ($expansions as $key => $unstemmed) {
                    $maybe_stemmed = apply_filters('searchwp_custom_stemmer', $unstemmed);

                    // if the term was stemmed via the filter use it, else generate our own
                    $expansions[$key] = ($unstemmed == $maybe_stemmed)
                        ? $stemmer->stem($unstemmed)
                        : $maybe_stemmed;
                }
            }

            // merge everything together (done inside the loop so later entries can match added synonyms)
            $term = array_merge($term, $expansions);
        }
    }

    // if there's a processed term that means it was stemmed
    if (!is_array($processed_term)) {
        $processed_term = array($processed_term);
    }

    if (is_array($term)) {
        $term = array_merge($processed_term, $term);
    } else {
        $term = $processed_term;
    }

    // handle any replacements
    foreach ($synonyms as $synonym) {
        if (empty($synonym['replace']) || !isset($synonym['term'])) {
            continue;
        }

        // remove the source term
        foreach ($term as $key => $term_term) {
            if ($term_term == $synonym['term']) {
                unset($term[$key]);
            }
        }
    }

    $term = array_values(array_unique($term));
    $term = array_map('sanitize_text_field', $term);
    $term = array_map('strtolower', $term);

    return $term;
}
/**
 * Insert an array of terms into the terms table and retrieve all term IDs from submitted terms
 *
 * Each entry of $termsArray is expected to carry its term under the 'term' key and
 * to be stored under the key '_' . md5( term ); the matching term ID is written to
 * that entry's 'id' key.
 *
 * If the INSERT still fails after all attempts, the current post is flagged via
 * post meta and execution is terminated with die().
 *
 * @since 1.0
 *
 * @param array $termsArray
 *
 * @return array $termsArray with term IDs added, or an empty array for empty/invalid input
 */
function pre_process_terms( $termsArray = array() ) {
	global $wpdb;

	if ( ! is_array( $termsArray ) || empty( $termsArray ) ) {
		return array();
	}

	// get our database vars prepped
	$termsTable = $wpdb->prefix . SEARCHWP_DBPREFIX . 'terms';
	$stemmer    = new SearchWPStemmer();

	$terms        = array(); // individually prepared terms for the SELECT ... IN()
	$placeholders = array(); // one '(%s,%s,%s)' group per term
	$values       = array(); // flat list of term, reverse, stem
	$submitted    = array(); // term => true, for constant-time matching later on

	foreach ( $termsArray as $counts ) {
		if ( ! is_array( $counts ) || ! isset( $counts['term'] ) ) {
			// malformed entry, there is nothing to insert for it
			continue;
		}

		$termToAdd = (string) $counts['term'];

		// WordPress 4.2 added emoji support which caused problems for the array storage
		// of terms and their term counts since the terms themselves were array keys,
		// so the array keys were switched to an underscore-prefixed md5 value and the
		// term stored within that

		// generate the reverse (UTF-8); fall back to a byte-wise reverse when the
		// term is not valid UTF-8 and the pattern cannot be applied
		if ( preg_match_all( '/./us', $termToAdd, $characters ) && ! empty( $characters[0] ) ) {
			$revTerm = join( '', array_reverse( $characters[0] ) );
		} else {
			$revTerm = strrev( $termToAdd );
		}

		// find the stem
		$maybeStemmed = apply_filters( 'searchwp_custom_stemmer', $termToAdd );

		// if the term was stemmed via the filter use it, else generate our own
		$stem = ( $termToAdd == $maybeStemmed ) ? $stemmer->stem( $termToAdd ) : $maybeStemmed;

		// store the record
		$terms[]        = $wpdb->prepare( '%s', $termToAdd );
		$placeholders[] = '(%s,%s,%s)';
		$values[]       = $termToAdd;
		$values[]       = $revTerm;
		$values[]       = $stem;

		$submitted[ $termToAdd ] = true;
	}

	if ( empty( $placeholders ) ) {
		// no usable terms were submitted, so there is nothing to insert or look up
		return $termsArray;
	}

	// insert all of the terms into the terms table so each gets an ID
	$attemptCount = 1;
	$maxAttempts  = absint( apply_filters( 'searchwp_indexer_max_attempts', 4 ) ) + 1; // try to recover 5 times
	$insert_sql   = $wpdb->prepare(
		"INSERT IGNORE INTO {$termsTable} (term,reverse,stem) VALUES " . implode( ',', $placeholders ),
		$values
	);

	$insert_result = $wpdb->query( $insert_sql );
	while ( ( is_wp_error( $insert_result ) || false === $insert_result ) && $attemptCount < $maxAttempts ) {
		// sometimes a deadlock can happen, wait a few seconds then try again
		do_action( 'searchwp_log', 'INSERT Deadlock ' . $attemptCount . '/' . $maxAttempts );
		sleep( 3 );
		$attemptCount++;

		// actually retry, otherwise the result can never change
		$insert_result = $wpdb->query( $insert_sql );
	}

	// deadlocking could be a red herring, there's a remote chance the database table
	// doesn't even exist, so we need to handle that
	if ( is_wp_error( $insert_result ) || false === $insert_result ) {
		do_action( 'searchwp_log', 'Post failed indexing, flagging ' . $this->post->ID );

		// this will call out this post as problematic in the WP admin
		update_post_meta( $this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', absint( $this->maxAttemptsToIndex ) + 1 );
		update_post_meta( $this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true );
		delete_post_meta( $this->post->ID, '_' . SEARCHWP_PREFIX . 'last_index' );

		die(); // this is only an issue if there was a catastrophic problem (e.g. database tables didn't exist)
	} elseif ( $attemptCount > 1 ) {
		do_action( 'searchwp_log', 'Recovered from Deadlock at ' . $attemptCount . '/' . $maxAttempts );
	}

	// retrieve IDs for all terms; every value was already prepared earlier in this method.
	// No SQL comment is embedded in the string: on a single line it would comment out the statement.
	$terms_sql = "SELECT id, term FROM {$termsTable} WHERE term IN( " . implode( ',', $terms ) . ' )';
	$termIDs   = $wpdb->get_results( $terms_sql, 'OBJECT_K' );

	// match term IDs to original terms with counts
	if ( is_array( $termIDs ) ) {
		foreach ( $termIDs as $termIDMeta ) {
			if ( ! isset( $termIDMeta->term, $termIDMeta->id ) ) {
				continue;
			}

			// append the term ID to the original $termsArray
			if ( isset( $submitted[ $termIDMeta->term ] ) ) {
				$termsArray[ '_' . md5( $termIDMeta->term ) ]['id'] = absint( $termIDMeta->id );
			}
		}
	}

	return $termsArray;
}
/**
 * Insert an array of terms into the terms table and retrieve all term IDs from submitted terms
 *
 * The keys of $termsArray are the terms themselves; the matching term ID is written
 * to each entry's 'id' key.
 *
 * @param array $termsArray
 *
 * @return array $termsArray with term IDs added, or an empty array for empty/invalid input
 * @since 1.0
 */
function preProcessTerms($termsArray = array())
{
    global $wpdb;

    if (!is_array($termsArray) || empty($termsArray)) {
        return array();
    }

    // get our database vars prepped
    $termsTable = $wpdb->prefix . SEARCHWP_DBPREFIX . 'terms';
    $stemmer = new SearchWPStemmer();

    $terms = array();        // individually prepared terms for the SELECT ... IN()
    $placeholders = array(); // one "(%s,%s,%s)" group per term
    $values = array();       // flat list of term, reverse, stem

    foreach (array_keys($termsArray) as $termKey) {
        // numeric-looking keys are stored as integers by PHP, so cast back to a string
        $termToAdd = (string) $termKey;

        // generate the reverse (UTF-8); fall back to a byte-wise reverse when the
        // term is not valid UTF-8 and the pattern cannot be applied
        if (preg_match_all('/./us', $termToAdd, $characters) && !empty($characters[0])) {
            $revTerm = join('', array_reverse($characters[0]));
        } else {
            $revTerm = strrev($termToAdd);
        }

        // find the stem
        $maybeStemmed = apply_filters('searchwp_custom_stemmer', $termToAdd);

        // if the term was stemmed via the filter use it, else generate our own
        $stem = $termToAdd == $maybeStemmed ? $stemmer->stem($termToAdd) : $maybeStemmed;

        // store the record
        $terms[] = $wpdb->prepare('%s', $termToAdd);
        $placeholders[] = "(%s,%s,%s)";
        $values[] = $termToAdd;
        $values[] = $revTerm;
        $values[] = $stem;
    }

    // insert all of the terms into the terms table so each gets an ID
    $attemptCount = 1;
    $maxAttempts = absint(apply_filters('searchwp_indexer_max_attempts', 4)) + 1; // try to recover 5 times
    $insert_sql = $wpdb->prepare(
        "INSERT IGNORE INTO {$termsTable} (term,reverse,stem) VALUES " . implode(',', $placeholders),
        $values
    );

    $insert_result = $wpdb->query($insert_sql);
    while ((is_wp_error($insert_result) || false === $insert_result) && $attemptCount < $maxAttempts) {
        // sometimes a deadlock can happen, wait a few seconds then try again
        do_action('searchwp_log', 'INSERT Deadlock ' . $attemptCount . '/' . $maxAttempts);
        sleep(3);
        $attemptCount++;

        // actually retry, otherwise the result can never change
        $insert_result = $wpdb->query($insert_sql);
    }

    if (is_wp_error($insert_result) || false === $insert_result) {
        // do not claim a recovery that did not happen
        do_action('searchwp_log', 'INSERT failed after ' . $attemptCount . '/' . $maxAttempts . ' attempts');
    } elseif ($attemptCount > 1) {
        do_action('searchwp_log', 'Recovered from Deadlock at ' . $attemptCount . '/' . $maxAttempts);
    }

    // retrieve IDs for all terms (every value was already prepared above)
    $terms_sql = "SELECT id, term FROM {$termsTable} WHERE term IN( " . implode(',', $terms) . " )";
    $termIDs = $wpdb->get_results($terms_sql, 'OBJECT_K');

    // match term IDs to original terms with counts
    if (is_array($termIDs)) {
        foreach ($termIDs as $termIDMeta) {
            if (!isset($termIDMeta->term, $termIDMeta->id)) {
                continue;
            }

            // terms are the array keys, so an exact key lookup replaces the nested scan and
            // avoids loose numeric-string matches such as "1.0" == "1"
            if (array_key_exists($termIDMeta->term, $termsArray)) {
                $termsArray[$termIDMeta->term]['id'] = absint($termIDMeta->id);
            }
        }
    }

    return $termsArray;
}
/**
 * Prepare (tokenize) terms
 *
 * Builds the list of terms from the submitted search query: whitelisted
 * pattern matches are kept intact, stems (and indexed source terms sharing those
 * stems) are appended when stemming is enabled for the current engine, and every
 * term is additionally run through the 'searchwp_term_in' filter.
 *
 * @param array|string $terms The search query, as a string or as a list of terms
 *
 * @return array Sanitized terms; the original query comes first
 */
function prep_terms($terms)
{
    global $wpdb;

    $searchwp = SWP();
    $original_terms = $terms;
    $whitelisted_terms = array();

    // allow developers to manually define which variable should be used for the search term
    $terms = apply_filters('searchwp_th_query', $terms);

    if (empty($terms)) {
        $terms = get_search_query();
    }

    // make sure it's a string
    $terms = is_array($terms) ? implode(' ', $terms) : (string) $terms;

    // check against the regex pattern whitelist
    $terms = ' ' . $terms . ' ';
    if (method_exists($searchwp, 'extract_terms_using_pattern_whitelist')) { // added in SearchWP 1.9.5
        // extract terms based on whitelist pattern, allowing for approved indexing of terms with punctuation
        $whitelisted_terms = $searchwp->extract_terms_using_pattern_whitelist($terms);

        // add the buffer (double every space) so we can whole-word replace
        $terms = str_replace(' ', '  ', $terms);

        // remove the matches
        if (!empty($whitelisted_terms)) {
            $terms = str_ireplace($whitelisted_terms, '', $terms);
        }

        // clean up the double space flag we used
        $terms = str_replace('  ', ' ', $terms);
    }

    // rebuild our terms array
    $terms = explode(' ', $terms);

    // maybe append our whitelist
    if (is_array($whitelisted_terms) && !empty($whitelisted_terms)) {
        $whitelisted_terms = array_map('trim', $whitelisted_terms);
        $terms = array_merge($terms, $whitelisted_terms);
    }

    // stemming counts as enabled when at least one post type of the engine has it switched on
    $engine = isset($this->search_args['engine']) ? $this->search_args['engine'] : '';
    $stemming_enabled = false;
    if (!empty($searchwp->settings['engines'][$engine])) {
        foreach ($searchwp->settings['engines'][$engine] as $post_type_settings) {
            if (!empty($post_type_settings['options']['stem'])) {
                $stemming_enabled = true;
                break;
            }
        }
    }

    // drop the empty tokens left behind by the padding and the whitelist removal
    $terms = array_filter($terms, 'strlen');

    // if stemming is enabled, append the stems of all terms
    if ($stemming_enabled && class_exists('SearchWPStemmer')) {
        $stemmer = new SearchWPStemmer();
        $stems = array();

        foreach ($terms as $term) {
            $maybe_stemmed = apply_filters('searchwp_custom_stemmer', $term);

            // if the term was stemmed via the filter use it, else generate our own
            $stems[] = ($term === $maybe_stemmed) ? $stemmer->stem($term) : $maybe_stemmed;
        }

        $terms = array_unique(array_merge($terms, $stems));

        // we also need the inverse (grab all of the source terms that have the same stem)
        if (!empty($stems)) {
            $prefix = $wpdb->prefix . SEARCHWP_DBPREFIX;

            // one %s placeholder per stem; this has to be an array, appending to a string is fatal on PHP 7.1+
            $placeholders = array_fill(0, count($stems), '%s');

            $sql = "SELECT term FROM {$prefix}terms WHERE stem IN ( " . implode(',', $placeholders) . " )";
            $source_terms = $wpdb->get_col($wpdb->prepare($sql, $stems));

            $terms = array_unique(array_merge($terms, $source_terms));
        }
    }

    // make sure the search query has priority so it's processed first
    if (!is_array($original_terms)) {
        $original_terms = array($original_terms);
    }
    $terms = array_unique(array_merge($original_terms, $terms));

    // TODO: BEGIN REFACTOR002
    // apply the same term processing that SearchWP core would
    // (iterating a snapshot: terms added by the filter are not themselves filtered again)
    foreach ($terms as $term) {
        $these_terms = apply_filters('searchwp_term_in', array($term), 'searchwp_term_highlight', $term);

        // a callback may hand back something other than a list; only merge real arrays
        if (!empty($these_terms) && is_array($these_terms)) {
            $terms = array_merge($terms, $these_terms);
        }
    }

    $terms = array_unique($terms);
    // END REFACTOR002

    // sanitize
    $terms = array_map('sanitize_text_field', $terms);

    return $terms;
}