/**
 * Collects every offset at which any of the given words occurs in the text.
 *
 * Matching is delegated to relevanssi_stripos(), so the offsets are whatever
 * that helper reports. Each word is searched repeatedly, resuming right after
 * the previous match, until no further match is found.
 *
 * @param array  $words    Words to look for.
 * @param string $fulltext Text to search in.
 *
 * @return array Unique match offsets in ascending order.
 */
function relevanssi_extract_locations($words, $fulltext) {
	$locations = array();

	foreach ($words as $word) {
		$wordlen = relevanssi_strlen($word);
		if ($wordlen < 1) {
			// A zero-length word would never advance the search offset below,
			// so the scan could not terminate; it cannot match anything useful.
			continue;
		}

		$loc = relevanssi_stripos($fulltext, $word);
		while (false !== $loc) {
			$locations[] = $loc;
			// Resume right after the match that was just recorded.
			$loc = relevanssi_stripos($fulltext, $word, $loc + $wordlen);
		}
	}

	// Different words may match at the same offset; report each offset once.
	$locations = array_unique($locations);
	sort($locations);

	return $locations;
}
/**
 * Creates an excerpt from content.
 *
 * Whitespace in the content is collapsed and a single space is prepended, then
 * the part of the content containing the most distinct search terms is picked.
 * The 'relevanssi_excerpt_type' option selects the unit of the excerpt length:
 * "chars" measures it in characters, anything else measures it in words.
 *
 * Phrases found in the query replace the individual terms they consist of, so
 * a phrase is matched as a whole.
 *
 * @param string $content Text to build the excerpt from.
 * @param array  $terms   Search terms; only the array keys are used as terms here.
 * @param string $query   Search query; slashes are stripped before phrases are extracted.
 *
 * @return array - element 0 is the excerpt, element 1 the number of term hits (-1 when no
 * term was found), element 2 is true, if the excerpt is from the start of the content.
 */
function relevanssi_create_excerpt($content, $terms, $query) {
	// If you need to modify these on the go, use 'pre_option_relevanssi_excerpt_length' filter.
	$excerpt_length = (int) get_option("relevanssi_excerpt_length");
	$type = get_option("relevanssi_excerpt_type");

	$best_excerpt_term_hits = -1;
	$excerpt = "";
	$start = false;

	$content = preg_replace('/\\s+/', ' ', $content);
	$content = " {$content}";

	$phrases = relevanssi_extract_phrases(stripslashes($query));
	foreach ($phrases as $phrase) {
		// Second argument is the tokenizer's "remove stopwords" switch (name taken from
		// the original call site); it is off so the phrase keeps all of its words.
		$phrase_terms = array_map('strval', array_keys(relevanssi_tokenize($phrase, false)));

		// Keep the terms as array KEYS: every loop below reads array_keys($terms).
		// The list is rebuilt for each phrase so earlier phrases do not leak stale entries.
		$remaining_terms = array();
		foreach ($terms as $term => $value) {
			if (!in_array((string) $term, $phrase_terms, true)) {
				$remaining_terms[$term] = $value;
			}
		}
		$remaining_terms[$phrase] = 1;
		$terms = $remaining_terms;
	}

	uksort($terms, 'relevanssi_strlen_sort');

	if ("chars" == $type) {
		$multibyte = function_exists('mb_strlen') && function_exists('mb_substr');

		// With fuzzy matching set to 'none' the needle gets a leading space,
		// so a term only matches where it begins a word.
		$match_word_start = get_option('relevanssi_fuzzy') == 'none';

		// Map of match offset => term found at that offset.
		$term_positions = array();
		foreach (array_keys($terms) as $term) {
			$term_key = trim($term);
			if ('' === $term_key) {
				// An empty term would match everywhere and may never advance the scan.
				continue;
			}

			$needle = $match_word_start ? " {$term_key}" : $term_key;
			$needle_length = $multibyte ? mb_strlen($needle) : strlen(utf8_decode($needle));

			$pos = relevanssi_stripos($content, $needle, 0);
			while (false !== $pos) {
				$term_positions[$pos] = $term_key;
				$pos = relevanssi_stripos($content, $needle, $pos + $needle_length);
			}
		}

		ksort($term_positions);
		$positions = array_keys($term_positions);
		$position_count = count($positions);

		$best_position = 0;
		$best_position_hits = 0;
		$quarter = (int) floor($excerpt_length / 4); // adjustment, so the excerpt doesn't start with the search term

		// Try a window starting a quarter-length before each match and count the
		// distinct terms that fall inside it; the first window with the most wins.
		for ($i = 0; $i < $position_count; $i++) {
			$window_start = max(0, $positions[$i] - $quarter);
			$window_end = $window_start + $excerpt_length;

			$window_terms = array($term_positions[$positions[$i]] => true);
			for ($j = $i + 1; $j < $position_count; $j++) {
				$next_position = $positions[$j];
				if ($next_position >= $window_end) {
					break; // farther than the excerpt length
				}
				$window_terms[$term_positions[$next_position]] = true;
			}

			$window_hits = count($window_terms);
			if ($window_hits > $best_position_hits) {
				$best_position_hits = $window_hits;
				$best_position = $window_start;
			}
		}

		if ($best_position_hits > 0) {
			// Report the hit count as documented; stays -1 when nothing matched.
			$best_excerpt_term_hits = $best_position_hits;
		}

		// Measure the content in the same unit the substring call below uses,
		// otherwise multibyte text makes the content look longer than it is.
		$content_length = $multibyte ? mb_strlen($content) : strlen($content);

		if ($best_position + $excerpt_length < $content_length) {
			$excerpt_from = $best_position;
		} else {
			// The window would run past the end: pull it back so a full-length
			// excerpt still fits, or start at the beginning for short content.
			$excerpt_from = max(0, $content_length - $excerpt_length);
		}

		$excerpt = $multibyte
			? mb_substr($content, $excerpt_from, $excerpt_length)
			: substr($content, $excerpt_from, $excerpt_length);
		$start = (0 == $excerpt_from);

		if ("" == $excerpt) {
			$excerpt = $multibyte
				? mb_substr($content, 0, $excerpt_length)
				: substr($content, 0, $excerpt_length);
			$start = true;
		}
	} else {
		$words = explode(' ', $content);
		$word_count = count($words);

		$has_mb_stripos = function_exists('mb_stripos');
		$has_mb_case_tools = function_exists('mb_strpos') && function_exists('mb_strtoupper') && function_exists('mb_substr');

		$i = 0;
		// A non-positive excerpt length would never advance $i, so the scan is skipped then.
		while ($excerpt_length > 0 && $i < $word_count) {
			if ($i + $excerpt_length > $word_count) {
				// Last slice: move back so it is still a full-length slice.
				$i = max(0, $word_count - $excerpt_length);
			}

			$excerpt_slice = implode(' ', array_slice($words, $i, $excerpt_length));
			$excerpt_slice = " {$excerpt_slice}";

			$term_hits = 0;
			foreach (array_keys($terms) as $term) {
				$bare_term = (string) $term;

				if ($has_mb_stripos) {
					$pos = "" == $excerpt_slice ? false : mb_stripos($excerpt_slice, " {$bare_term}"); // To avoid "empty haystack" warnings
				} else {
					// No case-insensitive multibyte search available: try the term as given,
					// with its first character upper-cased, and fully upper-cased. The variants
					// are built from the bare term so the first LETTER (not the leading space
					// of the needle) is the one that gets upper-cased.
					if ($has_mb_case_tools) {
						$variants = array(
							$bare_term,
							mb_strtoupper(mb_substr($bare_term, 0, 1)) . mb_substr($bare_term, 1),
							mb_strtoupper($bare_term),
						);
					} else {
						$variants = array(
							$bare_term,
							strtoupper(substr($bare_term, 0, 1)) . substr($bare_term, 1),
							strtoupper($bare_term),
						);
					}

					$pos = false;
					foreach ($variants as $variant) {
						$pos = $has_mb_case_tools
							? mb_strpos($excerpt_slice, " {$variant}")
							: strpos($excerpt_slice, " {$variant}");
						if (false !== $pos) {
							break;
						}
					}
				}

				if (false !== $pos) {
					$term_hits++;
					if ($term_hits > $best_excerpt_term_hits) {
						$best_excerpt_term_hits = $term_hits;
						$excerpt = $excerpt_slice;
						// The flag must describe the slice that was actually chosen.
						$start = (0 == $i);
					}
				}
			}

			$i += $excerpt_length;
		}

		if ("" == $excerpt) {
			// Nothing matched: use the leading words of the content. The word limit is
			// one below the configured length (as before), but content shorter than the
			// limit no longer loses its last word.
			$excerpt = implode(' ', array_slice($words, 0, max(0, $excerpt_length - 1)));
			$start = true;
		}
	}

	return array($excerpt, $best_excerpt_term_hits, $start);
}