예제 #1
0
/**
 * Collects every offset at which one of the given words is found in the text.
 *
 * Each word is looked up with relevanssi_stripos(); after a match the search resumes
 * right behind the matched word, so overlapping occurrences of the same word are skipped.
 *
 * @param array  $words    Words to look for.
 * @param string $fulltext Text to search.
 *
 * @return array Unique match offsets in ascending order.
 */
function relevanssi_extract_locations($words, $fulltext)
{
    $locations = array();
    foreach ($words as $word) {
        $wordlen = relevanssi_strlen($word);
        if ($wordlen < 1) {
            // A zero-length word cannot advance the search offset below, so the loop
            // would not terminate if relevanssi_stripos() reports a match for it.
            continue;
        }
        $loc = relevanssi_stripos($fulltext, $word);
        while ($loc !== false) {
            // Keyed by offset so a position hit by several words is stored only once.
            $locations[$loc] = $loc;
            $loc = relevanssi_stripos($fulltext, $word, $loc + $wordlen);
        }
    }
    // sort() also re-indexes, so callers still receive a plain list.
    sort($locations);
    return $locations;
}
예제 #2
0
/**
 * Creates an excerpt from content.
 *
 * The excerpt length and type are read from the 'relevanssi_excerpt_length' and
 * 'relevanssi_excerpt_type' options. With type "chars" the length counts characters and
 * the excerpt is the window that covers the largest number of distinct terms; with any
 * other type the length counts words and the content is scanned in consecutive slices of
 * that many words.
 *
 * @param string $content Text to build the excerpt from.
 * @param array  $terms   Search terms, keyed by term (only the keys are searched for).
 * @param string $query   Search query; phrases found in it replace their component terms.
 *
 * @return array - element 0 is the excerpt, element 1 the number of term hits, element 2 is
 * true, if the excerpt is from the start of the content. Element 1 is only updated by the
 * word-based mode; in "chars" mode, or when nothing matches, it keeps its initial value -1.
 */
function relevanssi_create_excerpt($content, $terms, $query)
{
    // If you need to modify these on the go, use 'pre_option_relevanssi_excerpt_length' filter.
    // Cast to int so a missing or non-numeric option becomes 0 instead of breaking the
    // arithmetic below.
    $excerpt_length = (int) get_option("relevanssi_excerpt_length");
    $type = get_option("relevanssi_excerpt_type");
    $best_excerpt_term_hits = -1;
    $excerpt = "";
    $start = false;

    // Collapse whitespace runs and add a leading space, so that every word in the content
    // is preceded by exactly one space.
    $content = preg_replace('/\\s+/', ' ', $content);
    $content = " {$content}";

    // Replace the terms that make up a phrase with the phrase itself. $terms must stay
    // keyed by term, because everything below searches for array_keys($terms).
    $phrases = relevanssi_extract_phrases(stripslashes($query));
    foreach ($phrases as $phrase) {
        $phrase_terms = array_map('strval', array_keys(relevanssi_tokenize($phrase, false)));
        $remaining_terms = array();
        foreach ($terms as $term => $value) {
            // Numeric-looking array keys are stored as integers; compare as strings.
            if (!in_array((string) $term, $phrase_terms, true)) {
                $remaining_terms[$term] = $value;
            }
        }
        $terms = $remaining_terms;
        $terms[$phrase] = 1;
    }
    uksort($terms, 'relevanssi_strlen_sort');

    if ("chars" == $type) {
        // Offsets and lengths must use the same unit as the substring function used below.
        $multibyte = function_exists('mb_substr') && function_exists('mb_strlen');
        $content_length = $multibyte ? mb_strlen($content) : strlen($content);

        // Without fuzzy matching a term only counts at the start of a word, which is
        // enforced by searching for it with a leading space.
        $fuzzy = get_option('relevanssi_fuzzy') != 'none';

        // Map every match offset to the term found there.
        $term_positions = array();
        foreach (array_keys($terms) as $term) {
            $term_key = trim($term);
            $needle = $fuzzy ? $term_key : " {$term_key}";
            if ('' === $needle) {
                // A zero-length needle cannot advance the search offset, so the scan
                // below would not terminate if the helper reports a match for it.
                continue;
            }
            $needle_length = function_exists('mb_strlen') ? mb_strlen($needle) : strlen(utf8_decode($needle));
            $pos = relevanssi_stripos($content, $needle, 0);
            while (false !== $pos) {
                $term_positions[$pos] = $term_key;
                $pos = relevanssi_stripos($content, $needle, $pos + $needle_length);
            }
        }
        ksort($term_positions);
        $positions = array_keys($term_positions);
        $position_count = count($positions);

        // Try a window for every match and keep the first one that covers the most
        // distinct terms.
        $best_position = 0;
        $best_position_hits = 0;
        // adjustment, so the excerpt doesn't start with the search term
        $quarter = (int) floor($excerpt_length / 4);
        for ($i = 0; $i < $position_count; $i++) {
            $window_start = max(0, $positions[$i] - $quarter);
            $window_terms = array($term_positions[$positions[$i]] => true);
            for ($j = $i + 1; $j < $position_count; $j++) {
                $next_position = $positions[$j];
                if ($next_position >= $window_start + $excerpt_length) {
                    break;
                    // farther than the excerpt length
                }
                $window_terms[$term_positions[$next_position]] = true;
            }
            $window_hits = count($window_terms);
            if ($window_hits > $best_position_hits) {
                $best_position_hits = $window_hits;
                $best_position = $window_start;
            }
        }

        if ($best_position + $excerpt_length < $content_length) {
            $excerpt = $multibyte
                ? mb_substr($content, $best_position, $excerpt_length)
                : substr($content, $best_position, $excerpt_length);
        } else {
            // The window would run past the end: take the tail of the content instead.
            $fixed_position = $content_length - $excerpt_length;
            if ($fixed_position > 0) {
                $excerpt = $multibyte
                    ? mb_substr($content, $fixed_position, $excerpt_length)
                    : substr($content, $fixed_position, $excerpt_length);
            }
        }
        if ($best_position == 0) {
            $start = true;
        }
        if ("" == $excerpt) {
            // Nothing extracted (e.g. content shorter than the excerpt): use the beginning.
            $excerpt = $multibyte
                ? mb_substr($content, 0, $excerpt_length)
                : substr($content, 0, $excerpt_length);
            $start = true;
        }
    } else {
        $words = explode(' ', $content);
        $word_count = count($words);
        $have_mb_stripos = function_exists('mb_stripos');
        $have_mb_case = function_exists('mb_strpos') && function_exists('mb_strtoupper') && function_exists('mb_substr');

        $i = 0;
        // A non-positive length would never advance $i, so no slices are scanned then.
        while ($excerpt_length > 0 && $i < $word_count) {
            if ($i + $excerpt_length > $word_count) {
                // Last slice: move back so that it still holds a full excerpt if possible.
                $i = max(0, $word_count - $excerpt_length);
            }
            $excerpt_slice = " " . implode(' ', array_slice($words, $i, $excerpt_length));
            $term_hits = 0;
            foreach (array_keys($terms) as $term) {
                $term = (string) $term;
                $needle = " {$term}";
                if ($have_mb_stripos) {
                    $pos = mb_stripos($excerpt_slice, $needle);
                } else {
                    // No case-insensitive search available: try the term as given,
                    // with its first letter upper-cased, and fully upper-cased.
                    if ($have_mb_case) {
                        $candidates = array(
                            $needle,
                            " " . mb_strtoupper(mb_substr($term, 0, 1)) . mb_substr($term, 1),
                            mb_strtoupper($needle),
                        );
                    } else {
                        $candidates = array(
                            $needle,
                            " " . strtoupper(substr($term, 0, 1)) . substr($term, 1),
                            strtoupper($needle),
                        );
                    }
                    $pos = false;
                    foreach ($candidates as $candidate) {
                        $pos = $have_mb_case ? mb_strpos($excerpt_slice, $candidate) : strpos($excerpt_slice, $candidate);
                        if (false !== $pos) {
                            break;
                        }
                    }
                }
                if (false !== $pos) {
                    $term_hits++;
                    if ($term_hits > $best_excerpt_term_hits) {
                        $best_excerpt_term_hits = $term_hits;
                        $excerpt = $excerpt_slice;
                        // The flag describes the slice that was actually chosen.
                        $start = (0 == $i);
                    }
                }
            }
            $i += $excerpt_length;
        }
        if ("" == $excerpt) {
            // No term matched: use the leading words. explode() puts the unsplit remainder
            // into the last element, which array_pop() discards.
            $excerpt = explode(' ', $content, $excerpt_length);
            array_pop($excerpt);
            $excerpt = implode(' ', $excerpt);
            $start = true;
        }
    }
    return array($excerpt, $best_excerpt_term_hits, $start);
}