Esempio n. 1
0
/**
 * Handle truncation symbols in all their complexity
 *
 * If the plain-text length of the input does not exceed the requested amount, the (escaped) text is
 * returned untouched. Otherwise it is cut according to $type, an ellipsis entity is added at the cut,
 * and optionally the result is wrapped in a tooltip template that carries the full text.
 *
 * @param  array			Parameters passed to the symbol (0=text, 1=amount [60 if not given], 2=tooltip?, 3=is_html?, 4=use as grammatical length rather than HTML byte length, 5=fractional-deviation-tolerance for grammar-preservation)
 * @param  string			The type of truncation to do
 * @set    left expand right spread
 * @param  ?mixed			Tooltip to add on, but only if we end up creating our own tooltip (NULL: none)
 * @return string			The result.
 */
function symbol_truncator($param, $type, $tooltip_if_truncated = NULL)
{
    $value = '';

    // An object is flattened via evaluate() and from then on always handled as HTML
    if (is_object($param[0])) {
        $param[0] = $param[0]->evaluate();
        if (!isset($param[2])) {
            $param[2] = '0';
        }
        $param[3] = '1';
    }

    // Always defined, so the final check cannot hit an undefined variable
    $is_escaped = false;
    if ($GLOBALS['XSS_DETECT']) {
        $is_escaped = ocp_is_escaped($param[0]);
    }

    $amount = intval(isset($param[1]) ? $param[1] : '60');
    $is_html = isset($param[3]) && $param[3] == '1';
    if ($is_html) {
        // In case it contains HTML. This is imperfect, but having to cut something up is imperfect from the offset.
        $not_html = @html_entity_decode(strip_tags($param[0]), ENT_QUOTES, get_charset());
        $html = $param[0];
        if ($GLOBALS['XSS_DETECT']) {
            ocp_mark_as_escaped($html);
        }
        // Nothing HTML-like in there after all, so use the cheaper plain-text path (conserves memory)
        if ($html == $not_html && strpos($html, '&') === false && strpos($html, '<') === false) {
            $is_html = false;
        }
    } else {
        $not_html = $param[0];
        $html = escape_html($param[0]);
    }

    if (ocp_mb_strlen($not_html) > $amount) {
        $tooltip = isset($param[2]) && $param[2] == '1';
        $literal_pos = isset($param[4]) ? $param[4] == '1' : false;
        $grammar_completeness_tolerance = isset($param[5]) ? floatval($param[5]) : 0.0;

        // The XHTML-aware cutter is needed for real HTML, and whenever grammar preservation was requested
        $use_xhtml = $is_html || $grammar_completeness_tolerance != 0.0;
        if ($use_xhtml) {
            require_code('xhtml');
        }

        // Used to pull a trailing ellipsis back inside a closing block tag
        $hellip_outside = array('</p>&hellip;', '</div>&hellip;');
        $hellip_inside = array('&hellip;</p>', '&hellip;</div>');

        // Fallback for an unrecognised $type: the full text, but in its escaped form (never the raw input)
        $truncated = $html;

        switch ($type) {
            case 'left':
            case 'expand':
                $temp = $use_xhtml ? xhtml_substr($html, 0, $amount - 3, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, 0, $amount - 3));
                // Strict comparison: loosely, numeric strings such as '1.' and '1.000000' would count as equal
                if ($temp !== $html && in_array(substr($temp, -1), array('.', '?', '!'), true)) {
                    // so the "..." does not go right after the sentence terminator
                    $temp .= '<br />';
                }
                if ($type == 'left') {
                    $truncated = ($temp === $html) ? $temp : str_replace($hellip_outside, $hellip_inside, rtrim($temp) . '&hellip;');
                } else {
                    // 'expand': the template receives the cut text as TEXT and the full text as CONTENT
                    $_truncated = do_template('COMCODE_HIDE', array('TEXT' => protect_from_escaping($temp), 'CONTENT' => protect_from_escaping($html)));
                    $truncated = $_truncated->evaluate();
                }
                break;

            case 'right':
                // Keep the last ($amount - 3) characters, mirroring 'left'. The floor of 1 stops the
                // negative offset reaching 0, which would select the whole string.
                $keep = max($amount - 3, 1);
                $tail = $use_xhtml ? xhtml_substr($html, -$keep, NULL, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, -$keep));
                $truncated = str_replace($hellip_outside, $hellip_inside, '&hellip;' . ltrim($tail));
                break;

            case 'spread':
                // Roughly half the budget on either side of the ellipsis
                $pos = intval(floor(floatval($amount) / 2.0)) - 1;
                $head = $use_xhtml ? xhtml_substr($html, 0, $pos, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, 0, $pos));
                // The tail honours the same cutting options as the head
                $tail = $use_xhtml ? xhtml_substr($html, -$pos - 1, NULL, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, -$pos - 1));
                $truncated = str_replace($hellip_outside, $hellip_inside, rtrim($head . '&hellip;' . ltrim($tail)));
                break;
        }

        if ($tooltip) {
            if (!is_null($tooltip_if_truncated)) {
                $tif = is_object($tooltip_if_truncated) ? $tooltip_if_truncated->evaluate() : $tooltip_if_truncated;
                if (strpos($tif, $html) !== false) {
                    // The extra tooltip already contains the full text, so show it on its own
                    $html = $tif;
                } else {
                    $html .= ' &ndash; ' . $tif;
                }
            }
            // Block-level markup in the truncated text selects the non-inline template
            $tpl = strpos($truncated, '<div') !== false || strpos($truncated, '<p') !== false || strpos($truncated, '<table') !== false ? 'CROP_TEXT_MOUSE_OVER' : 'CROP_TEXT_MOUSE_OVER_INLINE';
            $value_tempcode = do_template($tpl, array('_GUID' => '36ae945ed864633cfa0d67e5c3f2d1c8', 'TEXT_SMALL' => $truncated, 'TEXT_LARGE' => $html));
            $value = $value_tempcode->evaluate();
            if ($GLOBALS['XSS_DETECT']) {
                ocp_mark_as_escaped($value);
            }
        } else {
            $value = $truncated;
        }
    } else {
        // Short enough already
        $value = $html;
    }

    if ($GLOBALS['XSS_DETECT']) {
        if ($is_escaped || !$is_html) {
            ocp_mark_as_escaped($value);
        }
    }
    return $value;
}
Esempio n. 2
0
/**
 * Highlight keywords in an extracted portion of a piece of text.
 *
 * Every occurrence of each searched word that lies outside of a tag is wrapped in a highlighting span.
 * If the highlighted text is 500 bytes or longer, a 500 byte window is cut out of it, positioned
 * so as to cover many highlighted occurrences, and ellipses are added at the cut ends.
 *
 * @param  string			The (XHTML) text that was searched, to highlight within
 * @param  array			List of words searched
 * @return string			Highlighted portion
 */
function generate_text_summary($_temp_summary, $words_searched)
{
    require_code('xhtml');

    $extra_pre = '<span class="comcode_highlight">';
    $extra_post = '</span>';
    $len_pre = strlen($extra_pre);
    $len_post = strlen($extra_post);

    $_temp_summary_lower = strtolower($_temp_summary);

    // Add in some highlighting direct to XHTML
    $all_occurrences = array(); // List of array(start offset, end offset) for each span inserted so far
    foreach ($words_searched as $content_bit) {
        if ($content_bit == '') {
            continue;
        }

        $last_pos = 0;
        do {
            $content_bit_matched = $content_bit;
            if (strtoupper($content_bit) == $content_bit) {
                // A word given fully in upper case is matched case-sensitively
                $content_bit_pos = strpos($_temp_summary, $content_bit, $last_pos);
            } else {
                $content_bit_pos = strpos($_temp_summary_lower, strtolower($content_bit), $last_pos);
                if (strpos($content_bit, '-') !== false) {
                    // Also try the word with hyphens removed, taking whichever form comes first
                    $content_bit_pos_2 = strpos($_temp_summary_lower, strtolower(str_replace('-', '', $content_bit)), $last_pos);
                    if ($content_bit_pos_2 !== false && ($content_bit_pos === false || $content_bit_pos_2 < $content_bit_pos)) {
                        $content_bit_pos = $content_bit_pos_2;
                        $content_bit_matched = str_replace('-', '', $content_bit);
                    }
                }
            }

            if ($content_bit_pos !== false) {
                $matched_len = strlen($content_bit_matched);
                $old_end = $content_bit_pos + $matched_len;

                // We are inside a tag if the nearest preceding angle bracket is an opening one.
                // That includes the case where no '>' precedes us at all (i.e. within the very first tag).
                $before = substr($_temp_summary, 0, $content_bit_pos);
                $last_gt = strrpos($before, '>');
                $last_lt = strrpos($before, '<');
                $inside_tag = ($last_lt !== false) && ($last_gt === false || $last_gt < $last_lt);

                if (!$inside_tag) {
                    $_temp_summary = substr($_temp_summary, 0, $content_bit_pos) . $extra_pre . substr($_temp_summary, $content_bit_pos, $matched_len) . $extra_post . substr($_temp_summary, $old_end);
                    $_temp_summary_lower = strtolower($_temp_summary);

                    // Adjust all stored occurrence offsets. They are still in pre-insertion coordinates, so
                    // compare against the pre-insertion match range: anything past the match moves by both
                    // inserted tags, anything within it only by the opening tag.
                    foreach ($all_occurrences as $i => $occ) {
                        if ($occ[0] >= $old_end) {
                            $all_occurrences[$i][0] += $len_pre + $len_post;
                        } elseif ($occ[0] > $content_bit_pos) {
                            $all_occurrences[$i][0] += $len_pre;
                        }
                        if ($occ[1] > $old_end) {
                            $all_occurrences[$i][1] += $len_pre + $len_post;
                        } elseif ($occ[1] > $content_bit_pos) {
                            $all_occurrences[$i][1] += $len_pre;
                        }
                    }

                    // Continue searching after the span we just inserted
                    $last_pos = $old_end + $len_pre + $len_post;
                    $all_occurrences[] = array($content_bit_pos, $last_pos);
                } else {
                    // Inside a tag: leave it alone and step over it
                    $last_pos = $old_end;
                }
            }
        } while ($content_bit_pos !== false);
    }

    if (strlen($_temp_summary) < 500) {
        return $_temp_summary;
    }

    // Find optimal position
    $len = strlen($_temp_summary);
    if (count($all_occurrences) < 60) {
        $best_yet = 0;
        $best_pos_min = 250;
        $best_pos_max = 250;
        for ($i = 250; $i < $len - 250; $i++) {
            // Count the occurrences lying fully within the 500 byte window centred on $i
            $count = 0;
            $i_pre = $i - 250;
            $i_post = $i + 250;
            foreach ($all_occurrences as $occ) {
                $occ_pre = $occ[0];
                $occ_post = $occ[1];
                if ($occ_pre >= $i_pre && $occ_pre <= $i_post && $occ_post >= $i_pre && $occ_post <= $i_post) {
                    $count++;
                    if ($count > 5) {
                        break; // Good enough
                    }
                }
            }

            if ($count > $best_yet || ($best_yet == $count && $i - 500 < $best_pos_min)) {
                if ($best_yet == $count) {
                    // Equally good: extend the run of best centres
                    $best_pos_max = $i;
                } else {
                    $best_yet = $count;
                    $best_pos_min = $i;
                    $best_pos_max = $i;
                }
                if ($count > 5) {
                    break; // Good enough
                }
            }
        }
        // Move it from center pos, to where we want to start from
        $best_pos = intval(floatval($best_pos_min + $best_pos_max) / 2.0) - 250;
    } else {
        // With this many occurrences the scan is skipped and the summary starts at the beginning
        $best_pos = 0;
    }

    // Render (with ellipses if required).
    // A byte-wise cut followed by xhtmlise_html is used; doing the cut with xhtml_substr was far far too slow.
    $summary = substr($_temp_summary, $best_pos, min(500, $len - $best_pos));
    $summary = xhtmlise_html($summary, true);
    if ($best_pos > 0) {
        $summary = '&hellip;' . $summary;
    }
    if ($best_pos + 500 < $len) {
        $summary .= '&hellip;';
    }
    return $summary;
}