/**
 * Handle truncation symbols in all their complexity.
 *
 * Text whose (tag-stripped) length does not exceed the requested amount is returned whole, as HTML.
 * Longer text is cut according to $type and, if requested via parameter 2, wrapped in a tooltip
 * template that shows the full text.
 *
 * @param  array   Parameters passed to the symbol (0=text, 1=amount, 2=tooltip?, 3=is_html?, 4=use as grammatical length rather than HTML byte length, 5=fractional-deviation-tolerance for grammar-preservation)
 * @param  string  The type of truncation to do
 * @set    left right spread expand
 * @param  ?mixed  Tooltip to add on, but only if we end up creating our own tooltip (NULL: none)
 * @return string  The result.
 */
function symbol_truncator($param, $type, $tooltip_if_truncated = NULL)
{
    $value = '';

    // An object is evaluated down to a string, and that string is then treated as HTML
    if (is_object($param[0])) {
        $param[0] = $param[0]->evaluate();
        if (!isset($param[2])) {
            $param[2] = '0';
        }
        $param[3] = '1';
    }

    // Only consulted again when XSS_DETECT is on, but always defined so the final check is safe
    $is_escaped = false;
    if ($GLOBALS['XSS_DETECT']) {
        $is_escaped = ocp_is_escaped($param[0]);
    }

    $amount = intval(isset($param[1]) ? $param[1] : '60');
    $is_html = isset($param[3]) && $param[3] == '1';

    // Work out both a plain-text form (used for length measurement) and an HTML form (used for output)
    if ($is_html) {
        // In case it contains HTML. This is imperfect, but having to cut something up is imperfect from the outset.
        $not_html = @html_entity_decode(strip_tags($param[0]), ENT_QUOTES, get_charset());
        $html = $param[0];
        if ($GLOBALS['XSS_DETECT']) {
            ocp_mark_as_escaped($html);
        }
        if ($html == $not_html && strpos($html, '&') === false && strpos($html, '<') === false) {
            $is_html = false; // Conserve memory: no markup present, so the cheaper plain-text path is enough
        }
    } else {
        $not_html = $param[0];
        $html = escape_html($param[0]);
    }

    if (ocp_mb_strlen($not_html) > $amount) {
        $tooltip = isset($param[2]) && $param[2] == '1';
        $literal_pos = isset($param[4]) ? $param[4] == '1' : false;
        $grammar_completeness_tolerance = isset($param[5]) ? floatval($param[5]) : 0.0;

        // Markup-aware cutting is needed for real HTML, or whenever grammar preservation was asked for
        $use_xhtml = $is_html || $grammar_completeness_tolerance != 0.0;
        if ($use_xhtml) {
            require_code('xhtml');
        }

        // If the ellipsis lands just after a closing block tag, it is moved inside that tag
        $ellipsis_outside = array('</p>…', '</div>…');
        $ellipsis_inside = array('…</p>', '…</div>');

        // Characters of the original text to keep for left/right/expand (3 are given up for the ellipsis)
        $keep = $amount - 3;

        switch ($type) {
            case 'left':
            case 'expand':
                $temp = $use_xhtml ? xhtml_substr($html, 0, $keep, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, 0, $keep));
                if ($temp != $html && in_array(substr($temp, -1), array('.', '?', '!'))) {
                    $temp .= '<br />'; // so the "..." does not go right after the sentence terminator
                }
                if ($type == 'left') {
                    $truncated = ($temp == $html) ? $temp : str_replace($ellipsis_outside, $ellipsis_inside, rtrim($temp) . '…');
                } else {
                    // 'expand': short text is shown, with the full text behind a hide/reveal template
                    $_truncated = do_template('COMCODE_HIDE', array('TEXT' => protect_from_escaping($temp), 'CONTENT' => protect_from_escaping($html)));
                    $truncated = $_truncated->evaluate();
                }
                break;

            case 'right':
                // Keep the LAST $keep characters. (Previously "-$amount - 3" kept $amount + 3 characters, which
                // could exceed the source length.) At least one character is kept, as a start offset of zero
                // or above would select from the front of the text rather than the end.
                $from = -max(1, $keep);
                $tail = $use_xhtml ? xhtml_substr($html, $from, NULL, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, $from));
                $truncated = str_replace($ellipsis_outside, $ellipsis_inside, '…' . ltrim($tail));
                break;

            case 'spread':
                // Roughly half the budget from each end, joined by an ellipsis
                $pos = intval(floor(floatval($amount) / 2.0)) - 1;
                $head = $use_xhtml ? xhtml_substr($html, 0, $pos, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, 0, $pos));
                // The tail is cut with the same positional/grammar options as the head, for consistency
                $tail = $use_xhtml ? xhtml_substr($html, -$pos - 1, NULL, $literal_pos, false, $grammar_completeness_tolerance) : escape_html(ocp_mb_substr($not_html, -$pos - 1));
                $truncated = str_replace($ellipsis_outside, $ellipsis_inside, rtrim($head . '…' . ltrim($tail)));
                break;

            default:
                // Unknown truncation type: we cannot cut, so output the full text in its HTML-safe form
                // (never the raw/entity-decoded text, which is not safe to output as HTML)
                $truncated = $html;
                break;
        }

        if ($tooltip) {
            if (!is_null($tooltip_if_truncated)) {
                $tif = is_object($tooltip_if_truncated) ? $tooltip_if_truncated->evaluate() : $tooltip_if_truncated;
                if (strpos($tif, $html) !== false) {
                    $html = $tif; // The extra tooltip already contains the full text, so use it alone
                } else {
                    $html .= ' – ' . $tif;
                }
            }

            // Block-level markup in the truncated text needs the non-inline template
            $tpl = strpos($truncated, '<div') !== false || strpos($truncated, '<p') !== false || strpos($truncated, '<table') !== false ? 'CROP_TEXT_MOUSE_OVER' : 'CROP_TEXT_MOUSE_OVER_INLINE';
            $value_tempcode = do_template($tpl, array('_GUID' => '36ae945ed864633cfa0d67e5c3f2d1c8', 'TEXT_SMALL' => $truncated, 'TEXT_LARGE' => $html));
            $value = $value_tempcode->evaluate();
            if ($GLOBALS['XSS_DETECT']) {
                ocp_mark_as_escaped($value);
            }
        } else {
            $value = $truncated;
        }
    } else {
        // Short enough already
        $value = $html;
    }

    if ($GLOBALS['XSS_DETECT']) {
        if ($is_escaped || !$is_html) {
            ocp_mark_as_escaped($value);
        }
    }

    return $value;
}
/**
 * Highlight keywords in an extracted portion of a piece of text.
 *
 * Each searched word found outside of a tag is wrapped in a "comcode_highlight" span. If the highlighted
 * text is 500 bytes or longer, a 500 byte window is cut from it (positioned around where the most highlights
 * fall), passed through xhtmlise_html, and given ellipses at whichever ends were cut.
 *
 * @param  string  The text (HTML) to highlight within and summarise
 * @param  array   List of words searched
 * @return string  Highlighted portion
 */
function generate_text_summary($_temp_summary, $words_searched)
{
    require_code('xhtml');

    $summary = '';

    $_temp_summary_lower = strtolower($_temp_summary);

    $extra_pre = '<span class="comcode_highlight">';
    $extra_post = '</span>';
    $extra_pre_len = strlen($extra_pre);
    $extra_post_len = strlen($extra_post);

    // Add in some highlighting direct to XHTML
    $all_occurrences = array(); // List of pairs: start offset, end offset (of each highlighted span)
    foreach ($words_searched as $content_bit) {
        if ($content_bit == '') {
            continue;
        }

        // Loop-invariant forms of the search word
        $case_sensitive = (strtoupper($content_bit) == $content_bit); // Words given fully upper-case are matched case-sensitively
        $content_bit_lower = strtolower($content_bit);
        $has_hyphen = (strpos($content_bit, '-') !== false);
        $content_bit_dehyphenated = str_replace('-', '', $content_bit);

        $last_pos = 0;
        do {
            $content_bit_matched = $content_bit;
            if ($case_sensitive) {
                $content_bit_pos = strpos($_temp_summary, $content_bit, $last_pos);
            } else {
                $content_bit_pos = strpos($_temp_summary_lower, $content_bit_lower, $last_pos);
                if ($has_hyphen) {
                    // Also accept the word with its hyphens removed; whichever form comes first wins
                    $content_bit_pos_2 = strpos($_temp_summary_lower, strtolower($content_bit_dehyphenated), $last_pos);
                    if ($content_bit_pos_2 !== false && ($content_bit_pos === false || $content_bit_pos_2 < $content_bit_pos)) {
                        $content_bit_pos = $content_bit_pos_2;
                        $content_bit_matched = $content_bit_dehyphenated;
                    }
                }
            }

            if ($content_bit_pos !== false) {
                $matched_len = strlen($content_bit_matched);
                $matched_end = $content_bit_pos + $matched_len; // End of the match, before anything is inserted

                // We are inside a tag if the nearest preceding '<' has no '>' after it.
                // (A missing '>' must not be taken as "safe" when a '<' is present, or we would inject
                // a span into the attributes of the very first tag.)
                $before = substr($_temp_summary, 0, $content_bit_pos);
                $last_gt = strrpos($before, '>');
                $last_lt = strrpos($before, '<');
                $inside_tag = ($last_lt !== false) && ($last_gt === false || $last_gt < $last_lt);

                if (!$inside_tag) {
                    $_temp_summary = $before . $extra_pre . substr($_temp_summary, $content_bit_pos, $matched_len) . $extra_post . substr($_temp_summary, $matched_end);
                    $_temp_summary_lower = strtolower($_temp_summary);

                    // Adjust all stored occurrence offsets. Stored offsets are pre-insertion, so they are
                    // compared against the pre-insertion match boundaries.
                    foreach ($all_occurrences as $i => $occ) {
                        if ($occ[0] >= $matched_end) {
                            // After the match: pushed along by both the opening and closing tag
                            $all_occurrences[$i][0] += $extra_pre_len + $extra_post_len;
                            $all_occurrences[$i][1] += $extra_pre_len + $extra_post_len;
                        } elseif ($occ[0] > $content_bit_pos) {
                            // Within the match: pushed along by the opening tag only
                            $all_occurrences[$i][0] += $extra_pre_len;
                            $all_occurrences[$i][1] += $extra_pre_len;
                        }
                    }

                    // Resume searching just after the closing tag we added
                    $last_pos = $matched_end + $extra_pre_len + $extra_post_len;
                    $all_occurrences[] = array($content_bit_pos, $last_pos);
                } else {
                    // Skip over this in-tag match without touching it
                    $last_pos = $matched_end;
                }
            }
        } while ($content_bit_pos !== false);
    }

    $len = strlen($_temp_summary);
    if ($len < 500) {
        return $_temp_summary;
    }

    // Find optimal position: slide a 500 byte window along and count the highlights fully inside it
    $best_yet = 0;
    $best_pos_min = 250;
    $best_pos_max = 250;
    if (count($all_occurrences) < 60) {
        for ($i = 250; $i < $len - 250; $i++) {
            $count = 0;
            $i_pre = $i - 250;
            $i_post = $i + 250;
            foreach ($all_occurrences as $occ) {
                $occ_pre = $occ[0];
                $occ_post = $occ[1];
                if ($occ_pre >= $i_pre && $occ_pre <= $i_post && $occ_post >= $i_pre && $occ_post <= $i_post) {
                    $count++;

                    if ($count > 5) {
                        break; // Good enough
                    }
                }
            }

            // A better count starts a new best range; an equal count extends the current range, but only
            // while we are within 500 bytes of where that range started
            if ($count > $best_yet || $best_yet == $count && $i - 500 < $best_pos_min) {
                if ($best_yet == $count) {
                    $best_pos_max = $i;
                } else {
                    $best_yet = $count;
                    $best_pos_min = $i;
                    $best_pos_max = $i;
                }

                if ($count > 5) {
                    break; // Good enough
                }
            }
        }
        $best_pos = intval(floatval($best_pos_min + $best_pos_max) / 2.0) - 250; // Move it from center pos, to where we want to start from
    } else {
        // Too many occurrences to scan for a best window here; just start from the beginning
        $best_pos = 0;
    }

    // Render (with ellipses if required).
    // A plain substr followed by xhtmlise_html is used, as xhtml_substr was found to be far far too slow here.
    $summary = substr($_temp_summary, $best_pos, min(500, $len - $best_pos));
    $summary = xhtmlise_html($summary, true);
    if ($best_pos > 0) {
        $summary = '…' . $summary;
    }
    if ($best_pos + 500 < $len) {
        $summary .= '…';
    }

    return $summary;
}