/**
 * Implements parseToken to format the XML tags.
 *
 * The emitted syntax is
 * <token type="TYPE" url="URL" css="STYLE"><![CDATA[TOKEN]]></token>
 * followed by a newline. The url attribute is only present when
 * $data['url'] is set, and the css attribute only when $this->_addCSS
 * is truthy.
 *
 * Blank or whitespace-only tokens are returned untouched.
 *
 * @param string $token        The token to put tags around
 * @param string $context_name The name of the context that the tag is in
 * @param array  $data         Miscellaneous data about the context
 *                             (only the 'url' key is read here)
 * @return string The token wrapped in XML
 * @todo [blocking 1.2.2] Make it so that CSS is optional
 */
function parseToken($token, $context_name, $data) {
    // Ignore blank tokens
    if ('' == $token || geshi_is_whitespace($token)) {
        return $token;
    }

    // Open the tag; the context name ends up inside an attribute, so escape it
    $result = '<token type="' . GeSHi::hsc($context_name) . '"';

    // Optional URL attribute
    if (isset($data['url'])) {
        $result .= ' url="' . GeSHi::hsc($data['url']) . '"';
    }

    // Optional CSS attribute (escaped for the same reason as the others)
    if ($this->_addCSS) {
        $result .= ' css="' . GeSHi::hsc($this->_styler->getStyle($context_name)) . '"';
    }

    // Finish the opening tag
    $result .= '>';

    // A literal "]]>" inside the token would terminate the CDATA section
    // early; split it across two adjacent CDATA sections instead.
    $result .= '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', $token) . ']]>';

    // Closing tag plus a real newline (double quotes: '\n' in single quotes
    // would emit a literal backslash followed by "n")
    $result .= "</token>\n";

    return $result;
}
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The code is split into lines. With line numbers enabled every line is
 * wrapped in <li><div>…</div></li>; without them only the lines listed in
 * $this->highlight_extra_lines are wrapped in a <div>. The result of
 * $this->header() is prepended and $this->footer() appended to the
 * right-trimmed markup.
 *
 * @param string The code already parsed. Passed by reference: on return it
 *               holds the per-line markup WITHOUT header and footer.
 * @return string The code nicely finalised (header + code + footer)
 * @since 1.0.0
 * @access private
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        strpos($parsed_code, GeSHi::hsc(GESHI_START_IMPORTANT)) === false) {
        $parsed_code = str_replace(GeSHi::hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE) {
        $parsed_code = $this->indent($parsed_code);
    }

    // purge some unnecessary stuff (spans/divs that only wrap whitespace)
    $parsed_code = preg_replace('#<span[^>]+>(\\s*)</span>#', '\\1', $parsed_code);
    $parsed_code = preg_replace('#<div[^>]+>(\\s*)</div>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    $code = explode("\n", $parsed_code);
    $n = count($code);
    $parsed_code = '';

    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        // If we're using line numbers, we insert <li>s and appropriate
        // markup to style them.

        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE) ? "\n" : '';

        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number, used for ids and extra highlighting
            $line_no = $i + 1;

            // Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = ' ';
            }

            // Every line_nth_row-th line is a "special line" in fancy mode
            $is_special = $this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1);

            if ($this->use_classes) {
                $attrs['class'][] = $is_special ? 'li2' : 'li1';
                $def_attr = $is_special ? ' class="de2"' : ' class="de1"';
            } else {
                $attrs['style'][] = $is_special ? $this->line_style2 : $this->line_style1;
                // This style "covers up" the special styles set for special lines
                // so that styles applied to special lines don't apply to the actual
                // code on that line
                $def_attr = ' style="' . $this->code_style . '"';
            }

            $start = "<div{$def_attr}>";
            $end = '</div>';

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "{$this->overall_id}-{$line_no}";
            }

            // Extra highlighting for explicitly requested lines
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $attrs['class'][] = "lx{$line_no}";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    // $attrs['style'] always exists here (set above when
                    // classes are not used)
                    $attrs['style'][] = $this->get_line_style($line_no);
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li{$attr_string}>{$start}{$code[$i]}{$end}</li>{$ls}";
        }
    } else {
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        for ($i = 0; $i < $n; ++$i) {
            // Lines are selected AND styled by their 1-based number, exactly
            // like in the line-number branch above. (The style lookup used to
            // use the 0-based index, picking the style of the previous line.)
            $line_no = $i + 1;

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = ' ';
            }

            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $parsed_code .= "<div class=\"lx{$line_no}\">";
                    } else {
                        $parsed_code .= "<div class=\"ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<div style=\"" . $this->get_line_style($line_no) . "\">";
                }
                // Remove \n because it stuffs up <pre> header
                $parsed_code .= $code[$i] . "</div>";
            } else {
                $parsed_code .= $code[$i] . "\n";
            }
        }
    }

    unset($code);

    return $this->header() . chop($parsed_code) . $this->footer();
}
/**
 * Returns the code in $this->source, highlighted and surrounded by the
 * necessary HTML.
 *
 * Rough outline:
 *  1. Newlines are normalised to "\n".
 *  2. If $this->error is set, the source is only HTML-escaped and finalised.
 *  3. In strict mode the source is split into alternating non-highlighted
 *     (even index) and highlighted (odd index) parts using the language's
 *     SCRIPT_DELIMITERS; otherwise the whole source is one highlighted part.
 *  4. Each highlighted part is scanned byte by byte for strings, hard-quoted
 *     strings and regexp / multi-line / single-line comments; everything
 *     else is handed to parse_non_string_part().
 *  5. The result is passed through finalise(), whose return value (header +
 *     code + footer) is returned.
 *
 * This should only be called ONCE, cos it's SLOW! If you want to highlight
 * the same source multiple times, you're better off doing a whole lot of
 * str_replaces to replace the <span>s
 *
 * @return string The highlighted source wrapped by header and footer
 * @since 1.0.0
 */
function parse_code() {
    // Start the timer
    $start_time = microtime();

    // Replace all newlines to a common form.
    $code = str_replace("\r\n", "\n", $this->source);
    $code = str_replace("\r", "\n", $code);

    // Firstly, if there is an error, we won't highlight
    if ($this->error) {
        //Escape the source for output
        $result = $this->hsc($this->source);

        //This fix is related to SF#1923020, but has to be applied regardless of
        //actually highlighting symbols.
        $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);

        // Timing is irrelevant
        $this->set_time($start_time, $start_time);

        // finalise() RETURNS the wrapped markup; the by-reference argument
        // only receives the un-wrapped lines, so use the return value.
        return $this->finalise($result);
    }

    // make sure the parse cache is up2date
    if (!$this->parse_cache_built) {
        $this->build_parse_cache();
    }

    // Initialise various stuff
    $length = strlen($code);
    $COMMENT_MATCHED = false;
    $stuff_to_parse = '';
    $endresult = '';

    // "Important" selections are handled like multiline comments
    // @todo GET RID OF THIS SHIZ
    if ($this->enable_important_blocks) {
        $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
    }

    if ($this->strict_mode) {
        // Break the source into bits. Each bit will be a portion of the code
        // within script delimiters - for example, HTML between < and >
        $k = 0;
        $parts = array();
        $matches = array();
        $next_match_pointer = null;
        // we use a copy to unset delimiters on demand (when they are not found)
        $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
        $i = 0;
        while ($i < $length) {
            $next_match_pos = $length + 1; // never true
            foreach ($delim_copy as $dk => $delimiters) {
                if (is_array($delimiters)) {
                    foreach ($delimiters as $open => $close) {
                        // make sure the cache is setup properly
                        if (!isset($matches[$dk][$open])) {
                            $matches[$dk][$open] = array(
                                'next_match' => -1,
                                'dk' => $dk,
                                'open' => $open,
                                'open_strlen' => strlen($open),
                                'close' => $close,
                                'close_strlen' => strlen($close)
                            );
                        }
                        // Get the next little bit for this opening string
                        if ($matches[$dk][$open]['next_match'] < $i) {
                            // only find the next pos if it was not already cached
                            $open_pos = strpos($code, $open, $i);
                            if ($open_pos === false) {
                                // no match for this delimiter ever
                                unset($delim_copy[$dk][$open]);
                                continue;
                            }
                            $matches[$dk][$open]['next_match'] = $open_pos;
                        }
                        if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
                            //So we got a new match, update the close_pos
                            $matches[$dk][$open]['close_pos'] =
                                strpos($code, $close, $matches[$dk][$open]['next_match'] + 1);

                            $next_match_pointer =& $matches[$dk][$open];
                            $next_match_pos = $matches[$dk][$open]['next_match'];
                        }
                    }
                } else {
                    //So we should match an RegExp as Strict Block ...
                    /**
                     * The value in $delimiters is expected to be an RegExp
                     * containing exactly 2 matching groups:
                     *  - Group 1 is the opener
                     *  - Group 2 is the closer
                     * (named groups 'start' and 'end' are used instead
                     * when both are present)
                     */
                    if (!GESHI_PHP_PRE_433 &&
                        preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
                        //We got a match ...
                        if (isset($matches_rx['start']) && isset($matches_rx['end'])) {
                            $matches[$dk] = array(
                                'next_match' => $matches_rx['start'][1],
                                'dk' => $dk,
                                'close_strlen' => strlen($matches_rx['end'][0]),
                                'close_pos' => $matches_rx['end'][1]
                            );
                        } else {
                            $matches[$dk] = array(
                                'next_match' => $matches_rx[1][1],
                                'dk' => $dk,
                                'close_strlen' => strlen($matches_rx[2][0]),
                                'close_pos' => $matches_rx[2][1]
                            );
                        }
                    } else {
                        // no match for this delimiter ever
                        unset($delim_copy[$dk]);
                        continue;
                    }

                    if ($matches[$dk]['next_match'] <= $next_match_pos) {
                        $next_match_pointer =& $matches[$dk];
                        $next_match_pos = $matches[$dk]['next_match'];
                    }
                }
            }

            // non-highlightable text
            $parts[$k] = array(
                1 => substr($code, $i, $next_match_pos - $i)
            );
            ++$k;

            if ($next_match_pos > $length) {
                // out of bounds means no next match was found
                break;
            }

            // highlightable code
            $parts[$k][0] = $next_match_pointer['dk'];

            //Only combine for non-rx script blocks
            if (is_array($delim_copy[$next_match_pointer['dk']])) {
                // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
                $i = $next_match_pos + $next_match_pointer['open_strlen'];
                while (true) {
                    $close_pos = strpos($code, $next_match_pointer['close'], $i);
                    if ($close_pos === false) {
                        break;
                    }
                    $i = $close_pos + $next_match_pointer['close_strlen'];
                    if ($i == $length) {
                        break;
                    }
                    if ($code[$i] == $next_match_pointer['open'][0] &&
                        ($next_match_pointer['open_strlen'] == 1 ||
                        substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
                        // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
                        foreach ($matches as $submatches) {
                            foreach ($submatches as $match) {
                                if ($match['next_match'] == $i) {
                                    // a different block already matches here!
                                    // (leaves both foreach loops and the while)
                                    break 3;
                                }
                            }
                        }
                    } else {
                        break;
                    }
                }
            } else {
                $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
                $i = $close_pos;
            }

            if ($close_pos === false) {
                // no closing delimiter found!
                $parts[$k][1] = substr($code, $next_match_pos);
                ++$k;
                break;
            } else {
                $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
                ++$k;
            }
        }
        unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
        $num_parts = $k;

        if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
            // when we have only one part, we don't have anything to highlight at all.
            // if we have a "maybe" strict language, this should be handled as highlightable code
            $parts = array(
                0 => array(
                    0 => '',
                    1 => ''
                ),
                1 => array(
                    0 => null,
                    1 => $parts[0][1]
                )
            );
            $num_parts = 2;
        }
    } else {
        // Not strict mode - simply dump the source into
        // the array at index 1 (the first highlightable block)
        $parts = array(
            0 => array(
                0 => '',
                1 => ''
            ),
            1 => array(
                0 => null,
                1 => $code
            )
        );
        $num_parts = 2;
    }

    //Unset variables we won't need any longer
    unset($code);

    //Preload some repeatedly used values regarding hardquotes ...
    $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
    $hq_strlen = strlen($hq);

    //Preload if line numbers are to be generated afterwards
    //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
    $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
        !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;

    //preload the escape char for faster checking ...
    $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);

    // this is used for single-line comments
    $sc_disallowed_before = "";
    $sc_disallowed_after = "";

    if (isset($this->language_data['PARSER_CONTROL'])) {
        if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
                $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
            }
            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
                $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
            }
        }
    }

    //Fix for SF#1932083: Multichar Quotemarks unsupported
    // Maps the first byte of every quotemark to the quotemark (string) or,
    // when several quotemarks share that byte, to the list of them (array).
    $is_string_starter = array();
    if ($this->lexic_permissions['STRINGS']) {
        foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
            if (!isset($is_string_starter[$quotemark[0]])) {
                $is_string_starter[$quotemark[0]] = (string) $quotemark;
            } elseif (is_string($is_string_starter[$quotemark[0]])) {
                $is_string_starter[$quotemark[0]] = array(
                    $is_string_starter[$quotemark[0]],
                    $quotemark
                );
            } else {
                $is_string_starter[$quotemark[0]][] = $quotemark;
            }
        }
    }

    // Now we go through each part. We know that even-indexed parts are
    // code that shouldn't be highlighted, and odd-indexed parts should
    // be highlighted
    for ($key = 0; $key < $num_parts; ++$key) {
        $STRICTATTRS = '';

        // If this block should be highlighted...
        if (!($key & 1)) {
            // Else not a block to highlight
            $endresult .= $this->hsc($parts[$key][1]);
            unset($parts[$key]);
            continue;
        }

        $result = '';
        $part = $parts[$key][1];

        $highlight_part = true;
        if ($this->strict_mode && !is_null($parts[$key][0])) {
            // get the class key for this block of code
            $script_key = $parts[$key][0];
            $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
            if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
                $this->lexic_permissions['SCRIPT']) {
                // Add a span element around the source to
                // highlight the overall source block
                if (!$this->use_classes &&
                    $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
                    $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
                } else {
                    $attributes = ' class="sc' . $script_key . '"';
                }
                $result .= "<span{$attributes}>";
                $STRICTATTRS = $attributes;
            }
        }

        if ($highlight_part) {
            // Now, highlight the code in this block. This code
            // is really the engine of GeSHi (along with the method
            // parse_non_string_part).

            // cache comment regexps incrementally
            $next_comment_regexp_key = '';
            $next_comment_regexp_pos = -1;
            $next_comment_multi_pos = -1;
            $next_comment_single_pos = -1;
            $comment_regexp_cache_per_key = array();
            $comment_multi_cache_per_key = array();
            $comment_single_cache_per_key = array();
            $next_open_comment_multi = '';
            $next_comment_single_key = '';
            $escape_regexp_cache_per_key = array();
            $next_escape_regexp_key = '';
            $next_escape_regexp_pos = -1;

            $length = strlen($part);
            for ($i = 0; $i < $length; ++$i) {
                // Get the next char
                $char = $part[$i];
                $char_len = 1;

                // update regexp comment cache if needed
                if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
                    $next_comment_regexp_pos = $length;
                    foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
                        $match_i = false;
                        if (isset($comment_regexp_cache_per_key[$comment_key]) &&
                            ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
                             $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
                            // we have already matched something
                            if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
                                // this comment is never matched
                                continue;
                            }
                            $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
                        } elseif (
                            // Old PHP has no offset parameter for preg_match,
                            // so a substring is searched there instead
                            (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
                            (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
                        ) {
                            $match_i = $match[0][1];
                            if (GESHI_PHP_PRE_433) {
                                $match_i += $i;
                            }

                            $comment_regexp_cache_per_key[$comment_key] = array(
                                'key' => $comment_key,
                                'length' => strlen($match[0][0]),
                                'pos' => $match_i
                            );
                        } else {
                            $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
                            continue;
                        }

                        if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
                            $next_comment_regexp_pos = $match_i;
                            $next_comment_regexp_key = $comment_key;
                            if ($match_i === $i) {
                                break;
                            }
                        }
                    }
                }

                $string_started = false;

                if (isset($is_string_starter[$char])) {
                    // Possibly the start of a new string ...

                    //Check which starter it was ...
                    //Fix for SF#1932083: Multichar Quotemarks unsupported
                    if (is_array($is_string_starter[$char])) {
                        // several candidates: the longest matching one wins
                        $char_new = '';
                        foreach ($is_string_starter[$char] as $testchar) {
                            if ($testchar === substr($part, $i, strlen($testchar)) &&
                                strlen($testchar) > strlen($char_new)) {
                                $char_new = $testchar;
                                $string_started = true;
                            }
                        }
                        if ($string_started) {
                            $char = $char_new;
                        }
                    } else {
                        $testchar = $is_string_starter[$char];
                        if ($testchar === substr($part, $i, strlen($testchar))) {
                            $char = $testchar;
                            $string_started = true;
                        }
                    }
                    $char_len = strlen($char);
                }

                if ($string_started && $i != $next_comment_regexp_pos) {
                    // Hand out the correct style information for this string
                    $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
                    if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
                        !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
                        $string_key = 0;
                    }

                    // parse the stuff before this
                    $result .= $this->parse_non_string_part($stuff_to_parse);
                    $stuff_to_parse = '';

                    if (!$this->use_classes) {
                        $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
                    } else {
                        $string_attributes = ' class="st' . $string_key . '"';
                    }

                    // now handle the string
                    $string = "<span{$string_attributes}>" . GeSHi::hsc($char);
                    $start = $i + $char_len;
                    $string_open = true;

                    if (empty($this->language_data['ESCAPE_REGEXP'])) {
                        $next_escape_regexp_pos = $length;
                    }

                    do {
                        //Get the regular ending pos ...
                        $close_pos = strpos($part, $char, $start);
                        if (false === $close_pos) {
                            $close_pos = $length;
                        }

                        if ($this->lexic_permissions['ESCAPE_CHAR']) {
                            // update escape regexp cache if needed
                            if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
                                $next_escape_regexp_pos = $length;
                                foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
                                    $match_i = false;
                                    if (isset($escape_regexp_cache_per_key[$escape_key]) &&
                                        ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
                                         $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
                                        // we have already matched something
                                        if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
                                            // this escape is never matched
                                            continue;
                                        }
                                        $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
                                    } elseif (
                                        (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
                                        (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
                                    ) {
                                        $match_i = $match[0][1];
                                        if (GESHI_PHP_PRE_433) {
                                            $match_i += $start;
                                        }

                                        $escape_regexp_cache_per_key[$escape_key] = array(
                                            'key' => $escape_key,
                                            'length' => strlen($match[0][0]),
                                            'pos' => $match_i
                                        );
                                    } else {
                                        $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
                                        continue;
                                    }

                                    if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
                                        $next_escape_regexp_pos = $match_i;
                                        $next_escape_regexp_key = $escape_key;
                                        if ($match_i === $start) {
                                            break;
                                        }
                                    }
                                }
                            }

                            //Find the next simple escape position
                            if ('' != $this->language_data['ESCAPE_CHAR']) {
                                $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
                                if (false === $simple_escape) {
                                    $simple_escape = $length;
                                }
                            } else {
                                $simple_escape = $length;
                            }
                        } else {
                            $next_escape_regexp_pos = $length;
                            $simple_escape = $length;
                        }

                        if ($simple_escape < $next_escape_regexp_pos &&
                            $simple_escape < $length &&
                            $simple_escape < $close_pos) {
                            //The next escape sequence is a simple one ...
                            $es_pos = $simple_escape;

                            //Add the stuff not in the string yet ...
                            $string .= $this->hsc(substr($part, $start, $es_pos - $start));

                            //Get the style for this escaped char ...
                            if (!$this->use_classes) {
                                $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
                            } else {
                                $escape_char_attributes = ' class="es0"';
                            }

                            //Add the style for the escape char ...
                            $string .= "<span{$escape_char_attributes}>" .
                                GeSHi::hsc($this->language_data['ESCAPE_CHAR']);

                            if (!isset($part[$es_pos + 1])) {
                                // The escape char is the very last byte of this
                                // part: there is nothing to escape. Close the span
                                // and stop at the end of the part, so the next
                                // strpos() is not called with an out-of-range offset.
                                $string .= '</span>';
                                $start = $es_pos + 1;
                            } else {
                                //Get the byte AFTER the ESCAPE_CHAR we just found
                                $es_char = $part[$es_pos + 1];
                                if ($es_char == "\n") {
                                    // don't put a newline around newlines
                                    $string .= "</span>\n";
                                    $start = $es_pos + 2;
                                } elseif (ord($es_char) >= 128) {
                                    //This is an non-ASCII char (UTF8 or single byte)
                                    //This code tries to work around SF#2037598 ...
                                    if (function_exists('mb_substr')) {
                                        $es_char_m = mb_substr(substr($part, $es_pos + 1, 16), 0, 1, $this->encoding);
                                        $string .= $es_char_m . '</span>';
                                    } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
                                        // Byte-wise pattern for one well-formed multi-byte
                                        // UTF-8 sequence. Written with \x escapes so it does
                                        // not depend on the encoding of this source file.
                                        if (preg_match("/[\xC2-\xDF][\x80-\xBF]" .
                                            "|\xE0[\xA0-\xBF][\x80-\xBF]" .
                                            "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}" .
                                            "|\xED[\x80-\x9F][\x80-\xBF]" .
                                            "|\xF0[\x90-\xBF][\x80-\xBF]{2}" .
                                            "|[\xF1-\xF3][\x80-\xBF]{3}" .
                                            "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
                                            $part, $es_char_m, 0, $es_pos + 1)) {
                                            $es_char_m = $es_char_m[0];
                                        } else {
                                            $es_char_m = $es_char;
                                        }
                                        $string .= $this->hsc($es_char_m) . '</span>';
                                    } else {
                                        // Single-byte fallback: emit the byte and close
                                        // the escape span, like the other branches do
                                        $es_char_m = $this->hsc($es_char);
                                        $string .= $es_char_m . '</span>';
                                    }
                                    $start = $es_pos + strlen($es_char_m) + 1;
                                } else {
                                    $string .= $this->hsc($es_char) . '</span>';
                                    $start = $es_pos + 2;
                                }
                            }
                        } elseif ($next_escape_regexp_pos < $length &&
                            $next_escape_regexp_pos < $close_pos) {
                            $es_pos = $next_escape_regexp_pos;
                            //Add the stuff not in the string yet ...
                            $string .= $this->hsc(substr($part, $start, $es_pos - $start));

                            //Get the key and length of this match ...
                            $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
                            $escape_str = substr($part, $es_pos, $escape['length']);
                            $escape_key = $escape['key'];

                            //Get the style for this escaped char ...
                            if (!$this->use_classes) {
                                $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
                            } else {
                                $escape_char_attributes = ' class="es' . $escape_key . '"';
                            }

                            //Add the style for the escape char ...
                            $string .= "<span{$escape_char_attributes}>" .
                                $this->hsc($escape_str) . '</span>';

                            $start = $es_pos + $escape['length'];
                        } else {
                            //Copy the remainder of the string ...
                            $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
                            $start = $close_pos + $char_len;
                            $string_open = false;
                        }
                    } while ($string_open);

                    if ($check_linenumbers) {
                        // Are line numbers used? If, we should end the string before
                        // the newline and begin it again (so when <li>s are put in the source
                        // remains XHTML compliant)
                        // note to self: This opens up possibility of config files specifying
                        // that languages can/cannot have multiline strings???
                        $string = str_replace("\n", "</span>\n<span{$string_attributes}>", $string);
                    }

                    $result .= $string;
                    $string = '';
                    $i = $start - 1;
                    continue;
                } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
                    substr($part, $i, $hq_strlen) == $hq && $i != $next_comment_regexp_pos) {
                    // The start of a hard quoted string
                    if (!$this->use_classes) {
                        $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
                        $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
                    } else {
                        $string_attributes = ' class="st_h"';
                        $escape_char_attributes = ' class="es_h"';
                    }
                    // parse the stuff before this
                    $result .= $this->parse_non_string_part($stuff_to_parse);
                    $stuff_to_parse = '';

                    // now handle the string
                    $string = '';

                    // look for closing quote
                    $start = $i + $hq_strlen;
                    while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
                        $start = $close_pos + 1;
                        if ($this->lexic_permissions['ESCAPE_CHAR'] &&
                            $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
                            $i + $hq_strlen != $close_pos) { //Support empty string for HQ escapes if Starter = Escape
                            // make sure this quote is not escaped
                            foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
                                if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
                                    // check wether this quote is escaped or if it is something like '\\'
                                    $escape_char_pos = $close_pos - 1;
                                    while ($escape_char_pos > 0 &&
                                        $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
                                        --$escape_char_pos;
                                    }
                                    if (($close_pos - $escape_char_pos) & 1) {
                                        // uneven number of escape chars => this quote is escaped
                                        // (resume the search for the closing quote)
                                        continue 2;
                                    }
                                }
                            }
                        }

                        // found closing quote
                        break;
                    }

                    //Found the closing delimiter?
                    if (!$close_pos) {
                        // span till the end of this $part when no closing delimiter is found
                        $close_pos = $length;
                    }

                    //Get the actual string
                    $string = substr($part, $i, $close_pos - $i + 1);
                    $i = $close_pos;

                    // handle escape chars and encode html chars
                    // (special because when we have escape chars within our string they may not be escaped)
                    if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
                        $start = 0;
                        $new_string = '';
                        while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
                            // html escape stuff before
                            $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
                            // check if this is a hard escape
                            foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
                                if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
                                    // indeed, this is a hardescape
                                    $new_string .= "<span{$escape_char_attributes}>" .
                                        $this->hsc($hardescape) . '</span>';
                                    $start = $es_pos + strlen($hardescape);
                                    continue 2;
                                }
                            }
                            // not a hard escape, but a normal escape
                            // they come in pairs of two
                            $c = 0;
                            while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1]) &&
                                $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR'] &&
                                $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
                                $c += 2;
                            }
                            if ($c) {
                                $new_string .= "<span{$escape_char_attributes}>" .
                                    str_repeat($escaped_escape_char, $c) .
                                    '</span>';
                                $start = $es_pos + $c;
                            } else {
                                // this is just a single lonely escape char...
                                $new_string .= $escaped_escape_char;
                                $start = $es_pos + 1;
                            }
                        }
                        $string = $new_string . $this->hsc(substr($string, $start));
                    } else {
                        $string = $this->hsc($string);
                    }

                    if ($check_linenumbers) {
                        // Are line numbers used? If, we should end the string before
                        // the newline and begin it again (so when <li>s are put in the source
                        // remains XHTML compliant)
                        // note to self: This opens up possibility of config files specifying
                        // that languages can/cannot have multiline strings???
                        $string = str_replace("\n", "</span>\n<span{$string_attributes}>", $string);
                    }

                    $result .= "<span{$string_attributes}>" . $string . '</span>';
                    $string = '';
                    continue;
                } else {
                    //Have a look for regexp comments
                    if ($i == $next_comment_regexp_pos) {
                        $COMMENT_MATCHED = true;
                        $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
                        $test_str = $this->hsc(substr($part, $i, $comment['length']));

                        //@todo If remove important do remove here
                        if ($this->lexic_permissions['COMMENTS']['MULTI']) {
                            if (!$this->use_classes) {
                                $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
                            } else {
                                $attributes = ' class="co' . $comment['key'] . '"';
                            }

                            $test_str = "<span{$attributes}>" . $test_str . "</span>";

                            // Short-cut through all the multiline code
                            if ($check_linenumbers) {
                                // strreplace to put close span and open span around multiline newlines
                                $test_str = str_replace(
                                    "\n", "</span>\n<span{$attributes}>",
                                    str_replace("\n ", "\n ", $test_str)
                                );
                            }
                        }

                        $i += $comment['length'] - 1;

                        // parse the rest
                        $result .= $this->parse_non_string_part($stuff_to_parse);
                        $stuff_to_parse = '';
                    }

                    // If we haven't matched a regexp comment, try multi-line comments
                    if (!$COMMENT_MATCHED) {
                        // Is this a multiline comment?
                        if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
                            $next_comment_multi_pos = $length;
                            foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
                                $match_i = false;
                                if (isset($comment_multi_cache_per_key[$open]) &&
                                    ($comment_multi_cache_per_key[$open] >= $i ||
                                     $comment_multi_cache_per_key[$open] === false)) {
                                    // we have already matched something
                                    if ($comment_multi_cache_per_key[$open] === false) {
                                        // this comment is never matched
                                        continue;
                                    }
                                    $match_i = $comment_multi_cache_per_key[$open];
                                } elseif (($match_i = stripos($part, $open, $i)) !== false) {
                                    $comment_multi_cache_per_key[$open] = $match_i;
                                } else {
                                    $comment_multi_cache_per_key[$open] = false;
                                    continue;
                                }
                                if ($match_i !== false && $match_i < $next_comment_multi_pos) {
                                    $next_comment_multi_pos = $match_i;
                                    $next_open_comment_multi = $open;
                                    if ($match_i === $i) {
                                        break;
                                    }
                                }
                            }
                        }
                        if ($i == $next_comment_multi_pos) {
                            $open = $next_open_comment_multi;
                            $close = $this->language_data['COMMENT_MULTI'][$open];
                            $open_strlen = strlen($open);
                            $close_strlen = strlen($close);
                            $COMMENT_MATCHED = true;
                            $test_str_match = $open;
                            //@todo If remove important do remove here
                            if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                $open == GESHI_START_IMPORTANT) {
                                if ($open != GESHI_START_IMPORTANT) {
                                    if (!$this->use_classes) {
                                        $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
                                    } else {
                                        $attributes = ' class="coMULTI"';
                                    }
                                    $test_str = "<span{$attributes}>" . $this->hsc($open);
                                } else {
                                    if (!$this->use_classes) {
                                        $attributes = ' style="' . $this->important_styles . '"';
                                    } else {
                                        $attributes = ' class="imp"';
                                    }

                                    // We don't include the start of the comment if it's an
                                    // "important" part
                                    $test_str = "<span{$attributes}>";
                                }
                            } else {
                                $test_str = $this->hsc($open);
                            }

                            $close_pos = strpos($part, $close, $i + $open_strlen);

                            if ($close_pos === false) {
                                $close_pos = $length;
                            }

                            // Short-cut through all the multiline code
                            $rest_of_comment = $this->hsc(
                                substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen)
                            );
                            if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                $test_str_match == GESHI_START_IMPORTANT) &&
                                $check_linenumbers) {
                                // strreplace to put close span and open span around multiline newlines
                                $test_str .= str_replace(
                                    "\n", "</span>\n<span{$attributes}>",
                                    str_replace("\n ", "\n ", $rest_of_comment)
                                );
                            } else {
                                $test_str .= $rest_of_comment;
                            }

                            if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
                                $test_str_match == GESHI_START_IMPORTANT) {
                                $test_str .= '</span>';
                            }

                            $i = $close_pos + $close_strlen - 1;

                            // parse the rest
                            $result .= $this->parse_non_string_part($stuff_to_parse);
                            $stuff_to_parse = '';
                        }
                    }

                    // If we haven't matched a multiline comment, try single-line comments
                    if (!$COMMENT_MATCHED) {
                        // cache potential single line comment occurances
                        if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
                            $next_comment_single_pos = $length;
                            foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
                                $match_i = false;
                                if (isset($comment_single_cache_per_key[$comment_key]) &&
                                    ($comment_single_cache_per_key[$comment_key] >= $i ||
                                     $comment_single_cache_per_key[$comment_key] === false)) {
                                    // we have already matched something
                                    if ($comment_single_cache_per_key[$comment_key] === false) {
                                        // this comment is never matched
                                        continue;
                                    }
                                    $match_i = $comment_single_cache_per_key[$comment_key];
                                } elseif (
                                    // NOTE(review): the case-sensitive setting selects the
                                    // case-INsensitive search (stripos) and vice versa. This
                                    // looks inverted, but is kept as-is because the matched
                                    // mark is re-emitted from the language data below, so
                                    // swapping the calls would change the rendered text.
                                    // Confirm against the language files before changing.
                                    ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
                                        ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
                                    (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
                                        ($match_i = strpos($part, $comment_mark, $i)) !== false)
                                ) {
                                    $comment_single_cache_per_key[$comment_key] = $match_i;
                                } else {
                                    $comment_single_cache_per_key[$comment_key] = false;
                                    continue;
                                }
                                if ($match_i !== false && $match_i < $next_comment_single_pos) {
                                    $next_comment_single_pos = $match_i;
                                    $next_comment_single_key = $comment_key;
                                    if ($match_i === $i) {
                                        break;
                                    }
                                }
                            }
                        }
                        if ($next_comment_single_pos == $i) {
                            $comment_key = $next_comment_single_key;
                            $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
                            $com_len = strlen($comment_mark);

                            // This check will find special variables like $# in bash
                            // or compiler directives of Delphi beginning {$
                            if ((empty($sc_disallowed_before) || ($i == 0) ||
                                (false === strpos($sc_disallowed_before, $part[$i - 1]))) &&
                                (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
                                (false === strpos($sc_disallowed_after, $part[$i + $com_len])))) {
                                // this is a valid comment
                                $COMMENT_MATCHED = true;
                                if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
                                    if (!$this->use_classes) {
                                        $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
                                    } else {
                                        $attributes = ' class="co' . $comment_key . '"';
                                    }
                                    $test_str = "<span{$attributes}>" . $this->hsc($this->change_case($comment_mark));
                                } else {
                                    $test_str = $this->hsc($comment_mark);
                                }

                                //Check if this comment is the last in the source
                                $close_pos = strpos($part, "\n", $i);
                                $oops = false;
                                if ($close_pos === false) {
                                    $close_pos = $length;
                                    $oops = true;
                                }
                                $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
                                if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
                                    $test_str .= "</span>";
                                }

                                // Take into account that the comment might be the last in the source
                                if (!$oops) {
                                    $test_str .= "\n";
                                }

                                $i = $close_pos;

                                // parse the rest
                                $result .= $this->parse_non_string_part($stuff_to_parse);
                                $stuff_to_parse = '';
                            }
                        }
                    }
                }

                // Where are we adding this char?
                if (!$COMMENT_MATCHED) {
                    $stuff_to_parse .= $char;
                } else {
                    $result .= $test_str;
                    unset($test_str);
                    $COMMENT_MATCHED = false;
                }
            }
            // Parse the last bit
            $result .= $this->parse_non_string_part($stuff_to_parse);
            $stuff_to_parse = '';
        } else {
            $result .= $this->hsc($part);
        }
        // Close the <span> that surrounds the block
        if ($STRICTATTRS != '') {
            $result = str_replace("\n", "</span>\n<span{$STRICTATTRS}>", $result);
            $result .= '</span>';
        }

        $endresult .= $result;
        unset($part, $parts[$key], $result);
    }

    //This fix is related to SF#1923020, but has to be applied regardless of
    //actually highlighting symbols.
    /** NOTE: memorypeak #3 */
    $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);

    // We're finished: stop timing
    $this->set_time($start_time, microtime());

    // finalise() returns header + code + footer; its by-reference argument
    // only holds the un-wrapped lines afterwards, so return the return value.
    return $this->finalise($endresult);
}
/**
 * Prints a debug message, optionally wrapped in a styled <span>.
 *
 * The message may begin with a two-character marker that selects a style
 * and is stripped before printing:
 *
 *   @b: bold
 *   @i: italic
 *   @o: ok (green colour)
 *   @w: warn (yellow colour)
 *   @e: err (red colour)
 *
 * A message starting with "@" followed by any other character is printed
 * verbatim and unstyled. A message that does not start with "@" but looks
 * like a static call ("Class::method(...)") is printed in bold.
 *
 * Newlines inside the message are removed and the text is passed through
 * GeSHi::hsc() before being echoed.
 *
 * @param string  $message The message to print out
 * @param boolean $add_nl  Whether to append a newline to the output
 * @return void
 * @access private
 */
function geshi_dbg($message, $add_nl = true)
{
    // Blank message: emit at most the newline. Zero values ("0", 0, 0.0) are
    // real messages; a bare empty() check would silently drop them.
    if (empty($message) && !is_numeric($message)) {
        if ($add_nl) {
            echo "\n";
        }
        return;
    }
    $message = (string) $message;

    // CSS applied for each recognised two-character marker
    $marker_styles = array(
        '@b' => 'font-weight:bold;',
        '@i' => 'font-style:italic;',
        '@o' => 'color:green;background-color:#efe;border:1px solid #393;',
        '@w' => 'color:#660;background-color:#ffe;border:1px solid #993;',
        '@e' => 'color:red;background-color:#fee;border:1px solid #933;',
    );

    $start = '';
    $end   = '';

    if ('@' === $message[0]) {
        $marker = substr($message, 0, 2);
        if (isset($marker_styles[$marker])) {
            $start   = '<span style="' . $marker_styles[$marker] . '">';
            $end     = '</span>';
            // Drop the marker itself from the printed text
            $message = substr($message, 2);
        }
        // Unknown marker: leave the message untouched and unstyled
    } elseif (preg_match('#::(?:.*?)\\((?:.*?)\\)#s', $message)) {
        // Looks like "Class::method(...)": emphasise it
        $start = '<span style="font-weight:bold;">';
        $end   = '</span>';
    }

    if ($add_nl) {
        $end .= "\n";
    }

    echo $start . GeSHi::hsc(str_replace("\n", '', $message)) . $end;
}
/**
 * Implements parseToken for Pango markup output.
 *
 * Blank or whitespace-only tokens are handed back unchanged. Any other
 * token is escaped with GeSHi::hsc() and wrapped in a <span> whose
 * attributes are produced by _styleToAttributes() from the style that the
 * styler reports for the context.
 *
 * @param string $token        The token to put tags around
 * @param string $context_name The name of the context that the tag is in
 * @param array  $data         Miscellaneous data about the context (not used here)
 * @return string The token wrapped in Pango markup
 * @todo [blocking 1.2.2] Make it so that CSS is optional
 */
function parseToken($token, $context_name, $data)
{
    // Nothing to decorate for blank tokens
    if ('' == $token || geshi_is_whitespace($token)) {
        return $token;
    }

    // Look up the context's style and turn it into span attributes
    $span_attributes = self::_styleToAttributes($this->_styler->getStyle($context_name));

    // Opening tag, escaped token, closing tag
    return '<span ' . $span_attributes . '>' . GeSHi::hsc($token) . '</span>';
}
/**
 * Initialize a GeSHiContext. We cannot put this function into the
 * constructor since $this cannot be overridden there (possibly needed for
 * subcontexts).
 *
 * The context is populated by the first of these functions that exists,
 * tried in this order (examples assume language name "php" and context
 * name "php/php4/single_string"):
 *
 *   1. user-defined: geshi_<language>_<init_function>, only when
 *      $init_function is non-empty
 *   2. full context name: geshi_php_php4_single_string
 *   3. dialect shortcut (context name from its first "/"):
 *      geshi_php4_single_string
 *   4. language shortcut (language name plus the context name from its
 *      second "/"): geshi_php_single_string. Skipped when the context
 *      name is "<language>/<language>" or starts with
 *      "<language>/<language>/".
 *
 * After a function has been called, $context->_initPostProcess() is invoked
 * and the method returns. If no function exists, an E_USER_ERROR is
 * triggered that lists every function name tried together with an HTML
 * call stack.
 *
 * @param GeSHiContext $context       the context to initialize
 * @param string       $context_name  the context name
 * @param string       $init_function init function
 * @return void
 *
 * @todo is $context_name not superflous with $context->_contextName?
 * @todo remove the "nice error message" and either push it into an external
 *       function or better yet drop it overall and rely on sane developers
 *       which use Xdebug or similar. No need to reinvent the wheel!
 */
static function _initContext(GeSHiContext &$context, $context_name, $init_function = '')
{
    // Every function name we looked for, reported if none of them exists.
    $tried_functions = array();

    // First function to try is the user-defined one
    if ('' != $init_function) {
        $function = 'geshi_' . $context->_languageName . '_' . $init_function;
        if (function_exists($function)) {
            $function($context);
            $context->_initPostProcess();
            return;
        }
        $tried_functions[] = $function;
    }

    // Next choice is the full context name function
    $function = 'geshi_' . str_replace('/', '_', $context_name);
    if (function_exists($function)) {
        $function($context);
        $context->_initPostProcess();
        return;
    }
    $tried_functions[] = $function;

    // Next is the dialect shortcut function
    $function = 'geshi' . str_replace('/', '_', substr($context_name, strpos($context_name, '/')));
    if (function_exists($function)) {
        $function($context);
        $context->_initPostProcess();
        return;
    }
    $tried_functions[] = $function;

    // Final chance is the language shortcut function
    $root_language_name = $context->_languageName . "/" . $context->_languageName;
    if ($context_name != $root_language_name
        && "{$root_language_name}/" != substr($context_name, 0, strlen($root_language_name) + 1)
    ) {
        $function = 'geshi_' . str_replace(
            '/',
            '_',
            $context->_languageName
                . substr($context_name, strpos($context_name, '/', strpos($context_name, '/') + 1))
        );
        if (function_exists($function)) {
            $function($context);
            $context->_initPostProcess();
            return;
        }
        $tried_functions[] = $function;
    }

    // If we are still here then none of the functions we have tried have
    // been available to call. Time to raise an error. This will in general
    // only ever happen to developers building new language files, so we can
    // afford to take our time and build a nice error message to help them
    // debug it.
    if (function_exists('debug_backtrace')) {
        $backtrace_output = "<pre><strong>Call stack (most recent first):</strong>\n<ul>";

        foreach (debug_backtrace() as $bt) {
            // Set some defaults for debug values
            $bt['file']  = isset($bt['file']) ? $bt['file'] : 'Unknown';
            $bt['line']  = isset($bt['line']) ? $bt['line'] : 0;
            $bt['class'] = isset($bt['class']) ? $bt['class'] : '';
            $bt['type']  = isset($bt['type']) ? $bt['type'] : '';
            $bt['args']  = isset($bt['args']) ? $bt['args'] : array();

            // Render each argument on its own and join afterwards, so that
            // an argument printed as "0" still gets its ", " separator.
            $rendered_args = array();
            foreach ($bt['args'] as $arg) {
                switch (gettype($arg)) {
                    case 'integer':
                    case 'double':
                        $rendered_args[] = $arg;
                        break;
                    case 'string':
                        // Show at most 64 characters of a string argument
                        $arg = substr($arg, 0, 64) . (isset($arg[64]) ? '...' : '');
                        $rendered_args[] = '"' . $arg . '"';
                        break;
                    case 'array':
                        $rendered_args[] = 'array(' . count($arg) . ')';
                        break;
                    case 'object':
                        $rendered_args[] = 'object(' . get_class($arg) . ')';
                        break;
                    case 'resource':
                        // A resource must be cast explicitly ("Resource id #N")
                        // before a string function may inspect it.
                        $rendered_args[] = 'resource(' . strstr((string) $arg, '#') . ')';
                        break;
                    case 'boolean':
                        $rendered_args[] = $arg ? 'true' : 'false';
                        break;
                    case 'NULL':
                        $rendered_args[] = 'null';
                        break;
                    default:
                        $rendered_args[] = 'unknown';
                }
            }
            $args = implode(', ', $rendered_args);

            // Build a new entry for the output.
            $backtrace_output .= '<li>'
                . GeSHi::hsc($bt['class'])
                . GeSHi::hsc($bt['type'])
                . GeSHi::hsc($bt['function'])
                . '(' . GeSHi::hsc($args) . ') at '
                . GeSHi::hsc($bt['file']) . ':' . $bt['line']
                . "</li>";
        }

        $backtrace_output .= '</ul></pre>';
    } else {
        $backtrace_output = '[No backtrace available - debug_backtrace() '
            . 'not available]';
    }

    trigger_error(
        "Could not find function for context {$context_name}\n"
            . 'looked for ' . implode(', ', $tried_functions) . "\n"
            . $backtrace_output,
        E_USER_ERROR
    );
}
/**
 * Implements parseToken to put HTML tags around the tokens.
 *
 * Blank or whitespace-only tokens are returned untouched. Any other token
 * is escaped and wrapped in a <span> that carries the context's inline CSS
 * and has the context name as its title. When $data['url'] is set, the span
 * is additionally wrapped in a link to that URL.
 *
 * The CSS string for each context name is computed once and then kept in
 * $this->contextCSS for later calls.
 *
 * @param string $token        The token to put tags around
 * @param string $context_name The name of the context that the tag is in
 * @param array  $data         Miscellaneous data about the context
 * @return string The token wrapped in the appropriate HTML
 */
function parseToken($token, $context_name, $data)
{
    // Blank tokens need no markup
    if ('' == $token || geshi_is_whitespace($token)) {
        return $token;
    }

    // Opening half of the link, if a URL is associated with this token
    $has_url   = isset($data['url']);
    $link_open = $has_url ? '<a href="' . GeSHi::hsc($data['url']) . '">' : '';

    // Compute the CSS for this context on first use only
    if (!isset($this->contextCSS[$context_name])) {
        $style = $this->_styler->getStyle($context_name);
        $this->contextCSS[$context_name] = self::_styleToCSS($style);
    }

    $span = '<span style="' . $this->contextCSS[$context_name] . '" '
        . 'title="' . GeSHi::hsc($context_name) . '">'
        . GeSHi::hsc($token)
        . '</span>';

    // Close the link again when one was opened
    $link_close = $has_url ? '</a>' : '';

    return $link_open . $span . $link_close;
}