/**
 * Implements parseToken to wrap a token in an XML <token> element.
 *
 * Output form:
 *   <token type="TYPE" url="URL" css="STYLE"><![CDATA[TOKEN]]></token>
 * followed by a newline. The url attribute is only written when
 * $data['url'] is set, the css attribute only when $this->_addCSS is truthy.
 *
 * @param string $token        The token to put tags around
 * @param string $context_name The name of the context that the token is in
 * @param array  $data         Miscellaneous data about the context
 * @return string The token wrapped in XML; empty and whitespace-only tokens
 *                are returned unchanged
 * @todo [blocking 1.2.2] Make it so that CSS is optional
 */
function parseToken($token, $context_name, $data)
{
    // Ignore blank tokens
    if ('' == $token || geshi_is_whitespace($token)) {
        return $token;
    }

    // The context name lands inside an attribute value, so it gets the same
    // escaping as the URL below.
    $result = '<token type="' . GeSHi::hsc($context_name) . '"';

    // Only add the URL when the context supplied one
    if (isset($data['url'])) {
        $result .= ' url="' . GeSHi::hsc($data['url']) . '"';
    }

    // Optionally attach the style for this context
    if ($this->_addCSS) {
        $result .= ' css="' . $this->_styler->getStyle($context_name) . '"';
    }

    // Finish the opening tag
    $result .= '>';

    // A literal "]]>" inside the token would terminate the CDATA section
    // early; split it across two adjacent CDATA sections instead.
    $result .= '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', $token) . ']]>';

    // Double quotes so that a real newline is emitted (single quotes would
    // output the two characters backslash and "n").
    $result .= "</token>\n";

    return $result;
}
/**
 * Demotes "type" and "keyword/nonreserved" tokens to the plain language
 * context when the previously remembered token was "," or "(".
 *
 * Whitespace tokens, and tokens whose context name contains "comment", are
 * passed through untouched and do not update the remembered previous token.
 *
 * @param string $token        The source token
 * @param string $context_name The context of the source token
 * @param array  $data         Additional data
 * @return array array($token, $context_name, $data), with the context name
 *               possibly replaced by the bare language name
 */
public function parseToken($token, $context_name, $data)
{
    if (geshi_is_whitespace($token)) {
        return array($token, $context_name, $data);
    }
    if (false !== strpos($context_name, 'comment')) {
        return array($token, $context_name, $data);
    }

    // Part of the context name that follows "<language>/"
    $subContext = substr($context_name, strlen($this->_language) + 1);

    $followsSeparator = (',' == $this->_prevToken || '(' == $this->_prevToken);
    if ($followsSeparator && in_array($subContext, array('type', 'keyword/nonreserved'))) {
        // Treat it as an ordinary identifier rather than a type/keyword
        $context_name = $this->_language;
    }

    // Remember this token (with its possibly adjusted context) for the next call
    $this->_prevToken       = $token;
    $this->_prevContextName = $context_name;
    $this->_prevData        = $data; // needed?

    return array($token, $context_name, $data);
}
/**
 * Implements parseToken to wrap a token in a <span> element whose
 * attributes are derived from the style of the token's context.
 *
 * @param string $token        The token to put tags around
 * @param string $context_name The name of the context that the token is in
 * @param array  $data         Miscellaneous data about the context (unused here)
 * @return string The token wrapped in Pango markup; empty and
 *                whitespace-only tokens are returned unchanged
 * @todo [blocking 1.2.2] Make it so that CSS is optional
 */
function parseToken($token, $context_name, $data)
{
    // Blank tokens need no markup
    if ('' == $token || geshi_is_whitespace($token)) {
        return $token;
    }

    // Translate the context's style into span attributes
    $attributes = self::_styleToAttributes($this->_styler->getStyle($context_name));

    // Opening tag, escaped token text, closing tag
    return '<span ' . $attributes . '>' . GeSHi::hsc($token) . '</span>';
}
/**
 * Implements parseToken to put HTML tags around the tokens.
 *
 * Each non-blank token becomes a <span> carrying an inline style and a
 * title holding the context name. When $data['url'] is set the span is
 * additionally wrapped in a link to that URL. The CSS string for each
 * context is computed once and cached in $this->contextCSS.
 *
 * @param string $token        The token to put tags around
 * @param string $context_name The name of the context that the token is in
 * @param array  $data         Miscellaneous data about the context
 * @return string The token wrapped in the appropriate HTML; empty and
 *                whitespace-only tokens are returned unchanged
 */
function parseToken($token, $context_name, $data)
{
    // Blank tokens are emitted as they are
    if ('' == $token || geshi_is_whitespace($token)) {
        return $token;
    }

    $hasLink = isset($data['url']);

    // Opening anchor, if this token links somewhere
    $html = $hasLink ? '<a href="' . GeSHi::hsc($data['url']) . '">' : '';

    // Fill the per-context CSS cache on first use
    if (!isset($this->contextCSS[$context_name])) {
        $this->contextCSS[$context_name] = self::_styleToCSS($this->_styler->getStyle($context_name));
    }

    $html .= '<span style="' . $this->contextCSS[$context_name] . '" title="'
        . GeSHi::hsc($context_name) . '">' . GeSHi::hsc($token) . '</span>';

    // Close the anchor again
    if ($hasLink) {
        $html .= '</a>';
    }

    return $html;
}
/**
 * Implements parseToken to output one debug line per token.
 *
 * Line layout: running counter, a one-character flag ("E" empty,
 * "W" whitespace, "-" anything else), the token length in bytes, the full
 * context name and - when $this->outputTokens is truthy - the token itself
 * with control characters escaped so the record stays on a single line.
 *
 * The counter is a function-level static, so it keeps counting across
 * calls for the lifetime of the request.
 *
 * @param string $token        The token to output
 * @param string $context_name The name of the context that the token is in
 * @param array  $data         Miscellaneous data about the context (unused here)
 *
 * @return string Debug string, terminated by a newline
 */
function parseToken($token, $context_name, $data)
{
    static $counter = 0;

    if ($token == '') {
        $flags = 'E'; // empty
    } elseif (geshi_is_whitespace($token)) {
        $flags = 'W'; // whitespace
    } else {
        $flags = '-';
    }

    // Escape the control characters \0 through \37 (octal): a bare ".."
    // char list is an invalid range for addcslashes() and would leave
    // newlines inside the token unescaped, breaking the line-based format.
    $shownToken = $this->outputTokens ? addcslashes($token, "\0..\37") : '';

    // Note: an indented "context tail" used to be computed here but was never
    // printed; the full context name is what the output has always contained.
    return sprintf(
        "%8d %1s %4d %-40s - %s\n",
        $counter++,
        $flags,
        strlen($token),
        $context_name,
        $shownToken
    );
}
/**
 * Delphi-specific token post-processing.
 *
 * This method can either return an array
 * like this one is doing, or nothing (false), or an
 * array of arrays. This way, it can hold onto
 * data it needs for parsing.
 *
 * State kept on the object between calls (all read and written below):
 *  - _semicolonFlag            set when the previous significant token was ";"
 *  - _instrExpected            set after a line break, a block opener or ";"
 *  - _defaultFlag              0/1/2 progress counter used to spot "default"
 *  - _bracketCount             nesting depth of ( and [ in the brksym context
 *  - _openBlockCount / _openBlockType   stack of currently open block keywords
 *  - _inASMBlock               see the review notes in the body
 *  - _stack                    tokens held back via push() until flush()
 *
 * @param string $token        The source token
 * @param string $context_name The context of the source token
 * @param array  $data         Additional data
 * @return array An empty array when the token was stored for later,
 *               otherwise whatever flush() returns
 *
 * @todo [blocking 1.1.5] delphi fixes:
 *   - highlight default keyword if after ; in property context
 *   - don't highlight functions if not before "(" brackets (alpha)
 */
function parseToken($token, $context_name, $data)
{
    // Trace the first 15 characters of the token (newlines shown as \n)
    geshi_dbg('GeSHiDelphiCodeParser::parseToken("' . substr(str_replace("\n", '\\n', $token), 0, 15) . '"...,' . $context_name . ')');

    // Check for line breaks: any token containing a newline resets the
    // semicolon tracking and marks that an instruction is expected next.
    if (false !== strpos($token, "\n")) {
        $this->_semicolonFlag = false;
        $this->_instrExpected = true;
    }

    // Check if we got a whitespace
    if (geshi_is_whitespace($token)) {
        // If there's anything in the storage, simply add the whitespace
        if ($this->_stack) {
            $this->push($token, $context_name, $data);
            return array();
        } else {
            // Return the token as is ...
            return $this->flush($token, $context_name, $data);
        }
    }

    // Lower-cased, trimmed copy used for all keyword comparisons below
    $token_l = strtolower(trim($token));

    // @todo for ben: here is an example of how this could work. You can make it better and
    // experiment with how this functionality works. I tested this only on simple examples, and
    // I know that currently the _defaultFlag could be reset to 0 earlier than it is if there is
    // a mistake with parsing.
    // Stage 2: the token right after "property ...;" - promote "default" to a
    // keyword, and in any case drop back to stage 0 on a non-blank token.
    if (2 == $this->_defaultFlag) {
        if ('default' == $token_l) {
            $context_name = $this->_language . '/keyword';
            $this->_defaultFlag = 0;
        } elseif ('' != trim($token)) {
            $this->_defaultFlag = 0;
        }
    }

    // @todo for ben: I don't think alias_name is set anymore, maybe you want to check
    // that this functionality works now?
    // Stage 0 -> 1: a token aliased as "<language>/property" was seen
    if (0 == $this->_defaultFlag && isset($data['alias_name']) && $data['alias_name'] == $this->_language . '/property') {
        $this->_defaultFlag = 1;
    }
    // Stage 1 -> 2: the ";" that follows it
    if (1 == $this->_defaultFlag && ';' == trim($token)) {
        $this->_defaultFlag = 2;
    }

    // @todo for ben: now symbols are handed in one at a time, maybe this can be optimised?
    if ($context_name == $this->_language . '/brksym') {
        geshi_dbg('Detected bracket symbol context ...');
        // Walk the token one character at a time
        for ($t2 = 0; $t2 < strlen($token); $t2++) {
            $t2sub = substr($token, $t2, 1);
            // Count opening and closing brackets to avoid highlighting of parameters called register in procedure\function declarations
            if ('(' == $t2sub || '[' == $t2sub) {
                geshi_dbg('Detected opening bracket "' . $t2sub . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...');
                $this->_bracketCount++;
            }
            if (')' == $t2sub || ']' == $t2sub) {
                // Never let the depth go negative on unbalanced input
                if (--$this->_bracketCount < 0) {
                    $this->_bracketCount = 0;
                }
                geshi_dbg('Detected closing bracket "' . $t2sub . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...');
            }
        }
    }

    // NOTE(review): stripos() yields false when "comment" is absent and 0 when
    // the context name starts with it; both make this condition true, so only
    // a match at offset >= 1 skips the block tracking. Confirm that is intended.
    if (!stripos($context_name, 'comment')) {
        if (in_array($token_l, array('begin', 'case', 'class', 'object', 'record', 'try', 'asm'))) {
            geshi_dbg('Detected opening block "' . $token_l . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...' . stripos($context_name, 'comment'));
            $this->_openBlockCount++;
            $this->_openBlockType[] = $token_l;
            if (2 <= ($obc = $this->_openBlockCount)) {
                // Check if we have a case statement inside a record definition;
                // if so, undo the push so the "case" does not count as a block.
                if ('record' == $this->_openBlockType[$obc - 2] && 'case' == $this->_openBlockType[$obc - 1]) {
                    array_pop($this->_openBlockType);
                    $this->_openBlockCount--;
                }
            }
            $this->_instrExpected = true;
            // NOTE(review): this is set for every block opener, not only for
            // "asm" - confirm whether it should depend on $token_l.
            $this->_inASMBlock = true;
        }
        if ('end' == $token_l) {
            // Never let the block depth go negative
            if (--$this->_openBlockCount < 0) {
                $this->_openBlockCount = 0;
            }
            array_pop($this->_openBlockType);
            geshi_dbg('Detected closing block "' . $token_l . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...' . stripos($context_name, 'comment'));
            // NOTE(review): this assignment has no effect (true stays true);
            // it looks like it was meant to clear the flag - confirm.
            if ($this->_inASMBlock) {
                $this->_inASMBlock = true;
            }
        }
    }

    // NOTE(review): strpos() is used as a boolean here, so a match at offset 0
    // counts as "not found"; only language names that contain "delphi/asm"
    // after their first character enter this branch.
    if ($this->_inASMBlock && strpos($this->_language, 'delphi/asm')) {
        if ($this->_instrExpected) {
            // First token of an instruction: consume the expectation
            $this->_instrExpected = false;
        } else {
            // Later tokens: these words are re-labelled as "keyop"
            if (in_array($token_l, array('and', 'not', 'or', 'shl', 'shr', 'xor'))) {
                $context_name = $this->_language . '/asm/keyop';
            }
        }
        if ($token_l == ';') {
            $this->_instrExpected = true;
        }
    }

    // If we detect a semicolon we require remembering it, thus we can highlight the register directive correctly.
    if ($context_name == $this->_language && $this->_semicolonFlag) {
        geshi_dbg('Detected token ' . $token . ' after semi-colon on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...');
        // Register is a directive here
        $this->_semicolonFlag = false;
        // Highlight as directive only if all previous opened brackets are closed again
        $isDirective = 0 == $this->_bracketCount;
        if ('register' == $token_l) {
            if (1 == $this->_openBlockCount) {
                // "&=" is a bitwise AND-assign, so $isDirective becomes 0 or 1
                // here; it is only ever used as a truth value afterwards.
                $isDirective &= 'class' == $this->_openBlockType[$this->_openBlockCount - 1] || 'object' == $this->_openBlockType[$this->_openBlockCount - 1];
                // Inside a record the word is always treated as a directive
                if ('record' == $this->_openBlockType[$this->_openBlockCount - 1]) {
                    $isDirective = true;
                }
            }
            $context_name .= $isDirective ? '/keyword' : '';
        } elseif ('message' == $token_l) {
            if (1 == $this->_openBlockCount) {
                $isDirective &= 'class' == $this->_openBlockType[$this->_openBlockCount - 1];
            }
            $context_name .= $isDirective ? '/keyword' : '';
        }
    }

    // There will be something else than a semicolon, so we finish semicolon detection here
    $this->_semicolonFlag = false;
    if ($token_l == ';') {
        $this->_semicolonFlag = true;
    }

    if ($this->_stack) {
        // Check for various conditions ...
        // If we have a store we can check now to see if the current token is a bracket
        if ($context_name != $this->_language . '/brksym' || substr(trim($token), 0, 1) != '(') {
            // Modify context to say that the keyword is actually just a bareword
            $this->_stack[0][1] = $this->_language;
        }
        //return $this->_stackFlush($token, $context_name, $data);
        return $this->flush($token, $context_name, $data);
    }

    // If we detected a keyword, instead of passing it back we will make sure it has a bracket
    // after it, so we know for sure that it is a keyword. So we save it to "_store" and return false
    // (Implemented as: contexts beginning with "<language>/stdproc" are pushed
    // and an empty array is returned.)
    if (substr($context_name, 0, strlen($this->_language . '/stdproc')) == $this->_language . '/stdproc') {
        $this->push($token, $context_name, $data);
        return array();
    }

    // Default action: just return the token (including all stored)
    return $this->flush($token, $context_name, $data);
}
/**
 * Implements parseToken to put ANSI codes around the tokens.
 *
 * Blank tokens, and tokens whose context has no ANSI code (getAnsiCode()
 * returns null), are returned unchanged. Everything else is emitted as
 * code + token + $this->resetCode.
 *
 * @param string $token        The token to highlight
 * @param string $context_name The name of the context that the token is in
 * @param array  $data         Miscellaneous data about the context (unused here)
 *
 * @return string The token, wrapped in ANSI codes where applicable
 */
public function parseToken($token, $context_name, $data)
{
    $isBlank = ('' == $token || geshi_is_whitespace($token));
    if ($isBlank) {
        return $token;
    }

    $ansiCode = $this->getAnsiCode($context_name);

    // No colour configured for this context: leave the token bare
    return (null === $ansiCode)
        ? $token
        : $ansiCode . $token . $this->resetCode;
}
/**
 * Handles any parsing that uses the stack.
 *
 * The stack is used to find function calls (note: different to function definitions)
 * and also to find static classes that haven't otherwise been detected.
 *
 * Behaviour in short:
 *  - empty stack: a non-whitespace bareword (context == language) is pushed
 *    and held back; any other token is returned as-is;
 *  - non-empty stack, non-symbol token: the token is pushed and held back;
 *  - non-empty stack, symbol token: if the symbol is "(" or "::", the last
 *    non-whitespace stack entry is re-labelled (when it is a bareword) as
 *    "/functioncall" or "/classname" respectively, and for "::" its name is
 *    remembered in $this->_classNames; then everything is flushed.
 *
 * @param string $token         The source token
 * @param string $context_name  The context of the source token
 * @param array  $data          Additional data
 * @return mixed As for the return value of {@link parseToken()}
 * @todo [blocking 1.1.5] this method could take a fourth parameter to say whether to
 *       detect just function calls, classnames or both, when it comes to configuring what
 *       the code parser actually highlights.
 */
function _handleStackParsing(&$token, &$context_name, &$data)
{
    if (!$this->_stack) {
        if ($this->_language == $context_name && !$this->_tokenIsWhitespace) {
            // A bareword: hold it back until we know what follows it
            $this->push($token, $context_name, $data);
            return array();
        }
        // Some other random token that we don't care about
        return array($token, $context_name, $data);
    }

    if ($this->_language . '/symbol' != $context_name) {
        // Store this token (either whitespace or a & symbol) on our mini-stack
        $this->push($token, $context_name, $data);
        return array();
    }

    // Worth pointing out: the object splitter is :: and is forced to be
    // in the symbol context so that's why we check for :: instead of :.
    if ('(' == $token || '::' == $token) {
        // Find the last non-whitespace token on the stack: that is the one
        // place where a function name or class name could be.
        for ($i = count($this->_stack) - 1; $i >= 0; $i--) {
            if (geshi_is_whitespace($this->_stack[$i][0])) {
                continue;
            }

            // Only barewords are converted; anything already classified is left alone
            if ($this->_language == $this->_stack[$i][1]) {
                if ('(' == $token) {
                    $this->_stack[$i][1] .= '/functioncall';
                } else {
                    $this->_stack[$i][1] .= '/classname';
                    // Remember the class NAME (index 0). Index 1 is the
                    // context string, which is the same for every class and
                    // would make the list useless for later name lookups.
                    if (!in_array($this->_stack[$i][0], $this->_classNames)) {
                        $this->_classNames[] = $this->_stack[$i][0];
                    }
                }
            }
            break;
        }
    }

    // Add the symbol onto the stack and return everything held so far
    return $this->flush($token, $context_name, $data);
}
/**
 * Implements parseToken to emit groff requests around the tokens.
 *
 * Every piece of output ends with a newline; "\c" is appended before that
 * newline so the pieces are joined without an added space. Coloured tokens
 * are bracketed by ".gcolor <name>" / ".gcolor" request lines, bold tokens
 * are emitted through a ".B" request.
 *
 * @param string $token        The token to highlight
 * @param string $context_name The name of the context that the token is in
 * @param array  $data         Miscellaneous data about the context (unused here)
 *
 * @return string The token wrapped in groff requests
 */
public function parseToken($token, $context_name, $data)
{
    // Blank tokens get no colour handling
    if ('' == $token || geshi_is_whitespace($token)) {
        // Whitespace that already ends the line is passed through as-is.
        // (substr(), not strpos(): we need the last character itself.)
        if (substr($token, -1) === "\n") {
            return $token;
        }
        return $token . "\\c\n";
    }

    // getAnsiCode() is expected to return array(colour name, bold flag)
    list($color, $bold) = $this->getAnsiCode($context_name);

    // A backslash is groff's escape character; "\e" prints a literal one
    $text = str_replace('\\', '\\e', $token);

    $prefix = '';
    if ($bold) {
        $prefix .= '.B ';
    } elseif ($token[0] == '.' || $token[0] == '\'') {
        // A leading "." or "'" would start a request line; shield it with a
        // zero-width space
        $prefix .= '\\&';
    }

    if ($color == 'black') {
        // Default colour: no .gcolor requests needed
        $retval = $prefix . $text . "\\c\n";
    } else {
        $retval = '.gcolor ' . $color . "\n"
            . $prefix . $text . "\\c\n"
            . ".gcolor\n";
    }

    return $retval;
}
/**
 * C-specific token post-processing: preprocessor directive highlighting and
 * re-insertion of escaped newlines.
 *
 * Part 1 is a state machine over $this->_state (GESHI_C_* constants) that
 * adjusts the context name and link URL of tokens inside preprocessor
 * directives, and joins the pieces of a <header> name into a single token.
 * Part 2 splices backslash-newline pairs, whose positions are recorded in
 * $this->_escapedNewlineLocations, back into the token stream.
 *
 * @param string $token        The source token
 * @param string $context_name The context of the source token
 * @param array  $data         Additional data
 * @return array An empty array while a header name is being collected, a
 *               single array($token, $context_name, $data), or an array of
 *               such arrays
 */
function parseToken($token, $context_name, $data)
{
    $flush_stdhdr = false;
    // $ret holds REFERENCES, so every later change to $token, $context_name
    // or $data below is visible through $ret without re-assigning it.
    $ret = array(&$token, &$context_name, &$data);

    // Detect the start of a preprocessor directive and set state.
    if (($context_name == $this->_language . '/preprocessor/start' || $context_name == $this->_language . '/preprocessor/directive') && $this->_state == GESHI_C_NORMAL) {
        $this->_state = GESHI_C_PPSTART;
        $this->_initial_hash = false;
        $this->_hltIfElifPPkeyWords = false;
    }

    // Set when the current token IS the directive name, so that the
    // per-state handling below does not treat it as the directive's argument.
    $skipfirst = false;

    // Highlight and link preprocessor directives; set preliminary state.
    if ($context_name == $this->_language . '/preprocessor/end') {
        $this->_state = GESHI_C_NORMAL;
    } elseif ($this->_state == GESHI_C_PPSTART) {
        if ($token == '#') {
            $this->_initial_hash = true;
        } elseif (in_array($token, geshi_c_get_start_of_line_PP_directives_hashsym())) {
            $skipfirst = true;
            $data['url'] = geshi_c_get_start_of_line_PP_directives_hashsym_url($token);
            // Pick the follow-up state from the directive name
            $this->_state = $token == 'include' ? GESHI_C_PPINCLUDE : ($token == 'error' ? GESHI_C_PPERROR : ($token == 'pragma' ? GESHI_C_PPPRAGMA : ($token == 'line' ? GESHI_C_PPLINE : GESHI_C_PP)));
            $this->_hltIfElifPPkeyWords = in_array($token, geshi_c_get_if_elif_PP_directives());
            $context_name = $this->_language . '/preprocessor/directive';
        } elseif (in_array($token, geshi_c_get_start_of_line_PP_directives_nohashsym()) && !$this->_initial_hash) {
            $skipfirst = true;
            $data['url'] = geshi_c_get_start_of_line_PP_directives_nohashsym_url($token);
            $context_name = $this->_language . '/preprocessor/directive';
            $this->_state = $token == '_Pragma' ? GESHI_C_PPPRAGMA : GESHI_C_PP;
        } elseif ($this->_initial_hash && !geshi_is_whitespace($token) && $token != '\\') {
            // "#" followed by something that is not a known directive
            $data['url'] = geshi_c_get_non_std_preproc_directives_url();
            $context_name = $this->_language . '/preprocessor/directive';
            $this->_state = GESHI_C_PPNONSTD;
        }
    }

    // Perform highlighting/context adjustments based on preliminary state;
    // adjust state as required.
    switch ($this->_state) {
        case GESHI_C_PPPRAGMA:
        case GESHI_C_PPERROR:
            // Remove from #pragma and #error directives the highlighting of
            // all sub-contexts except comments and the directives themselves
            $start = $this->_language . '/preprocessor/';
            $start_len = strlen($start);
            if (substr($context_name, 0, $start_len) == $start) {
                $sub_context = substr($context_name, $start_len);
                if (!in_array($sub_context, array('directive', 'multi_comment', 'multi_comment/start', 'multi_comment/end', 'single_comment', 'single_comment/start'))) {
                    // Collapse to "<language>/preprocessor" (prefix minus its trailing slash)
                    $context_name = substr($start, 0, -1);
                    $data['url'] = null;
                }
            }
            break;
        case GESHI_C_PPNONSTD:
            // Mark everything following a non-standard preprocessor directive;
            // also mark the directive itself.
            $context_name = $this->_language . '/preprocessor/nonstd';
            break;
        case GESHI_C_PPINCLUDE:
            // Highlight and link standard headers; also concatenate tokenised
            // header names into a single token to remove symbol contexts.
            // NOTE(review): $token[0] assumes a non-empty token - confirm that
            // empty tokens cannot reach this state.
            if ($token[0] == '<') {
                $this->_state = GESHI_C_PPHDRSTART;
                // special-case handling for e.g. </dir/file.h> where "</" will
                // be tokenised as a single symbol
                $this->_provisional_hdr = substr($token, 1);
                $token = '<';
                $context_name = $this->_language . '/preprocessor/symbol/stdinclude';
            } elseif ($token == '"') {
                // Override the highlighting as a string literal of
                // "headername.h" in #include "headername.h" (mostly to avoid
                // the highlighting of escape sequences such as \n and \t -
                // they aren't defined in that context)
                $this->_state = GESHI_C_PPIMPLHDRSTART;
                // NOTE(review): unlike every other context built in this
                // method there is no "/" between the language and
                // "preprocessor", and the name differs from the
                // ".../include/implheader" used in GESHI_C_PPIMPLHDRSTART
                // below - confirm which name is intended.
                $context_name = $this->_language . 'preprocessor/implheader';
            } elseif (trim($token) != '' && !$skipfirst) {
                // String literals and < and > characters could occur as part of
                // a macro that's substituting for the header that follows
                // #include, so allow their highlighting in that context.
                $this->_state = GESHI_C_PPINCLUDEMACROSTART;
                // Don't highlight the macro name itself though; an exception
                // can be made of any standard macros that resolve to a quoted
                // string (TODO).
                $context_name = $this->_language . '/preprocessor';
                $data['url'] = null;
            }
            break;
        case GESHI_C_PPLINE:
            if (trim($token) != '' && !$skipfirst) {
                // Avoid highlighting when a macro substitutes for the #line
                // line number (optionally +filename) in the same way as when a
                // macro substitutes for a #include header.
                if ($context_name != $this->_language . '/preprocessor/num/int') {
                    $this->_state = GESHI_C_PPINCLUDEMACROSTART;
                    $context_name = $this->_language . '/preprocessor';
                    $data['url'] = null;
                } else {
                    $this->_state = GESHI_C_PP;
                }
            }
            break;
        case GESHI_C_PPINCLUDEMACROSTART:
            // The macro name has been seen; "(" opens its argument list
            if ($token == '(') {
                $this->_state = GESHI_C_PPINCLUDEMACRO;
            }
            break;
        case GESHI_C_PPINCLUDEMACRO:
            // Per the comments in c.php, restrict highlighting for the
            // arguments of macros that substitute for a #include's header or a
            // #line's line number (and filename).
            if ($context_name == $this->_language . '/preprocessor/declarator-keyword' || $context_name == $this->_language . '/preprocessor/ctlflow-keyword') {
                $context_name = $this->_language . '/preprocessor';
                $data['url'] = null;
            }
            break;
        case GESHI_C_PPHDRSTART:
            if ($token == '>') {
                // End of <header>: emit the collected name (see $flush_stdhdr below)
                $context_name = $this->_language . '/preprocessor/symbol/stdinclude';
                if (in_array($this->_provisional_hdr, geshi_c_get_standard_headers())) {
                    $flush_stdhdr = 'STDLINK';
                } else {
                    $flush_stdhdr = true;
                }
                $this->_state = GESHI_C_PP;
            } else {
                // Still inside <...>: swallow the token into the header name
                $this->_provisional_hdr .= $token;
                $ret = false;
            }
            break;
        case GESHI_C_PPIMPLHDRSTART:
            // NOTE(review): no "/" between the language and "preprocessor"
            // here either - confirm.
            $context_name = $this->_language . 'preprocessor/include/implheader';
            if ($token == '"') {
                $this->_state = GESHI_C_PP;
            }
            break;
    }

    if ($flush_stdhdr) {
        // Build a synthetic token for the whole header name and return it
        // in front of the current ">" token.
        $hdrtoken[0] = $this->_provisional_hdr;
        $hdrtoken[1] = $this->_language . '/preprocessor/include';
        if ($flush_stdhdr === 'STDLINK') {
            $hdrtoken[1] .= '/stdheader';
            $hdrtoken[2]['url'] = geshi_c_get_standard_headers_url($this->_provisional_hdr);
        } else {
            $hdrtoken[1] .= '/nonstdheader';
            // consider: would it be appropriate to instead implement and
            // call a geshi_c_get_non_std_stdheader_url()?
            $hdrtoken[2]['url'] = null;
        }
        $tmp = array($hdrtoken, $ret);
        $ret = $tmp;
        $this->_provisional_hdr = ''; // redundant but included for safety
    }

    //
    // Highlight and link sub-directives that can only occur within a #if or
    // #elif preprocessor directive (i.e. "defined").
    //
    // NOTE(review): "&" is a bitwise AND, so this presumably relies on the
    // GESHI_C_* state constants being bit flags - confirm against their
    // definitions.
    if ($this->_state & GESHI_C_PP && !$skipfirst && $this->_hltIfElifPPkeyWords) {
        if (in_array($token, geshi_c_get_if_elif_PP_subdirectives())) {
            $data['url'] = geshi_c_get_if_elif_PP_subdirectives_url($token);
            // NOTE(review): hard-coded 'c/c' rather than $this->_language,
            // unlike every other context assigned in this method.
            $context_name = 'c/c/preprocessor/directive';
        }
    }

    //
    // Now we look at the data we have from looking at the source before
    // parsing began. This data tells us where "\\\n" occurs in the
    // source. We took them out then so that the parser could highlight
    // everything normally, but now we need to put them back in.
    //
    // The possibility that sometimes $ret may contain an array of tokens
    // complicates things somewhat, but hopefully this code is portable
    // enough to handle that.
    //

    // Firstly: if we are storing data for later, return
    if (false === $ret) {
        return array();
    }

    // Check that there is a location to search for. Code below removes
    // occurrences as they happen so once there are no more occurrences
    // this will never execute.
    if (isset($this->_escapedNewlineLocations[0])) {
        $result = array();
        $location = $this->_parseLocation;

        // Get a copy of the token (not a reference) that we can clobber as
        // we please, and put into the "array of tokens" form.
        if (!is_array($ret[0])) {
            $thetokens = array($ret);
            $length = strlen($thetokens[0][0]);
        } else {
            // This makes the assumption that returned array of arrays only
            // has two elements. This is true for the code parser as of
            // 2006/06/17, if it is not true in the future then a more
            // portable length gathering loop will have to be written
            $thetokens = $ret;
            $length = strlen($thetokens[0][0]) + strlen($thetokens[1][0]);
        }

        foreach ($thetokens as $eachtoken) {
            $sublocation = 0;
            // Check to see if the next occurrence happens inside this
            // token (the while loop allows the check to be done multiple
            // times for the same token)
            while (isset($this->_escapedNewlineLocations[0]) && $location + strlen($eachtoken[0]) - $sublocation >= $this->_escapedNewlineLocations[0]) {
                // If inside the loop then we found an occurrence, this
                // gets the position of the occurrence inside the current
                // token.
                $pos = array_shift($this->_escapedNewlineLocations) - $location;
                // Store the part before the occurrence
                $result[] = array(substr($eachtoken[0], $sublocation, $pos), $eachtoken[1], $eachtoken[2]);
                // Store the occurrence
                $result[] = array("\\\n", $this->_language . '/symbol/line-continuation', array());
                // Do some fancy math:
                //  - the base location ($location) has increased
                //  - the location inside the token ($sublocation) has
                //    increased
                //  - the length of the string has increased because of
                //    the occurrence
                $location += $pos + 2;
                $sublocation += $pos;
                $length += 2;
            }
            // Now we add what ever is left after adding occurrences. This
            // may be the entire token if no occurrences happened inside it.
            $result[] = array(substr($eachtoken[0], $sublocation), $eachtoken[1], $eachtoken[2]);
        }

        // Increment where we are up to
        $this->_parseLocation += $length;
        return $result;
    }

    // No fancy "\\\n" replacement happening (note at this point we have
    // given up incrementing the parse location because it's no longer
    // needed).
    return $ret;
}
/**
 * Java-specific token post-processing.
 *
 * Whitespace and doxygen tokens are parked on the stack. Any other token is
 * first matched against the name lists collected so far (variables, classes,
 * interfaces, enum values, exceptions, annotations - in that order, first
 * match wins), then handed to the individual language-feature checks, and
 * finally pushed on the stack. The stack is only flushed when the token
 * matched one of the name lists.
 *
 * After the push, $this->_prev_* reference the entry just pushed and
 * $this->_prev_prev_* reference the closest earlier non-whitespace entry.
 *
 * @param string $token        The source token
 * @param string $context_name The context of the source token
 * @param array  $data         Additional data
 * @return array The flushed stack, or an empty array while tokens are held back
 */
function parseToken($token, $context_name, $data)
{
    // Whitespace is not interesting: keep it on the store for flushing later
    if (geshi_is_whitespace($token)) {
        $this->push($token, $context_name, $data);
        return array();
    }

    // Doxygen content is not interesting either
    if ('doxygen/doxygen' == substr($context_name, 0, 15)) {
        $this->push($token, $context_name, $data);
        return array();
    }

    $flush = false;

    // Easy things first: a bareword that is already a known name
    if ($this->_language == $context_name) {
        // property holding the names => context suffix (order matters)
        $knownNameLists = array(
            '_variableNames'   => '/variable',
            '_classNames'      => '/class_name',
            '_interfaceNames'  => '/interface',
            '_enumValueNames'  => '/enum_value',
            '_exceptionNames'  => '/exception',
            '_annotationNames' => '/annotation',
        );
        foreach ($knownNameLists as $property => $suffix) {
            if (in_array($token, $this->$property)) {
                $context_name = $this->_language . $suffix;
                $flush = true;
                break;
            }
        }
    }

    // Check for all important language features
    $this->packageImportCheck($token, $context_name);
    $this->staticClassCheck($token, $context_name);
    $this->abstractStaticCheck($token, $context_name);
    $this->classCheck($token, $context_name);
    $this->exceptionCheck($token, $context_name);
    $this->enumCheck($token, $context_name);
    $this->variableCheck($token, $context_name);
    $this->methodCheck($token, $context_name);
    $this->genericCheck($token, $context_name);
    $this->annotationCheck($token, $context_name);

    $this->push($token, $context_name, $data);

    // Keep references to the entry that was just pushed
    $last = count($this->_stack) - 1;
    $this->_prev_token   =& $this->_stack[$last][0];
    $this->_prev_context =& $this->_stack[$last][1];
    $this->_prev_data    =& $this->_stack[$last][2];

    // ... and to the nearest non-whitespace entry before it.
    // NOTE(review): the scan stops before index 0, so the very first stack
    // entry is never picked up here - confirm whether that is intended.
    $earlier = $last - 1;
    while ($earlier > 0) {
        if (!geshi_is_whitespace($this->_stack[$earlier][0])) {
            $this->_prev_prev_token   =& $this->_stack[$earlier][0];
            $this->_prev_prev_context =& $this->_stack[$earlier][1];
            $this->_prev_prev_data    =& $this->_stack[$earlier][2];
            break;
        }
        $earlier--;
    }

    return $flush ? $this->flush() : array();
}
/**
 * Parses the given code within this context.
 *
 * $code is taken by reference and consumed from the front as it is parsed:
 * on return it holds whatever this context did not handle. The loop
 * repeatedly asks for the earliest child context start
 * (_getEarliestContextData) and for this context's end
 * (_getContextEndData) and processes whichever comes first.
 *
 * @param string $code                       Code to parse; trimmed in place
 * @param int    $context_start_key          Passed through to _getContextEndData()
 * @param string $context_start_delimiter    The delimiter that started this context, if known
 * @param string $ignore_context             Name of a child context that must not be matched
 *                                           (prevents infinite recursion, see below)
 * @param string $first_char_of_next_context Passed as second argument to _addParseData()
 *                                           when the rest of $code belongs to this context
 * @return mixed $finish_data['dlm'] when this context's end was found,
 *               otherwise nothing
 */
function parseCode(&$code, $context_start_key = -1, $context_start_delimiter = '', $ignore_context = '', $first_char_of_next_context = '')
{
    geshi_dbg('*** GeSHiContext::parseCode(' . $this->_contextName . ') ***');
    geshi_dbg('CODE: ' . str_replace("\n", "\r", substr($code, 0, 100)) . "<<<<<\n");
    if ($context_start_delimiter) {
        geshi_dbg('Delimiter: ' . $context_start_delimiter);
    }

    // Skip empty/almost empty contexts
    if ('' == $code || geshi_is_whitespace($code)) {
        $this->_addParseData($code);
        return;
    }

    // Add the start of this context to the parse data if it is already known
    // NOTE: related to bug 75: if remove childLanguage check, then the
    // start delimiter is marked as lang/dialect/start instead of whatever the
    // language would have marked it as.
    // This means that, for example with doxygen, beginning
    // doxygen within java means that the doxygen starter
    // is parsed as doxygen code. I guess that is reasonable
    // and the intended thing for GESHI_CHILD_PARSE_LEFT/BOTH
    //
    // NOTE: say we use GESHI_CHILD_PARSE_RIGHT for doxygen delimiter.
    // Then the left delimiter will be parsed as java/java/multi_comment_start
    // then the doxygen, then the ender for doxygen. But the multi_comment
    // will end immediately. I don't think this is a bug, it's more of a caveat.
    // I think this happens for embedded languages also.
    if ($context_start_delimiter && !$this->_isChildLanguage) {
        $this->_addParseDataStart($context_start_delimiter);
        $code = substr($code, strlen($context_start_delimiter));
    }

    // Length at loop entry; "$code_len == $original_length" below therefore
    // means "nothing has been consumed yet", i.e. we are at the context start.
    $original_length = strlen($code);

    while ('' != $code) {
        $code_len = strlen($code);
        if ($code_len != $original_length) {
            geshi_dbg('CODE: ' . str_replace("\n", "\r", substr($code, 0, 100)) . "<<<<<\n");
        }
        // Second parameter: if we are at the start of the context or not
        // Pass the ignored context so it can be properly ignored
        $earliest_context_data = $this->_getEarliestContextData($code, $code_len == $original_length, $ignore_context);
        $finish_data = $this->_getContextEndData($code, $context_start_key, $context_start_delimiter, $code_len == $original_length);
        // NOTE(review): these two debug lines index both results before the
        // truthiness checks below - if either lookup returned a falsy value
        // this reads offsets of a non-array. Confirm what the lookups return
        // on "not found".
        geshi_dbg('@bEarliest context data: pos=' . $earliest_context_data['pos'] . ', len=' . $earliest_context_data['len']);
        geshi_dbg('@bFinish data: pos=' . $finish_data['pos'] . ', len=' . $finish_data['len']);

        // If there is earliest context data we parse up to it then hand control to that context
        if ($earliest_context_data) {
            if ($finish_data) {
                // Merge to work out who wins
                if ($finish_data['pos'] <= $earliest_context_data['pos']) {
                    geshi_dbg('Earliest context and Finish data: finish is closer');
                    // For a child language the ender is included in this
                    // context's own parse data instead of being added separately
                    if ($this->shouldParseEnder() && $this->_isChildLanguage) {
                        $finish_data['pos'] += $finish_data['len'];
                    }
                    // Add the parse data
                    $this->_addParseData(substr($code, 0, $finish_data['pos']), substr($code, $finish_data['pos'], 1));
                    // If we should pass the ender, add the parse data
                    if ($this->shouldParseEnder() && !$this->_isChildLanguage) {
                        $this->_addParseDataEnd(substr($code, $finish_data['pos'], $finish_data['len']));
                        $finish_data['pos'] += $finish_data['len'];
                    }
                    // Trim the code and return the unparsed delimiter
                    $code = substr($code, $finish_data['pos']);
                    return $finish_data['dlm'];
                } else {
                    geshi_dbg('Earliest and finish data, but earliest gets priority');
                    // $foo only drives the debug message below
                    $foo = true;
                }
            } else {
                $foo = true; /** no finish data */
            }

            if (isset($foo)) {
                geshi_dbg('Earliest data but not finish data');
            }

            // Highlight up to delimiter
            ///The "+ len" can be manipulated to do starter and ender data
            if (!$earliest_context_data['con']->shouldParseStarter()) {
                $earliest_context_data['pos'] += $earliest_context_data['len'];
                //BUGFIX: null out dlm so it doesn't squash the actual rest of context
                $earliest_context_data['dlm'] = '';
            }

            // We should parseCode() the substring.
            // BUT we have to remember that we should ignore the child context we've matched,
            // else we'll have a wee recursion problem on our hands...
            $tmp = substr($code, 0, $earliest_context_data['pos']);
            // NOTE(review): $code[pos] is read directly; if pos equals
            // strlen($code) this is an out-of-range string offset - confirm
            // that cannot happen.
            $this->parseCode($tmp, -1, '', $earliest_context_data['con']->name(), $code[$earliest_context_data['pos']]); // parse with no starter
            $code = substr($code, $earliest_context_data['pos']);
            // Hand the rest to the child context; it trims $code by reference
            $ender = $earliest_context_data['con']->parseCode($code, $earliest_context_data['key'], $earliest_context_data['dlm']);
            // check that the earliest context actually wants the ender
            if (!$earliest_context_data['con']->shouldParseEnder() && $earliest_context_data['dlm'] == $ender) {
                geshi_dbg('earliest_context_data[dlm]=' . $earliest_context_data['dlm'] . ', ender=' . $ender);
                // second param = first char of next context
                $ender_len = strlen($ender);
                $this->_addParseData(substr($code, 0, $ender_len), $code[$ender_len]);
                $code = substr($code, $ender_len);
            }
        } else {
            if ($finish_data) {
                // finish early...
                geshi_dbg('No earliest data but finish data');
                if ($this->shouldParseEnder() && $this->_isChildLanguage) {
                    $finish_data['pos'] += $finish_data['len'];
                }
                // second param = first char of next context
                $this->_addParseData(substr($code, 0, $finish_data['pos']), $code[$finish_data['pos']]);
                if ($this->shouldParseEnder() && !$this->_isChildLanguage) {
                    $this->_addParseDataEnd(substr($code, $finish_data['pos'], $finish_data['len']));
                    $finish_data['pos'] += $finish_data['len'];
                }
                $code = substr($code, $finish_data['pos']);
                // Hand the end delimiter back to the caller (the parent
                // context compares it against its own 'dlm' above)
                return $finish_data['dlm'];
            } else {
                geshi_dbg('No earliest or finish data');
                // All remaining code is in this context
                $this->_addParseData($code, $first_char_of_next_context);
                $code = '';
                return; // not really needed (?)
            }
        }
    }
}
/**
 * Eiffel-specific token post-processing.
 *
 * An identifier in the bare 'eiffel/eiffel' context that is not written in
 * all capitals cannot be classified on its own, so it is pushed on the
 * stack and remembered in $this->_last_token. The next non-whitespace token
 * decides its context:
 *  - ":" appends '/tagname' or '/featurename' depending on
 *    $this->_parse_context (nothing is appended for other parse contexts);
 *  - anything else appends '/featurename'.
 * Keywords update $this->_parse_context for later decisions.
 *
 * @param string $token        The source token
 * @param string $context_name The context of the source token
 * @param array  $data         Additional data
 * @return array An empty array while a token is held back, a single
 *               array($token, $context_name, $data), or the flushed stack
 *               with the current token appended
 */
function parseToken($token, $context_name, $data)
{
    // Whitespace: hold it back only if an undecided token is waiting
    if (geshi_is_whitespace($token)) {
        if (!$this->_last_token) {
            return array($token, $context_name, $data);
        }
        $this->push($token, $context_name, $data);
        return array();
    }

    // A previous token is waiting for a decision - maybe now we know enough
    if ($this->_last_token) {
        $pending = $this->flush();

        // Anything but ":" means attribute name, feature name or feature
        // call; default to feature name.
        $suffix = '/featurename';
        if ($token == ':') {
            // Before a colon the parse context tells us whether this is a tag
            if ($this->_parse_context == GESHI_EIFFEL_TAG_BEFORE_COLON) {
                $suffix = '/tagname';
            } elseif ($this->_parse_context == GESHI_EIFFEL_FEATURE_BEFORE_COLON
                || $this->_parse_context == GESHI_EIFFEL_UNKNOWN) {
                $suffix = '/featurename';
            } else {
                $suffix = null;
            }
        }
        if (null !== $suffix) {
            $pending[0][1] .= $suffix;
        }

        $pending[] = array($token, $context_name, $data);
        $this->_last_token = NULL;
        return $pending;
    }

    // Nothing pending: check whether the current token is properly categorised
    if ($context_name == 'eiffel/eiffel') {
        // All-capitals identifiers are taken to be class names. This works as
        // long as the code follows the standard naming conventions (a generic
        // parameter would match too; telling them apart needs a stateful parser).
        if (preg_match('#^[A-Z_]+$#', $token)) {
            return array($token, $context_name . '/classname', $data);
        }

        // Undecided: keep it and wait for the next token
        $this->push($token, $context_name, $data);
        $this->_last_token = $token;
        return array();
    }

    if ($context_name == 'eiffel/eiffel/keyword') {
        // Some keywords change how a following "name:" is to be read
        if (in_array($token, array('indexing', 'note', 'require', 'ensure', 'invariant', 'check'))) {
            $this->_parse_context = GESHI_EIFFEL_TAG_BEFORE_COLON;
        } elseif (in_array($token, array('local', 'agent', 'create', 'feature'))) {
            $this->_parse_context = GESHI_EIFFEL_FEATURE_BEFORE_COLON;
        } elseif (in_array($token, array('end', 'do', 'once'))) {
            $this->_parse_context = GESHI_EIFFEL_UNKNOWN;
        }
    }

    // Class names in comments (todo: check against default class names and
    // add link data), keywords and all other tokens pass through unchanged
    return array($token, $context_name, $data);
}