/**
 * Parses a single Delphi token, updating the parser's bracket/block/flag
 * state and possibly rewriting the token's context name before it is
 * handed on.
 *
 * Tokens are either held back with push() (in which case an empty array is
 * returned) or released together with anything already held via flush().
 *
 * @todo [blocking 1.1.5] delphi fixes:
 *   - highlight default keyword if after ; in property context
 *   - don't highlight functions if not before "(" brackets (alpha)
 *
 * @param string $token        The token text
 * @param string $context_name The name of the context the token was found in
 * @param mixed  $data         Extra data for the token; only the optional
 *                             'alias_name' key is inspected here
 * @return mixed An empty array when the token was stored, otherwise
 *               whatever flush() returns
 */
function parseToken($token, $context_name, $data)
{
    geshi_dbg('GeSHiDelphiCodeParser::parseToken("' . substr(str_replace("\n", '\\n', $token), 0, 15) . '"...,' . $context_name . ')');

    // A line break cancels a pending semicolon and means an (asm)
    // instruction is expected next
    if (false !== strpos($token, "\n")) {
        $this->_semicolonFlag = false;
        $this->_instrExpected = true;
    }

    // Whitespace never changes state: append it to the held tokens if there
    // are any, otherwise pass it straight through
    if (geshi_is_whitespace($token)) {
        if ($this->_stack) {
            $this->push($token, $context_name, $data);
            return array();
        } else {
            return $this->flush($token, $context_name, $data);
        }
    }

    $token_l = strtolower(trim($token));

    // @todo for ben: here is an example of how this could work. You can make it better and
    // experiment with how this functionality works. I tested this only on simple examples, and
    // I know that currently the _defaultFlag could be reset to 0 earlier than it is if there is
    // a mistake with parsing.
    //
    // _defaultFlag: 0 = idle, 1 = property seen, 2 = property followed by ';'
    if (2 == $this->_defaultFlag) {
        if ('default' == $token_l) {
            $context_name = $this->_language . '/keyword';
            $this->_defaultFlag = 0;
        } elseif ('' != trim($token)) {
            $this->_defaultFlag = 0;
        }
    }

    // @todo for ben: I don't think alias_name is set anymore, maybe you want to check
    // that this functionality works now?
    if (0 == $this->_defaultFlag && isset($data['alias_name']) && $data['alias_name'] == $this->_language . '/property') {
        $this->_defaultFlag = 1;
    }
    if (1 == $this->_defaultFlag && ';' == trim($token)) {
        $this->_defaultFlag = 2;
    }

    // @todo for ben: now symbols are handed in one at a time, maybe this can be optimised?
    if ($context_name == $this->_language . '/brksym') {
        geshi_dbg('Detected bracket symbol context ...');
        for ($t2 = 0; $t2 < strlen($token); $t2++) {
            $t2sub = substr($token, $t2, 1);
            // Count opening and closing brackets to avoid highlighting of parameters
            // called register in procedure\function declarations
            if ('(' == $t2sub || '[' == $t2sub) {
                geshi_dbg('Detected opening bracket "' . $t2sub . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...');
                $this->_bracketCount++;
            }
            if (')' == $t2sub || ']' == $t2sub) {
                // Never let the counter go negative on unbalanced input
                if (--$this->_bracketCount < 0) {
                    $this->_bracketCount = 0;
                }
                geshi_dbg('Detected closing bracket "' . $t2sub . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...');
            }
        }
    }

    // Block tracking is skipped for comment contexts. The comparison is
    // against false so that a match at offset 0 also counts as a comment.
    if (false === stripos($context_name, 'comment')) {
        if (in_array($token_l, array('begin', 'case', 'class', 'object', 'record', 'try', 'asm'))) {
            geshi_dbg('Detected opening block "' . $token_l . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...' . stripos($context_name, 'comment'));
            $this->_openBlockCount++;
            $this->_openBlockType[] = $token_l;
            if (2 <= ($obc = $this->_openBlockCount)) {
                // A case statement inside a record definition has no "end"
                // of its own, so it must not be counted as a block
                if ('record' == $this->_openBlockType[$obc - 2] && 'case' == $this->_openBlockType[$obc - 1]) {
                    array_pop($this->_openBlockType);
                    $this->_openBlockCount--;
                }
            }
            $this->_instrExpected = true;
            $this->_inASMBlock = true;
        }
        if ('end' == $token_l) {
            if (--$this->_openBlockCount < 0) {
                $this->_openBlockCount = 0;
            }
            array_pop($this->_openBlockType);
            geshi_dbg('Detected closing block "' . $token_l . '" on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...' . stripos($context_name, 'comment'));
            // Closing a block leaves the asm block (this used to re-assign
            // true, so the flag could never be cleared)
            $this->_inASMBlock = false;
        }
    }

    // Inside asm, the first token of an instruction is the mnemonic; the
    // operator words are only re-labelled when they appear after it
    if ($this->_inASMBlock && false !== strpos($this->_language, 'delphi/asm')) {
        if ($this->_instrExpected) {
            $this->_instrExpected = false;
        } else {
            if (in_array($token_l, array('and', 'not', 'or', 'shl', 'shr', 'xor'))) {
                $context_name = $this->_language . '/asm/keyop';
            }
        }
        if ($token_l == ';') {
            $this->_instrExpected = true;
        }
    }

    // If we detect a semicolon we require remembering it, thus we can highlight
    // the register directive correctly.
    if ($context_name == $this->_language && $this->_semicolonFlag) {
        geshi_dbg('Detected token ' . $token . ' after semi-colon on level BC' . $this->_bracketCount . '\\OBC' . $this->_openBlockCount . '...');
        // Register is a directive here
        $this->_semicolonFlag = false;
        // Highlight as directive only if all previous opened brackets are closed again
        $isDirective = 0 == $this->_bracketCount;
        if ('register' == $token_l) {
            if (1 == $this->_openBlockCount) {
                $isDirective &= 'class' == $this->_openBlockType[$this->_openBlockCount - 1] || 'object' == $this->_openBlockType[$this->_openBlockCount - 1];
                if ('record' == $this->_openBlockType[$this->_openBlockCount - 1]) {
                    $isDirective = true;
                }
            }
            $context_name .= $isDirective ? '/keyword' : '';
        } elseif ('message' == $token_l) {
            if (1 == $this->_openBlockCount) {
                $isDirective &= 'class' == $this->_openBlockType[$this->_openBlockCount - 1];
            }
            $context_name .= $isDirective ? '/keyword' : '';
        }
    }

    // There will be something else than a semicolon, so we finish semicolon detection here
    $this->_semicolonFlag = false;
    if ($token_l == ';') {
        $this->_semicolonFlag = true;
    }

    if ($this->_stack) {
        // Something is being held back: unless the current token is an
        // opening round bracket, the held keyword is really just a bareword
        if ($context_name != $this->_language . '/brksym' || substr(trim($token), 0, 1) != '(') {
            $this->_stack[0][1] = $this->_language;
        }
        return $this->flush($token, $context_name, $data);
    }

    // A standard procedure name is held back until the next token shows
    // whether a bracket follows it
    if (substr($context_name, 0, strlen($this->_language . '/stdproc')) == $this->_language . '/stdproc') {
        $this->push($token, $context_name, $data);
        return array();
    }

    // Default action: just return the token (including all stored)
    return $this->flush($token, $context_name, $data);
}
/**
 * Checks whether the character(s) at the start of the parameter string are
 * characters that should be escaped.
 *
 * Entries of $chars_to_escape that start with "REGEX" are matched through
 * geshi_get_position() and must match at position 0; every other entry is
 * compared against the first character of $code.
 *
 * @param string $code            The string to check the beginning of for escape characters
 * @param array  $chars_to_escape The escape entries to test, in order
 * @return int|false The length of the escape character sequence, else false
 */
function _shouldBeEscaped($code, $chars_to_escape)
{
    geshi_dbg('Checking: ' . substr($code, 0, 15));

    // Loop-invariant; the cast keeps the comparison string-to-string even
    // where substr() yields false for an empty $code
    $first_char = (string) substr($code, 0, 1);

    foreach ($chars_to_escape as $match) {
        if ('REGEX' != substr($match, 0, 5)) {
            geshi_dbg('Test: ' . $match);
            // Strict comparison: with == two numeric strings are compared
            // as numbers, so '0' would wrongly match an entry such as '00'
            if ($first_char === (string) $match) {
                return 1;
            }
        } else {
            geshi_dbg('  Testing via regex: ' . $match . '... ', false);
            $data = geshi_get_position($code, $match, 0);
            // Only a match right at the start of $code counts
            if (0 === $data['pos']) {
                geshi_dbg('match, data = ' . print_r($data, true));
                return $data['len'];
            }
            geshi_dbg('no match');
        }
    }

    // No matches...
    return false;
}
/**
 * Loads theme style data for a language, if a reload is pending.
 *
 * The first readable "<theme>/<language>.php" file found while walking
 * $this->themes is included. Local variable names are deliberately left
 * untouched because the included theme file executes in this method's scope.
 *
 * @param string  $language   The language to load styles for; falls back to
 *                            $this->language when empty
 * @param boolean $load_theme When true, existing style data is discarded
 *                            first and the reload flag is cleared afterwards
 */
function loadStyles($language = '', $load_theme = false)
{
    $language = $language ? $language : $this->language;
    geshi_dbg('GeSHiStyler::loadStyles(' . $language . ')');

    // Nothing to do unless a reload has been requested
    if (!$this->reloadThemeData) {
        return;
    }

    geshi_dbg('  Loading theme data');
    if ($load_theme) {
        // Trash old data
        geshi_dbg('  Old data trashed');
        $this->_styleData = array();
    }

    // Pretend to be the requested language while the theme file runs, so
    // that extra style names behave; restored straight afterwards
    $tmp = $this->language;
    $this->language = $language;
    foreach ($this->themes as $theme) {
        $theme_file = GESHI_THEMES_ROOT . $theme . GESHI_DIR_SEP . $language . '.php';
        if (!is_readable($theme_file)) {
            continue;
        }
        require $theme_file;
        break;
    }
    $this->language = $tmp;

    if ($load_theme) {
        $this->reloadThemeData = false;
    }
}
/**
 * Overrides _addParseData so that escape sequences are added under this
 * context's "/esc" name; anything else is delegated to the parent.
 *
 * @param string $code                       The code to add
 * @param string $first_char_of_next_context Passed through to the parent implementation
 */
function _addParseData($code, $first_char_of_next_context = '')
{
    geshi_dbg('GeSHiSingleCharContext::_addParseData(' . substr($code, 0, 15) . '...)');

    // Ordinary (non-escape) content is the parent's business
    if (!$this->_isEscapeSeq) {
        parent::_addParseData($code, $first_char_of_next_context);
        return;
    }

    $escape_context = $this->_contextName . '/esc';
    $this->_styler->addParseData(
        $code,
        $escape_context,
        $this->_getExtraParseData(),
        $this->_complexFlag
    );
}
/**
 * Adds code detected as being in this context to the parse data, splitting
 * out anything matched by $this->_regexes (variables) along the way.
 *
 * The code is consumed front to back: on every pass the earliest regex match
 * is located, the text before it is given to the parent implementation, the
 * match is added under "<parent name>/esc", "/var", "/symbol" or
 * "/oodynamic", and the consumed part is chopped off. Whatever is left when
 * nothing matches any more goes to the parent implementation.
 *
 * @param string $code                       The code to add
 * @param string $first_char_of_next_context Passed to the parent implementation
 *                                           together with the trailing remainder
 */
function _addParseData($code, $first_char_of_next_context = '')
{
    $parent_name = $this->_parentName;
    geshi_dbg('GeSHiPHPDoubleStringContext::_addParseData(' . substr($code, 0, 15) . '...)');
    while (true) {
        $earliest_data = array('pos' => false, 'len' => 0);
        foreach ($this->_regexes as $regex) {
            $data = geshi_get_position($code, $regex, 0, false, true); // request table
            // Keep the earliest match; on equal positions keep the longer one.
            // Note that a match at position 0 fails the first clause
            // (false != 0 is false) and is instead accepted by the last one,
            // because 0 == false under loose comparison - provided its length
            // is greater than the initial 0.
            if (false != $data['pos'] && false === $earliest_data['pos'] || false !== $data['pos'] && ($data['pos'] < $earliest_data['pos'] || $data['pos'] == $earliest_data['pos'] && $data['len'] > $earliest_data['len'])) {
                $earliest_data = $data;
            }
        }
        if (false === $earliest_data['pos']) {
            // No more variables in this string
            break;
        }

        // bugfix: because we match a var, it might have been escaped.
        // so only do to -1 so we can catch slash if it has been
        // (for a match at position 0 there is no preceding character to take)
        $pos = $earliest_data['pos'] ? $earliest_data['pos'] - 1 : 0;
        $len = $earliest_data['pos'] ? $earliest_data['len'] + 1 : $earliest_data['len'];
        parent::_addParseData(substr($code, 0, $pos));

        // Now the entire possible var is in:
        $possible_var = substr($code, $pos, $len);
        geshi_dbg('Found variable at position ' . $earliest_data['pos'] . '(' . $possible_var . ')');

        // Check that the dollar sign that started this variable was not escaped
        //$first_part = str_replace('\\\\', '', substr($code, 0, $pos));
        //if ('\\' == substr($first_part, -1)) {
        // If \\ before var and { is not next character after that...
        if ('\\' == substr($possible_var, 0, 1) && '{' != substr($possible_var, 1, 1)) {
            // This variable has been escaped, so add the escaped dollar sign
            // as the correct context, and the rest of the variable (recurse to catch
            // other variables inside this possible variable)
            geshi_dbg('Variable was escaped');
            $this->_styler->addParseData(substr($possible_var, 0, 2), $parent_name . '/esc', $this->_getExtraParseData(), $this->_complexFlag);
            $this->_addParseData(substr($possible_var, 2));
        } else {
            // Add first character that might have been a \\ but in fact isn't to the parent
            // but only do it if we had to modify the position
            if ('$' != substr($possible_var, 0, 1)) {
                parent::_addParseData(substr($possible_var, 0, 1));
                $possible_var = substr($possible_var, 1);
            }

            // Many checks could go in here...
            // @todo [blocking 1.1.5] check for ${foo} variables: start { matched by end }
            // because at the moment ${foo is matched for example.
            if ('{' == substr($possible_var, 0, 1)) {
                if ('}' == substr($possible_var, -1)) {
                    // Braces are balanced: the opening brace belongs to the variable
                    $start_brace = '{';
                } else {
                    // Unbalanced: the opening brace is ordinary string content
                    $start_brace = '';
                    parent::_addParseData('{');
                    // remove brace from $possible_var. This will only be used
                    // if the variable isn't an OO variable anyway...
                    $possible_var = substr($possible_var, 1);
                }
            } else {
                $start_brace = '';
            }

            if (isset($earliest_data['tab'][5])) {
                // Then we matched off the third regex - the one that does objects
                // The first { if there is one, and $this (which is in index 2)
                $this->_styler->addParseData($start_brace . $earliest_data['tab'][2], $parent_name . '/var', $this->_getExtraParseData(), $this->_complexFlag);
                // The -> with any whitespace around it
                $this->_styler->addParseData($earliest_data['tab'][3], $parent_name . '/symbol', $this->_getExtraParseData(), $this->_complexFlag);
                // The method name
                $this->_styler->addParseData($earliest_data['tab'][4], $parent_name . '/oodynamic', $this->_getExtraParseData(), $this->_complexFlag);
                // The closing }, if any: part of the variable only when it
                // has a matching opening brace, plain content otherwise
                if ($earliest_data['tab'][5]) {
                    if ($start_brace) {
                        $this->_styler->addParseData($earliest_data['tab'][5], $parent_name . '/var', $this->_getExtraParseData(), $this->_complexFlag);
                    } else {
                        parent::_addParseData('}');
                    }
                }
            } else {
                $this->_styler->addParseData($possible_var, $parent_name . '/var', $this->_getExtraParseData(), $this->_complexFlag);
            }
        }

        // Chop off what we have done
        $code = substr($code, $earliest_data['pos'] + $earliest_data['len']);
    }

    // Add the rest
    parent::_addParseData($code, $first_char_of_next_context);
}
/**
 * Sets the styles of a context in the source code.
 *
 * Theme data is loaded through the styler first (with the theme flag set),
 * then the raw style is applied to the selector.
 *
 * @param string $selector The selector to use, this is the style name of a context. Example: php/php
 * @param string $styles   The CSS styles to apply to the context
 * @since 1.1.1
 */
public function setStyles($selector, $styles)
{
    geshi_dbg("GeSHi::setStyles({$selector}, {$styles})");

    $styler = $this->_styler;
    $styler->loadStyles('', true);
    $styler->setRawStyle($selector, $styles);
}
/**
 * Finds where this context ends within the given code.
 *
 * If the delimiter entry for $context_open_key defines a balanced
 * opener/closer pair (index 3), nested pairs are skipped first and the
 * search for the real ender starts where the balancing finished. Of all
 * enders (index 1) that match, the earliest wins; when several match at the
 * same position the longest match wins.
 *
 * @param string $code                 The code to search
 * @param mixed  $context_open_key     Key into $this->_contextDelimiters of the
 *                                     delimiter set that opened the context;
 *                                     -1 means the context has no ender
 * @param string $context_opener       The text that opened the context (only
 *                                     used for debug output here)
 * @param mixed  $beginning_of_context Only used for debug output here
 * @return array|false array('pos' => int, 'len' => int, 'dlm' => string)
 *                     describing the end of the context, or false if no
 *                     ender was found
 */
function _getContextEndData($code, $context_open_key, $context_opener, $beginning_of_context)
{
    geshi_dbg('GeSHiContext::_getContextEndData(' . $this->_contextName . ', ' . $context_open_key . ', ' . $context_opener . ', ' . $beginning_of_context . ')');
    $context_end_pos = false;
    $context_end_len = -1;
    $context_end_dlm = '';
    $offset = 0;

    // Bail out if context open key tells us that there is no ender for this context
    if (-1 == $context_open_key) {
        geshi_dbg('  no opener so no ender');
        return false;
    }

    // Balanced endings is handled here
    if (isset($this->_contextDelimiters[$context_open_key][3])) {
        $balance_opener = $this->_contextDelimiters[$context_open_key][3][0];
        $balance_closer = $this->_contextDelimiters[$context_open_key][3][1];

        // We get the first push for free
        // @todo [blocking 1.1.4] if what we are balancing against is not related
        // to the starter of the context then we have a problem... check $context_opener
        // for starter stuff instead of assuming
        $balance_count = 1;
        geshi_dbg('@w  Begun balancing');
        while ($balance_count > 0) {
            // Look for opener/closers.
            $opener_pos = geshi_get_position($code, $balance_opener, $offset);
            $closer_pos = geshi_get_position($code, $balance_closer, $offset);
            geshi_dbg('  opener pos = ' . print_r($opener_pos, true) . ', closer pos = ' . print_r($closer_pos, true));

            // Check what we found
            if (false !== $opener_pos['pos']) {
                if (false !== $closer_pos['pos']) {
                    // Opener and closer available
                    if ($opener_pos['pos'] < $closer_pos['pos']) {
                        // Opener is closer so inc. counter
                        ++$balance_count;
                        geshi_dbg('  opener is closer so inc. to ' . $balance_count);
                        // Start searching from new pos just past where we found the opener
                        $offset = $opener_pos['pos'] + 1;
                        // @todo [blocking 1.1.4] could cache closer pos at this point?
                    } else {
                        // The closer comes first, so one level is closed
                        --$balance_count;
                        $offset = $closer_pos['pos'] + 1;
                        geshi_dbg('  closer is closer so dec. to ' . $balance_count);
                    }
                } else {
                    // No closer will ever be available yet we are still in this context...
                    // use end of code as end pos
                    // I've yet to test this case
                    geshi_dbg('@w  No closer but still in this context!');
                    return array('pos' => strlen($code), 'len' => 0, 'dlm' => '');
                }
            } elseif (false !== $closer_pos['pos']) {
                // No opener but closer. Nothing wrong with this
                --$balance_count;
                $offset = $closer_pos['pos'] + 1;
                geshi_dbg('  only closer left, dec. to ' . $balance_count);
            } else {
                // No opener or closer
                // Assume that we end this context at the end of the code, with
                // no delimiter
                geshi_dbg('@w  No opener or closer but still in this context!');
                return array('pos' => strlen($code), 'len' => 0, 'dlm' => '');
            }
        }
        // start looking for real end from the position where balancing ends
        // because we've found where balancing ends, but the end of the balancing
        // is likely to be the same as the end of the context
        --$offset;
    }

    foreach ($this->_contextDelimiters[$context_open_key][1] as $ender) {
        geshi_dbg('  Checking ender: ' . str_replace("\n", '\\n', $ender), false);
        $ender = $this->_substitutePlaceholders($ender);
        geshi_dbg(' converted to ' . $ender);

        // Use the offset we may have found when handling balancing of contexts (will
        // be zero if balancing not done).
        $position = geshi_get_position($code, $ender, $offset);
        geshi_dbg('    Ender ' . $ender . ': ' . print_r($position, true));
        $length = $position['len'];
        $position = $position['pos'];

        // BUGFIX: skip enders that do not occur at all
        if (false === $position) {
            continue;
        }

        // Earliest ender wins; on a tie the longest *match* wins. The matched
        // length is compared (not strlen($ender)) because $context_end_len
        // stores a match length and, for REGEX enders, the length of the
        // pattern text says nothing about how much was matched.
        if (false === $context_end_pos || $position < $context_end_pos || $position == $context_end_pos && $length > $context_end_len) {
            $context_end_pos = $position;
            $context_end_len = $length;
            $context_end_dlm = $ender;
        }
    }
    geshi_dbg('Context ' . $this->_contextName . ' can finish at position ' . $context_end_pos);

    if (false !== $context_end_pos) {
        return array('pos' => $context_end_pos, 'len' => $context_end_len, 'dlm' => $context_end_dlm);
    } else {
        return false;
    }
}
/**
 * Builds $this->_contextKeywordLookup from $this->_contextKeywords.
 *
 * For every keyword group the entry holds:
 *   0 => the group's optimised regexp fragments, each wrapped in "/.../"
 *        with a negative lookbehind/lookahead for the characters that may
 *        not touch a keyword (and the "i" flag if index 2 of the group is
 *        falsy)
 *   1 => the length of the shortest keyword in the group
 */
function _createContextKeywordLookup()
{
    geshi_dbg('GeSHiCodeContext::_createContextKeywordLookup()');
    $this->_contextKeywordLookup = array();

    foreach ($this->_contextKeywords as $keyword_group_key => $keyword_group_array) {
        geshi_dbg("  keyword group key: {$keyword_group_key}");
        $regexps = geshi_optimize_regexp_list($keyword_group_array[0]);

        // Pattern prefix: delimiter plus "not preceded by" assertion.
        // The list is tested against '' rather than with empty(), which
        // would also throw away a list consisting of just "0".
        $before = '/';
        if (!empty($this->_contextCharactersDisallowedBeforeKeywords)) {
            $charlist = implode('', $this->_contextCharactersDisallowedBeforeKeywords);
            if ('' !== $charlist) {
                $before .= '(?<![' . $charlist . '])';
            }
        } else {
            $before .= '(?<![a-zA-Z0-9_])';
        }

        // Pattern suffix: "not followed by" assertion, delimiter, flags
        $append = '';
        if (!empty($this->_contextCharactersDisallowedAfterKeywords)) {
            $charlist = implode('', $this->_contextCharactersDisallowedAfterKeywords);
            if ('' !== $charlist) {
                $append .= '(?![' . $charlist . '])';
            }
        } else {
            $append .= '(?![a-zA-Z0-9_])';
        }
        $append .= '/';

        // handle case-insensitivity
        if (!$keyword_group_array[2]) {
            $append .= 'i';
        }

        // Wrap by key instead of by reference, so no reference to the last
        // element lingers in $regexps (and in the copy stored below)
        foreach ($regexps as $index => $fragment) {
            $regexps[$index] = $before . '(?:' . $fragment . ')' . $append;
        }

        // get min length
        $min_len = strlen(current($keyword_group_array[0])); // anything as a start value
        foreach ($keyword_group_array[0] as $keyword) {
            // if $min_len = 12 we are only interested in keywords which are
            // less then 12 chars long, i.e. character @ index 11 is not set
            if (!isset($keyword[$min_len - 1])) {
                $len = strlen($keyword);
                if ($len < $min_len) {
                    $min_len = $len;
                }
            }
        }

        $this->_contextKeywordLookup[$keyword_group_key] = array(0 => $regexps, 1 => $min_len);
    }

    // $keyword_group_key is only set if the loop ran at least once
    if (isset($keyword_group_key)) {
        geshi_dbg('  Lookup created, first entry: ' . print_r($this->_contextKeywordLookup[$keyword_group_key], true));
    } else {
        geshi_dbg('  Lookup created with no entries');
    }
}