diff options
author | Alexander Sulfrian <alexander@sulfrian.net> | 2009-12-11 01:16:01 +0100 |
---|---|---|
committer | Alexander Sulfrian <alexander@sulfrian.net> | 2009-12-11 01:16:20 +0100 |
commit | 48d7424647b146a66c5bde93ee836919933a4150 (patch) | |
tree | 3eb816e42a0cd857cf831c57baa59ad7cee478c7 /paste/include/geshi/classes | |
parent | 8242c982ebcdfc67274c8ab79a2f34aa451872d7 (diff) | |
download | rafb-nopaste-48d7424647b146a66c5bde93ee836919933a4150.tar.gz rafb-nopaste-48d7424647b146a66c5bde93ee836919933a4150.tar.xz rafb-nopaste-48d7424647b146a66c5bde93ee836919933a4150.zip |
added geshi syntax highlighter
Diffstat (limited to 'paste/include/geshi/classes')
6 files changed, 1927 insertions, 0 deletions
diff --git a/paste/include/geshi/classes/class.geshicodecontext.php b/paste/include/geshi/classes/class.geshicodecontext.php
new file mode 100644
index 0000000..53ac023
--- /dev/null
+++ b/paste/include/geshi/classes/class.geshicodecontext.php
@@ -0,0 +1,550 @@
+<?php
+/**
+ * GeSHi - Generic Syntax Highlighter
+ *
+ * For information on how to use GeSHi, please consult the documentation
+ * found in the docs/ directory, or online at http://geshi.org/docs/
+ *
+ * This file is part of GeSHi.
+ *
+ * GeSHi is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GeSHi is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GeSHi; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * You can view a copy of the GNU GPL in the COPYING file that comes
+ * with GeSHi, in the docs/ directory.
+ *
+ * @package core
+ * @author Nigel McNie <nigel@geshi.org>
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL
+ * @copyright (C) 2005 Nigel McNie
+ * @version 1.1.0
+ *
+ */
+
+/**
+ * This class represents a "Code" context - one where keywords and
+ * regular expressions can be used to highlight part of the context.
+ *
+ * If the context you are in requires keyword or regular expression
+ * support, then GeSHiCodeContext is the context type that you need.
+ *
+ * <b>Usage:</b>
+ *
+ * Use this class in a context or language file, to define a code
+ * context:
+ *
+ * <pre> 'CHILD_CONTEXTS' => array(
+ *     ...
+ *     new GeSHiCodeContext([params])
+ *     ...
+ * ),</pre>
+ *
+ * <pre> 'CONTEXTS' => array(
+ *     ...
+ *     new GeSHiCodeContext([params])
+ *     ...
+ * ),</pre>
+ *
+ * @package core
+ * @author Nigel McNie <nigel@geshi.org>
+ * @since 1.1.0
+ * @version 1.1.0
+ * @see GeSHiContext
+ *
+ */
+class GeSHiCodeContext extends GeSHiContext
+{
+    /**#@+
+     * @var array
+     * @access private
+     */
+
+    /**
+     * Keywords for this code context.
+     *
+     * Each entry is a keyword group; as used in this class:
+     * [0] => list of keywords, [1] => style name, [2] => default style,
+     * [3] => case sensitivity flag, [4] => URL template (or "func()" callback name)
+     * @var array
+     */
+    var $_contextKeywords = array();
+
+    /**
+     * Characters that cannot appear before a keyword
+     * @var array
+     */
+    var $_contextCharactersDisallowedBeforeKeywords = array();
+
+    /**
+     * Characters that cannot appear after a keyword
+     * @var array
+     */
+    var $_contextCharactersDisallowedAfterKeywords = array();
+
+    /**
+     * Lookup table from the first character of a keyword to the list of
+     * array(0 => keyword, 1 => keyword group key) entries starting with that
+     * character. Built lazily by {@link _createContextKeywordLookup()}.
+     * @var array
+     */
+    var $_contextKeywordLookup;
+
+    /**
+     * A symbol array. Each entry: [0] => list of symbols, [1] => style name,
+     * [2] => default style
+     * @var array
+     */
+    var $_contextSymbols = array();
+
+    /**
+     * A regex array. Each entry: [0] => list of regexes, [1] => string that
+     * must be present in the code for the regexes to be tried ('' = always),
+     * [2] => per bracket-group handling data
+     * @var array
+     */
+    var $_contextRegexps = array();
+
+    /**
+     * An array of object "splitters"
+     */
+    var $_objectSplitters = array();
+
+    /**
+     * Whether this code context has finished loading yet
+     * @todo [blocking 1.1.1] Do this by static variable?
+     */
+    var $_codeContextLoaded = false;
+
+    /**#@-*/
+
+
+    /**
+     * Redefinition of {@link GeSHiContext::load()} in order to also
+     * load keywords, regular expressions etc.
+     *
+     * Besides delegating to the parent, this turns every registered object
+     * splitter into a regular expression group (splitter, whitespace, member
+     * name) appended to the context regexps. Only done once per instance.
+     *
+     * @param GeSHiStyler The styler helper object (passed by reference)
+     */
+    function load (&$styler)
+    {
+        parent::load($styler);
+
+        if ($this->_codeContextLoaded) {
+            return;
+        }
+        $this->_codeContextLoaded = true;
+
+        // Add regex for methods
+        foreach ($this->_objectSplitters as $data) {
+            $quoted_splitters = array();
+            foreach ($data[0] as $splitter) {
+                // The generated pattern is delimited by "#", so that character
+                // must be escaped too or a splitter containing it would
+                // terminate the pattern early
+                $quoted_splitters[] = preg_quote($splitter, '#');
+            }
+
+            $this->_contextRegexps[] = array(
+                0 => array(
+                    "#(" . implode('|', $quoted_splitters) . ")(\s*)([a-zA-Z\*\(_][a-zA-Z0-9_\*]*)#"
+                ),
+                1 => '', // char to check for
+                2 => array(
+                    1 => true,
+                    2 => true, // highlight splitter
+                    3 => array($data[1], $data[2], $data[3]) // $data[3] says whether to give code a go at the match first
+                )
+            );
+        }
+    }
+
+    /**
+     * Overrides GeSHiContext::loadStyleData to load style data
+     *
+     * Registers the default styles of all keyword groups, named regex
+     * bracket groups and symbol groups with the styler, then lets the
+     * parent recurse into the child contexts.
+     */
+    function loadStyleData ()
+    {
+        // @todo [blocking 1.1.1] Skip if already loaded???
+        // Set styles for keywords
+        //geshi_dbg('Loading style data for context ' . $this->getName(), GESHI_DBG_PARSE);
+        // @todo [blocking 1.1.1] Style data for infectious context loaded many times, could be reduced to one?
+        //@todo [blocking 1.1.1] array_keys loop construct if possible
+        foreach ($this->_contextKeywords as $keyword_group_array) {
+            geshi_dbg($keyword_group_array[1] . ' ' . $keyword_group_array[2], GESHI_DBG_PARSE);
+            $this->_styler->setStyle($keyword_group_array[1], $keyword_group_array[2]);
+        }
+
+        // Set styles for regex groups
+        foreach ($this->_contextRegexps as $data) {
+            foreach ($data[2] as $group) {
+                // A group may be declared as plain "true" (highlight as ordinary
+                // code, see load()); such groups carry no name/style to register
+                if (is_array($group)) {
+                    $this->_styler->setStyle($group[0], $group[1]);
+                }
+            }
+        }
+
+        // Set styles for symbols
+        foreach ($this->_contextSymbols as $data) {
+            $this->_styler->setStyle($data[1], $data[2]);
+        }
+
+        parent::loadStyleData();
+    }
+
+    /**
+     * Overrides {@link GeSHiContext::_addParseData()} to highlight a code context, including
+     * keywords, symbols and regular expression matches
+     *
+     * Works in three steps: (1) run every context regex over the code, blanking
+     * each match out with null bytes and remembering it by position; (2) turn
+     * the remembered matches into ready-made parse data per position; (3) run
+     * the keyword/symbol highlighter over the blanked code, which splices the
+     * regex results back in at their positions, and hand everything to the styler.
+     *
+     * @param string The code to add as parse data
+     * @param string The first character of the context after this
+     */
+    function _addParseData ($code, $first_char_of_next_context = '')
+    {
+        //$first_char_of_next_context = '';
+        geshi_dbg('GeSHiCodeContext::_addParseData(' . substr($code, 0, 15) . ', ' . $first_char_of_next_context . ')', GESHI_DBG_PARSE);
+
+        $regex_matches = array();
+        foreach ($this->_contextRegexps as $regex_group_key => $regex_data) {
+            geshi_dbg(' Regex group: ' . $regex_group_key, GESHI_DBG_PARSE);
+            // Set style of this group
+            // $regex_data = array(
+            //     0 => regex (with brackets to signify groupings
+            //     1 => a string that if not matched, this part ain't done (speeds stuff up)
+            //     2 => array(
+            //         1 => array(name of first group, default style of first group)
+            //         2 => array(name of second group, ...
+            //         ...
+            if (!$regex_data[1] || false !== strpos($code, $regex_data[1])) {
+                foreach ($regex_data[0] as $regex) {
+                    geshi_dbg(' Trying regex ' . $regex . '... ', GESHI_DBG_PARSE, false);
+                    $matches = array();
+                    // Capture offsets so that the real position of every match is known.
+                    // Searching for the matched text afterwards (strpos) could hit an
+                    // earlier, non-matching occurrence of the same text.
+                    preg_match_all($regex, $code, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE);
+                    $full_matches = isset($matches[0]) ? $matches[0] : array();
+                    geshi_dbg('found ' . count($full_matches) . ' matches', GESHI_DBG_PARSE);
+
+                    foreach ($full_matches as $key => $match_info) {
+                        // $match is the full match of the regex, $pos its byte offset in $code
+                        $match = $match_info[0];
+                        $pos = $match_info[1];
+                        // neat splicey jobbie to get rid of the keyword (can't do str_replace...)
+                        // The match is replaced by null bytes of the same length so that all
+                        // other offsets stay valid
+                        $code = substr($code, 0, $pos) . str_repeat("\0", strlen($match)) . substr($code, $pos + strlen($match));
+
+                        // make an array of data for this regex: the text of every bracket group
+                        $data = array();
+                        foreach ($matches as $match_data) {
+                            $group = $match_data[$key];
+                            // Unmatched groups may be reported as a plain empty string
+                            // instead of an array(text, offset) pair
+                            $data[] = is_array($group) ? $group[0] : $group;
+                        }
+                        $regex_matches[] = array(0 => $pos, 1 => $regex_group_key, 2 => $data);
+                    }
+                }
+            }
+        }
+        geshi_dbg(' Regex matches: ' . str_replace("\n", "\r", print_r($regex_matches, true)), GESHI_DBG_PARSE);
+
+        $regex_replacements = array();
+        foreach ($regex_matches as $data) {
+            // $data[0] is the pos
+            // $data[1] is the key
+            // $data[2][0] contains the full match
+            // $data[2][1] contains what is in the first brackets
+            // $data[2][2] contains what is in the second brackets...
+            foreach ($data[2] as $key => $match) {
+                // skip the full match which is in $data[2][0]
+                if ($key) {
+                    // If there is a name for this bracket group ($key) in this regex group ($data[1])...
+                    if (isset($this->_contextRegexps[$data[1]][2][$key]) && is_array($this->_contextRegexps[$data[1]][2][$key])) {
+                        // If we should be attempting to have a go at code highlighting first...
+                        // (the flag is optional, so check that it exists before reading it)
+                        if (isset($this->_contextRegexps[$data[1]][2][$key][2]) &&
+                            true === $this->_contextRegexps[$data[1]][2][$key][2]) {
+                            // Highlight the match, and put the code into the result
+                            $highlighted_matches = $this->_codeContextHighlight($match);
+                            foreach ($highlighted_matches as $stuff) {
+                                if ($stuff[1] == $this->_contextName) {
+                                    // Plain code gets the name of the regex group instead
+                                    $regex_replacements[$data[0]][] = array($stuff[0], $this->_contextRegexps[$data[1]][2][$key][0]);
+                                } else {
+                                    $regex_replacements[$data[0]][] = $stuff;
+                                }
+                            }
+                        } else {
+                            $regex_replacements[$data[0]][] = array($match,
+                                $this->_contextRegexps[$data[1]][2][$key][0]); //name in [0], s in [1]
+                        }
+                    // Else, perhaps it is simply set. If so, we highlight it as if it were
+                    // part of the code context
+                    } elseif (isset($this->_contextRegexps[$data[1]][2][$key])) {
+                        // this may end up as array(array(match,name),array(match,name),array..)
+                        //@todo [blocking 1.1.1] may need to pass the first char of next context here if it's at the end...
+                        $parse_data = $this->_codeContextHighlight($match);
+                        foreach ($parse_data as $pdata) {
+                            $regex_replacements[$data[0]][] = $pdata;
+                        }
+                    }
+                    // Else, don't add it at all...
+                }
+            }
+        }
+        geshi_dbg(' Regex replacements: ' . str_replace("\n", "\r", print_r($regex_replacements, true)), GESHI_DBG_PARSE);
+        // Now what we do is make an array that looks like this:
+        // array(
+        //     [position] => [replacement for regex]
+        //     [position] => [replacement for regex]
+        //     ...
+        // )
+        // so we can put them back in as we build the result
+
+
+        // The aim is to end up with an array(
+        //     0 => array(code, contextname)
+        //     1 => array(code, contextname)
+        //     2 => ...
+        //
+        // $regex_replacements is an array(
+        //     pos => array of arrays like the above, in order
+        //     pos => ...
+        //
+        // codeContextHighlight should return something similar
+
+        $parse_data = $this->_codeContextHighlight($code, $regex_replacements, $first_char_of_next_context);
+        foreach ($parse_data as $data) {
+            if (!isset($data[2])) {
+                $this->_styler->addParseData($data[0], $data[1]);
+            } else {
+                // Third element is the URL of a keyword
+                $this->_styler->addParseData($data[0], $data[1], $data[2]);
+            }
+        }
+    }
+
+
+    /**
+     * Given code, returns an array of context data about it
+     *
+     * Walks the code character by character. Null bytes (left behind by regex
+     * matches) are skipped, the prepared regex replacements are spliced in at
+     * their positions, and at every position where a keyword may start the
+     * longest matching keyword wins. Everything else is handed to
+     * {@link _checkForSymbol()}.
+     *
+     * @param string The code to highlight
+     * @param array  Prepared parse data for regex matches, indexed by position in the code
+     * @param string The first character of the context after this
+     * @return array List of array(code, name[, url]) entries, in order
+     */
+    function _codeContextHighlight ($code, $regex_replacements = array(), $first_char_of_next_context = '')
+    {
+        geshi_dbg('GeSHiCodeContext::_codeContextHighlight(' . substr($code, 0, 15) . ', ' .
+            (($regex_replacements) ? 'array(...)' : 'null') . ', ' . $first_char_of_next_context . ')', GESHI_DBG_PARSE);
+        //$first_char_of_next_context = '';
+
+        if (!is_array($this->_contextKeywordLookup)) {
+            $this->_createContextKeywordLookup();
+        }
+
+        // Entry 0 is a dummy so that _checkForSymbol() always has a "previous"
+        // record to compare against; it is removed before returning
+        $result = array(0 => array('', ''));
+        $result_pointer = 0;
+        $length = strlen($code);
+        $keyword_match_allowed = true;
+        $earliest_pos = false;
+        $earliest_keyword = '';
+        $earliest_keyword_group = 0;
+
+        // For each character
+        for ($i = 0; $i < $length; $i++) {
+            if (isset($regex_replacements[$i])) {
+                geshi_dbg(' Regex replacements available at position ' . $i . ': ' . $regex_replacements[$i][0][0] . '...', GESHI_DBG_PARSE);
+                // There's regular expressions expected to go here
+                foreach ($regex_replacements[$i] as $replacement) {
+                    $result[++$result_pointer] = $replacement;
+                }
+                // Allow keyword matching immediately after regular expressions
+                $keyword_match_allowed = true;
+            }
+
+            $char = substr($code, $i, 1);
+            if ("\0" == $char) {
+                // Not interested in null characters inserted by regex replacements
+                continue;
+            }
+
+            // Take symbols into account before doing this
+            if (!$this->_contextKeywordLookup) {
+                $this->_checkForSymbol($char, $result, $result_pointer);
+                continue;
+            }
+
+            geshi_dbg('@b Current char is: ' . str_replace("\n", '\n', $char), GESHI_DBG_PARSE);
+
+            if ($keyword_match_allowed && isset($this->_contextKeywordLookup[$char])) {
+                foreach ($this->_contextKeywordLookup[$char] as $keyword_array) {
+                    // keyword array is 0 => keyword, 1 => kwgroup
+                    // Compare lengths with lengths: comparing the length against the
+                    // keyword string itself is never the intended test
+                    if (strlen($keyword_array[0]) < strlen($earliest_keyword)) {
+                        // We can skip keywords that are shorter than the best
+                        // earliest we can currently do
+                        geshi_dbg(' [skipping ' . $keyword_array[0], GESHI_DBG_PARSE);
+                        continue;
+                    }
+                    geshi_dbg(' Checking code for ' . $keyword_array[0], GESHI_DBG_PARSE);
+                    // If case sensitive
+                    if ($this->_contextKeywords[$keyword_array[1]][3]) {
+                        $next_part_is_keyword = ($keyword_array[0] == substr($code, $i, strlen($keyword_array[0])));
+                    } else {
+                        $next_part_is_keyword = (strtolower($keyword_array[0]) == strtolower(substr($code, $i, strlen($keyword_array[0]))));
+                    }
+
+                    geshi_dbg(" next part is keyword: $next_part_is_keyword", GESHI_DBG_PARSE);
+                    // OPTIMIZE (use lookup to remember for length $foo(1 => false, 2 => false) so if kw is length 1 or 2 then don't need to check
+                    //$after_allowed = ( !in_array(substr($code, $i + strlen($keyword_array[0]), 1), array_diff($this->_context_characters_disallowed_after_keywords, $this->_context_keywords[$keyword_array[1]][4])) );
+                    // the first char of the keyword is always $char???
+                    $after_char = substr($code, $i + strlen($keyword_array[0]), 1);
+                    // if '' == $after_char, it's at the end of the context so we need
+                    // the first char from the next context...
+                    if ( '' == $after_char ) $after_char = $first_char_of_next_context;
+
+                    geshi_dbg(" after char to check: |$after_char|", GESHI_DBG_PARSE);
+                    $after_allowed = ('' == $after_char || !ctype_alnum($after_char) ||
+                        (ctype_alnum($after_char) &&
+                        !ctype_alnum($char)) );
+                    $after_allowed = ($after_allowed &&
+                        !in_array($after_char, $this->_contextCharactersDisallowedAfterKeywords));
+                    // Disallow underscores after keywords
+                    $after_allowed = ($after_allowed && ($after_char != '_'));
+
+                    // If where we are up to is a keyword, and it's allowed to be here (before was already
+                    // tested by $keyword_match_allowed)
+                    if ($next_part_is_keyword && $after_allowed) {
+                        //if ( false === $earliest_pos || $pos < $earliest_pos || ($pos == $earliest_pos && strlen($keyword_array[0]) > strlen($earliest_keyword)) )
+                        // Longest keyword at this position wins
+                        if (strlen($keyword_array[0]) > strlen($earliest_keyword)) {
+                            geshi_dbg('@bfound', GESHI_DBG_PARSE);
+                            // what is _pos for?
+                            // What are any of them for??
+                            $earliest_pos = true;//$pos;
+                            // BUGFIX: just in case case sensitive matching used, get data from string
+                            // instead of from data array
+                            $earliest_keyword = substr($code, $i, strlen($keyword_array[0]));
+                            $earliest_keyword_group = $keyword_array[1];
+                        }
+                    }
+                }
+            }
+
+            // reset matching of keywords
+            //$keyword_match_allowed = false;
+
+            //echo "Current pos = $i, earliest keyword is " . htmlspecialchars($earliest_keyword) . ' at ' . $earliest_pos . "\n";
+            //echo "Symbol string is |$current_symbols|\n";
+
+            if (false !== $earliest_pos) {
+                geshi_dbg('Keyword matched: ' . $earliest_keyword, GESHI_DBG_PARSE);
+                // there's a keyword match!
+
+                $result[++$result_pointer] = array($earliest_keyword,
+                    $this->_contextKeywords[$earliest_keyword_group][1],
+                    $this->_getURL($earliest_keyword, $earliest_keyword_group));
+                // Jump to the last character of the keyword; the loop's $i++ moves past it
+                $i += strlen($earliest_keyword) - 1;
+                geshi_dbg("strlen of earliest keyword is " . strlen($earliest_keyword) . " (pos is $i)", GESHI_DBG_PARSE);
+                // doesn't help
+                $earliest_pos = false;
+                $earliest_keyword = '';
+            } else {
+                // Check for a symbol instead
+                $this->_checkForSymbol($char, $result, $result_pointer);
+            }
+
+            /// If we move this to the end we might be able to get rid of the last one [DONE]
+            /// The second test on the first line is a little contentious - allows functions that don't
+            /// start with an alpha character to be within other words, e.g abc<?php, where <?php is a kw
+            $before_char = substr($code, $i, 1);
+            $before_char_is_alnum = ctype_alnum($before_char);
+            $keyword_match_allowed = (!$before_char_is_alnum || ($before_char_is_alnum && !ctype_alnum($char)));
+            $keyword_match_allowed = ($keyword_match_allowed && !in_array($before_char,
+                $this->_contextCharactersDisallowedBeforeKeywords));
+            // Disallow underscores before keywords
+            $keyword_match_allowed = ($keyword_match_allowed && ('_' != $before_char));
+            geshi_dbg(' Keyword matching allowed: ' . $keyword_match_allowed, GESHI_DBG_PARSE);
+            geshi_dbg(' [checked ' . substr($code, $i, 1) . ' against ' . print_r($this->_contextCharactersDisallowedBeforeKeywords, true), GESHI_DBG_PARSE);
+        }
+
+        // Drop the dummy first record
+        unset($result[0]);
+        //geshi_dbg('@b Resultant Parse Data:', GESHI_DBG_PARSE);
+        //geshi_dbg(str_replace("\n", "\r", print_r($result, true)), GESHI_DBG_PARSE);
+        //return array(array($code, $this->_contextName));
+        return $result;
+    }
+
+
+    /**
+     * Checks the specified character to see if it is a symbol, and
+     * adds it to the result array according to its findings.
+     *
+     * The character is appended to the current result record when that record
+     * already has the same name (symbol group name or context name); otherwise
+     * a new record is started.
+     *
+     * @param string The possible symbol to check
+     * @param array The current result data that will be appended to
+     * @param int The pointer to the current result record
+     */
+    function _checkForSymbol($possible_symbol, &$result,&$result_pointer)
+    {
+        $skip = false;
+        geshi_dbg('Checking ' . $possible_symbol . ' for symbol match', GESHI_DBG_PARSE);
+        foreach ($this->_contextSymbols as $symbol_data) {
+            if (in_array($possible_symbol, $symbol_data[0])) {
+                // we've matched the symbol in $symbol_group
+                // start the current symbols string
+                if ($result[$result_pointer][1] == $symbol_data[1]) {
+                    $result[$result_pointer][0] .= $possible_symbol;
+                } else {
+                    $result[++$result_pointer] = array($possible_symbol, $symbol_data[1]);
+                }
+                $skip = true;
+                break;
+            }
+        }
+        if (!$skip) {
+            // Not a symbol: plain code of this context
+            if ($result[$result_pointer][1] == $this->_contextName) {
+                $result[$result_pointer][0] .= $possible_symbol;
+            } else {
+                $result[++$result_pointer] = array($possible_symbol, $this->_contextName);
+            }
+        }
+    }
+
+    /// THIS FUNCTION NEEDS TO DIE!!!
+    /// When language files are able to be compiled, they should list their keywords
+    /// in this form already.
+    ///
+    /// Builds $_contextKeywordLookup: first character of a keyword => list of
+    /// array(0 => keyword, 1 => keyword group key).
+    function _createContextKeywordLookup ()
+    {
+        geshi_dbg('GeSHiCodeContext::_createContextKeywordLookup()', GESHI_DBG_PARSE);
+
+        $this->_contextKeywordLookup = array();
+        foreach ($this->_contextKeywords as $keyword_group_key => $keyword_group_array) {
+            geshi_dbg(" keyword group key: $keyword_group_key", GESHI_DBG_PARSE);
+
+            foreach ($keyword_group_array[0] as $keyword) {
+                // If keywords are case sensitive, add them straight in.
+                // Otherwise, if they're not and the first char of the lookup is alphabetical,
+                // add it to both parts of the lookup (a and A for example).
+                $key = substr($keyword, 0, 1);
+                if (ctype_alpha($key) && !$keyword_group_array[3]) {
+                    $this->_contextKeywordLookup[strtoupper(substr($keyword, 0, 1))][] =
+                        array(0 => $keyword, 1 => $keyword_group_key /*$keyword_group_array[1]*/);
+                    $this->_contextKeywordLookup[strtolower(substr($keyword, 0, 1))][] =
+                        array(0 => $keyword, 1 => $keyword_group_key /*$keyword_group_array[1]*/);
+                } else {
+                    $this->_contextKeywordLookup[$key][] =
+                        array(0 => $keyword, 1 => $keyword_group_key /*$keyword_group_array[1]*/);
+                }
+            }
+        }
+        if (isset($key)) {
+            geshi_dbg(' Lookup created, first entry: ' . print_r($this->_contextKeywordLookup[$key][0], true), GESHI_DBG_PARSE);
+        } else {
+            geshi_dbg(' Lookup created with no entries', GESHI_DBG_PARSE);
+        }
+    }
+
+
+    /**
+     * Turns keywords into <a href="url">>keyword<</a> if needed
+     *
+     * The URL entry ([4]) of the keyword group is either the name of a
+     * function followed by "()" - which is then called with the keyword and
+     * must return the URL - or a template in which {FNAME} is replaced by
+     * the keyword.
+     *
+     * @param string The keyword to build the URL for
+     * @param mixed The key of the keyword group the keyword belongs to
+     * @return string The URL, or '' if the group has no URL
+     * @todo [blocking 1.1.5] This method still needs to listen to set_link_target, set_link_styles etc
+     */
+    function _getURL ($keyword, $earliest_keyword_group)
+    {
+        if ($this->_contextKeywords[$earliest_keyword_group][4] != '') {
+            // Remove function_exists() call? Valid language files will define functions required...
+            if (substr($this->_contextKeywords[$earliest_keyword_group][4], -2) == '()' &&
+                function_exists(substr($this->_contextKeywords[$earliest_keyword_group][4], 0, -2))) {
+                $href = call_user_func(substr($this->_contextKeywords[$earliest_keyword_group][4], 0, -2), $keyword);
+            } else {
+                $href = str_replace('{FNAME}', $keyword, $this->_contextKeywords[$earliest_keyword_group][4]);
+            }
+            return $href;
+        }
+        return '';
+    }
+}
+
+?>
diff --git a/paste/include/geshi/classes/class.geshicontext.php b/paste/include/geshi/classes/class.geshicontext.php
new file mode 100644
index 0000000..964d8b3
--- /dev/null
+++ b/paste/include/geshi/classes/class.geshicontext.php
@@ -0,0 +1,721 @@
+<?php
+/**
+ * GeSHi - Generic Syntax Highlighter
+ *
+ * For information on how to use GeSHi, please consult the documentation
+ * found in the docs/ directory, or online at http://geshi.org/docs/
+ *
+ * This file is part of GeSHi.
+ *
+ * GeSHi is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GeSHi is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GeSHi; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * You can view a copy of the GNU GPL in the COPYING file that comes
+ * with GeSHi, in the docs/ directory.
+ * + * @package core + * @author Nigel McNie <nigel@geshi.org> + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL + * @copyright (C) 2005 Nigel McNie + * @version 1.1.0 + * + */ + +/** + * The GeSHiContext class + * + * @package core + * @author Nigel McNie + * @since 1.1.0 + * @version 1.1.0 + */ +class GeSHiContext +{ + /**#@- + * @access private + */ + + /** + * The context name. A unique identifier that corresponds to a path under + * the GESHI_CLASSES_ROOT folder where the configuration file for this + * context is. + * @var string + */ + var $_contextName; + + /** + * The file name from where to load data for this context + * @var string + */ + var $_fileName; + + /** + * The dialect name of this context + * @var string + */ + var $_dialectName; + + /** + * The styler helper object + * @var GeSHiStyler + */ + var $_styler; + + /** + * The context delimiters + * @var array + */ + var $_contextDelimiters = array(); + + /** + * The child contexts + * @var array + */ + var $_childContexts = array(); + + /** + * The style type of this context, used for backward compatibility + * with GeSHi 1.0.X + * @var int + */ + var $_contextStyleType = GESHI_STYLE_NONE; + + /** + * Delimiter parse data. Controls which context - the parent or child - + * should parse the delimiters for a context + * @var int + */ + var $_delimiterParseData = GESHI_CHILD_PARSE_BOTH; + + /** + * The overriding child context, if any + * @var GeSHiContext + */ + var $_overridingChildContext; + + /** + * The matching regex table for regex starters + * @var array + */ + var $_startRegexTable = array(); + + /** + * The "infectious context". 
Will be used to "infect" the context + * tree with itself - this is how PHP inserts itself into HTML contexts + * @var GeSHiContext + */ + var $_infectiousContext; + + /** + * Whether this context has been already loaded + * @var boolean + */ + var $_loaded = false; + + /** + * The name for stuff detected in the start of a context + * @var string + */ + var $_startName = 'start'; + + /** + * The name for stuff detected in the end of a context + * @var string + */ + var $_endName = 'end'; + + /** + * Whether this context is an alias context + * @var boolean + */ + var $_isAlias = false; + /**#@-*/ + + /** + * Creates a new GeSHiContext. + * + * @param string The name of the language this context represents + * @param string The dialect of the language this context represents + * @param string The name of the context + * @param array The name used for aliasing + * @todo [blocking 1.1.9] Better comment + */ + function GeSHiContext ($language_name, $dialect_name = '', $context_name = '', $alias_name = '') + { + // Set dialect + if ('' == $dialect_name) { + $dialect_name = $language_name; + } + $this->_dialectName = $dialect_name; + + // Set the context and file names + if ('' == $context_name) { + // Root of a language + $this->_fileName = $this->_contextName = $language_name . '/' . $dialect_name; + return; + } + if (0 === strpos($context_name, 'common')) { + $this->_fileName = $context_name; + // Strip "common/" from context name to get the actual name... + $context_name = substr($context_name, 7); + } else { + $this->_fileName = $language_name . '/' . 
$context_name; + } + if ($alias_name) { + $this->_contextName = $alias_name; + $this->_isAlias = true; + } else { + $this->_contextName = "$language_name/$dialect_name/$context_name"; + } + } + + /** + * Returns the name of this context + * + * @return string The full name of this context (language, dialect and context) + */ + function getName () + { + return $this->_contextName; + } + + function getStartName () + { + return $this->_startName; + } + + function getEndName () + { + return $this->_endName; + } + + function isAlias () + { + return $this->_isAlias; + } + + /** + * Loads the context data + */ + function load (&$styler) + { + geshi_dbg('Loading context: ' . $this->_contextName, GESHI_DBG_PARSE); + + if ($this->_loaded) { + geshi_dbg('@oAlready loaded', GESHI_DBG_PARSE); + return; + } + $this->_loaded = true; + + $this->_styler =& $styler; + + if (!geshi_can_include(GESHI_CONTEXTS_ROOT . $this->_fileName . $this->_styler->fileExtension)) { + geshi_dbg('@e Cannot get context information for ' . $this->getName() . ' from file ' + . GESHI_CONTEXTS_ROOT . $this->_fileName . $this->_styler->fileExtension, GESHI_DBG_ERR); + return array('code' => GESHI_ERROR_FILE_UNAVAILABLE, 'name' => $this->_contextName); + } + + // Load the data for this context + $CONTEXT = $this->_contextName; + $CONTEXT_START = "$this->_contextName/$this->_startName"; + $CONTEXT_END = "$this->_contextName/$this->_endName"; + $DIALECT = $this->_dialectName; + // @todo [blocking 1.1.5] This needs testing to see if it is faster + if (false) { + $language_file_name = GESHI_CONTEXTS_ROOT . $this->_contextName . $this->_styler->fileExtension; + $cached_data = $this->_styler->getCacheData($language_file_name); + if (null == $cached_data) { + // Data not loaded for this context yet + //geshi_dbg('@wLoading data for context ' . 
$this->_contextName, GESHI_DBG_PARSE); + // Get the data, stripping the start/end PHP code markers which aren't allowed in eval() + $cached_data = substr(implode('', file($language_file_name)), 5, -3); + $this->_styler->setCacheData($language_file_name, $cached_data); + } else { + //geshi_dbg('@oRetrieving data from cache for context ' . $this->_contextName, GESHI_DBG_PARSE); + } + eval($cached_data); + } else { + require GESHI_CONTEXTS_ROOT . $this->_fileName . $this->_styler->fileExtension; + } + + // Push the infectious context into the child contexts + if (null != $this->_infectiousContext) { + // Add the context to each of the current contexts... + $keys = array_keys($this->_childContexts); + foreach ($keys as $key) { + $this->_childContexts[$key]->infectWith($this->_infectiousContext); + } + // And add the infectious context to this context itself + $this->_childContexts[] =& $this->_infectiousContext; + geshi_dbg(' Added infectious context ' . $this->_infectiousContext->getName() + . ' to ' . $this->getName(), GESHI_DBG_PARSE); + } + + // Recursively load the child contexts + $keys = array_keys($this->_childContexts); + foreach ($keys as $key) { + $this->_childContexts[$key]->load($styler); + } + + // Load the overriding child context, if any + if ($this->_overridingChildContext) { + if (null != $this->_infectiousContext) { + $this->_overridingChildContext->infectWith($this->_infectiousContext); + } + $this->_overridingChildContext->load($styler); + } + //geshi_dbg('@o Finished loading context ' . $this->_styleName . ' successfully', GESHI_DBG_PARSE); + } + + /** + * Adds an "infectious child" to this context. + * + * Relies on child being a subclass of or actually being a GeSHiContext + */ + function infectWith (&$context) + { + $this->_infectiousContext =& $context; + //geshi_dbg(' Added infectious context ' . $context->getName() + // . ' to ' . $this->getName(), GESHI_DBG_PARSE); + } + + + /** + * Loads style data for the given context. 
Not implemented here, but can be overridden + * by a child class to get style data from its parent + * + * Note to self: This is needed by GeSHiCodeContext, so don't touch it! + */ + function loadStyleData () + { + //geshi_dbg('Loading style data for context ' . $this->getName(), GESHI_DBG_PARSE); + // Recursively load the child contexts + $keys = array_keys($this->_childContexts); + foreach ($keys as $key) { + $this->_childContexts[$key]->loadStyleData(); + } + + // Load the style data for the overriding child context, if any + if ($this->_overridingChildContext) { + $this->_overridingChildContext->loadStyleData(); + } + } + + /** + * Checks each child to see if it's useful. If not, then remove it + * + * @param string The code that can be used to check if a context + * is needed. + */ + function trimUselessChildren ($code) + { + //geshi_dbg('GeSHiContext::trimUselessChildren()', GESHI_DBG_API | GESHI_DBG_PARSE); + $new_children = array(); + $keys = array_keys($this->_childContexts); + + foreach ($keys as $key) { + //geshi_dbg(' Checking child: ' . $this->_childContexts[$key]->getName() . 
': ', GESHI_DBG_PARSE, false);
            if (!$this->_childContexts[$key]->contextCanStart($code)) {
                // This context will _never_ be useful - and nor will its children
                //geshi_dbg('@buseless, removed', GESHI_DBG_PARSE);
                // RAM saving technique
                // But we shouldn't remove highlight data if the child is an
                // "alias" context, since the real context might need the data
                if (!$this->_childContexts[$key]->isAlias()) {
                    $this->_styler->removeStyleData($this->_childContexts[$key]->getName(),
                        $this->_childContexts[$key]->getStartName(),
                        $this->_childContexts[$key]->getEndName());
                }
                unset($this->_childContexts[$key]);
            }
        }

        // Recurse into the remaining children, checking them
        $keys = array_keys($this->_childContexts);
        foreach ($keys as $key) {
            $this->_childContexts[$key]->trimUselessChildren($code);
        }
    }

    /**
     * Parses the given code, consuming from the front of $code everything that
     * belongs to this context (and to any child contexts that start inside it).
     *
     * The parsed pieces are pushed to the styler through the _addParseData*()
     * methods. $code is taken by reference and is trimmed as it is consumed.
     *
     * @param string $code                       The code to parse (modified in place)
     * @param int    $context_start_key          Key into $this->_contextDelimiters of the delimiter
     *                                           group that opened this context, or -1 if there was no opener
     * @param string $context_start_delimiter    The text of the opener, '' if unknown/not applicable
     * @param string $ignore_context             Name of a child context that must not be matched
     * @param string $first_char_of_next_context First character following this context's code
     * @return string|null The ender delimiter that finished this context, or nothing
     *                     if the context ran to the end of the code
     */
    function parseCode (&$code, $context_start_key = -1, $context_start_delimiter = '', $ignore_context = '',
        $first_char_of_next_context = '')
    {
        geshi_dbg('*** GeSHiContext::parseCode(' . $this->_contextName . ') ***', GESHI_DBG_PARSE);
        geshi_dbg('CODE: ' . str_replace("\n", "\r", substr($code, 0, 100)) . "<<<<<\n", GESHI_DBG_PARSE);
        if ($context_start_delimiter) geshi_dbg('Delimiter: ' . $context_start_delimiter, GESHI_DBG_PARSE);
        // Skip empty/almost empty contexts
        if (!$code || ' ' == $code) {
            $this->_addParseData($code);
            return;
        }

        // FIRST:
        // If there is an "overriding child context", it should immediately take control
        // of the entire parsing.
        // An "overriding child context" has the following properties:
        // * No starter or ender delimiter
        //
        // The overridden context has the following properties:
        // * Explicit starter/ender
        // * No children (they're not relevant after all)
        //
        // An example: HTML embeds CSS highlighting by using the html/css context. This context
        // has one overriding child context: css. After all, once in the CSS context, HTML doesn't
        // care anymore.
        // Likewise, javascript embedded in HTML is an overriding child - HTML does the work of deciding
        // exactly where javascript gets called, and javascript does the rest.
        //

        // If there is an overriding context...
        if ($this->_overridingChildContext) {
            // Find the end of this thing
            $finish_data = $this->_getContextEndData($code, $context_start_key, $context_start_delimiter, true); // true?
            if (false === $finish_data) {
                // No ender can be found, so everything that is left belongs to the
                // overriding child (previously this indexed into boolean false)
                $tmp = $code;
                $this->_overridingChildContext->parseCode($tmp);
                $code = '';
                return;
            }
            // If this context is the one that parses its ender, the ender is part of
            // the text handed to the overriding child
            if ($this->shouldParseEnder()) {
                $finish_data['pos'] += $finish_data['len'];
            }
            // Make a temp copy of the stuff the occ will parse
            $tmp = substr($code, 0, $finish_data['pos']);
            // Tell the occ to parse the copy
            $this->_overridingChildContext->parseCode($tmp); // start with no starter at all
            // trim the code
            $code = substr($code, $finish_data['pos']);
            return;
        }

        // Add the start of this context to the parse data if it is already known
        if ($context_start_delimiter) {
            $this->_addParseDataStart($context_start_delimiter);
            $code = substr($code, strlen($context_start_delimiter));
        }

        $original_length = strlen($code);

        while ('' != $code) {
            if (strlen($code) != $original_length) {
                geshi_dbg('CODE: ' . str_replace("\n", "\r", substr($code, 0, 100)) . "<<<<<\n", GESHI_DBG_PARSE);
            }
            // Second parameter: if we are at the start of the context or not
            // Pass the ignored context so it can be properly ignored
            $earliest_context_data = $this->_getEarliestContextData($code, strlen($code) == $original_length,
                $ignore_context);
            $finish_data = $this->_getContextEndData($code, $context_start_key, $context_start_delimiter,
                strlen($code) == $original_length);
            geshi_dbg('@bEarliest context data: pos=' . $earliest_context_data['pos'] . ', len=' .
                $earliest_context_data['len'], GESHI_DBG_PARSE);
            geshi_dbg('@bFinish data: pos=' . $finish_data['pos'] . ', len=' . $finish_data['len'], GESHI_DBG_PARSE);

            // If there is earliest context data we parse up to it then hand control to that context
            if ($earliest_context_data) {
                if ($finish_data) {
                    // Merge to work out who wins
                    if ($finish_data['pos'] <= $earliest_context_data['pos']) {
                        geshi_dbg('Earliest context and Finish data: finish is closer', GESHI_DBG_PARSE);

                        // Add the parse data
                        $this->_addParseData(substr($code, 0, $finish_data['pos']), substr($code, $finish_data['pos'], 1));

                        // If we should parse the ender, add the parse data
                        if ($this->shouldParseEnder()) {
                            $this->_addParseDataEnd(substr($code, $finish_data['pos'], $finish_data['len']));
                            $finish_data['pos'] += $finish_data['len'];
                        }
                        // Trim the code and return the unparsed delimiter
                        $code = substr($code, $finish_data['pos']);
                        return $finish_data['dlm'];
                    }
                    geshi_dbg('Earliest and finish data, but earliest gets priority', GESHI_DBG_PARSE);
                } else {
                    geshi_dbg('Earliest data but not finish data', GESHI_DBG_PARSE);
                }

                // Highlight up to delimiter
                ///The "+ len" can be manipulated to do starter and ender data
                if (!$earliest_context_data['con']->shouldParseStarter()) {
                    $earliest_context_data['pos'] += $earliest_context_data['len'];
                    //BUGFIX: null out dlm so it doesn't squash the actual rest of context
                    $earliest_context_data['dlm'] = '';
                }

                // We should parseCode() the substring.
                // BUT we have to remember that we should ignore the child context we've matched,
                // else we'll have a wee recursion problem on our hands...
                $tmp = substr($code, 0, $earliest_context_data['pos']);
                $this->parseCode($tmp, -1, '', $earliest_context_data['con']->getName(),
                    substr($code, $earliest_context_data['pos'], 1)); // parse with no starter
                $code = substr($code, $earliest_context_data['pos']);
                $ender = $earliest_context_data['con']->parseCode($code, $earliest_context_data['key'], $earliest_context_data['dlm']);
                // check that the earliest context actually wants the ender
                if (!$earliest_context_data['con']->shouldParseEnder() && $earliest_context_data['dlm'] == $ender) {
                    geshi_dbg('earliest_context_data[dlm]=' . $earliest_context_data['dlm'] . ', ender=' . $ender, GESHI_DBG_PARSE);
                    // second param = first char of next context
                    $this->_addParseData(substr($code, 0, strlen($ender)), substr($code, strlen($ender), 1));
                    $code = substr($code, strlen($ender));
                }
            } else {
                if ($finish_data) {
                    // finish early...
                    geshi_dbg('No earliest data but finish data', GESHI_DBG_PARSE);

                    // second param = first char of next context
                    $this->_addParseData(substr($code, 0, $finish_data['pos']), substr($code, $finish_data['pos'], 1));

                    if ($this->shouldParseEnder()) {
                        $this->_addParseDataEnd(substr($code, $finish_data['pos'], $finish_data['len']));
                        $finish_data['pos'] += $finish_data['len'];
                    }
                    $code = substr($code, $finish_data['pos']);
                    // return the delimiter for use above
                    return $finish_data['dlm'];
                } else {
                    geshi_dbg('No earliest or finish data', GESHI_DBG_PARSE);
                    // All remaining code is in this context
                    $this->_addParseData($code, $first_char_of_next_context);
                    $code = '';
                    return; // not really needed (?)
                }
            }
        }
    }

    /**
     * @return int Non-zero (truthy) if the GESHI_CHILD_PARSE_LEFT bit is set, i.e.
     *             this context wants to parse its start delimiters
     */
    function shouldParseStarter()
    {
        return $this->_delimiterParseData & GESHI_CHILD_PARSE_LEFT;
    }

    /**
     * @return int Non-zero (truthy) if the GESHI_CHILD_PARSE_RIGHT bit is set, i.e.
     *             this context wants to parse its end delimiters
     */
    function shouldParseEnder ()
    {
        return $this->_delimiterParseData & GESHI_CHILD_PARSE_RIGHT;
    }

    /**
     * Return true if it is possible for this context to parse this code at all,
     * i.e. at least one of its start delimiters occurs somewhere in the code
     *
     * @param string $code The code to look for start delimiters in
     * @return boolean
     */
    function contextCanStart ($code)
    {
        foreach ($this->_contextDelimiters as $delim_array) {
            foreach ($delim_array[0] as $delimiter) {
                geshi_dbg(' Checking delimiter ' . $delimiter . '... ', GESHI_DBG_PARSE, false);
                $data = geshi_get_position($code, $delimiter, 0, $delim_array[2]);

                if (false !== $data['pos']) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Works out the closest child context
     *
     * The child whose start delimiter occurs earliest wins; when two children can
     * start at the same position the one with the longer delimiter wins.
     *
     * @param string  $code             The code to search
     * @param boolean $start_of_context Whether $code begins at the start of this context
     * @param string  $ignore_context   The context to ignore (if there is one)
     * @return array|false Array with keys 'pos', 'len', 'con', 'key' and 'dlm',
     *                     or false if no child context can start
     */
    function _getEarliestContextData ($code, $start_of_context, $ignore_context)
    {
        geshi_dbg(' GeSHiContext::_getEarliestContextData(' . $this->_contextName . ', '. $start_of_context . ')', GESHI_DBG_API | GESHI_DBG_PARSE);
        $earliest_pos = false;
        $earliest_len = false;
        $earliest_con = null;
        $earliest_key = -1;
        $earliest_dlm = '';

        foreach ($this->_childContexts as $context) {
            if ($ignore_context == $context->getName()) {
                // whups, ignore you...
                continue;
            }
            $data = $context->getContextStartData($code, $start_of_context);
            geshi_dbg(' ' . $context->_contextName . ' says it can start from ' . $data['pos'], GESHI_DBG_PARSE, false);

            if (-1 != $data['pos']) {
                if ((false === $earliest_pos) || $earliest_pos > $data['pos'] ||
                    ($earliest_pos == $data['pos'] && $earliest_len < $data['len'])) {
                    geshi_dbg(' which is the earliest position', GESHI_DBG_PARSE);
                    $earliest_pos = $data['pos'];
                    $earliest_len = $data['len'];
                    $earliest_con = $context;
                    $earliest_key = $data['key'];
                    $earliest_dlm = $data['dlm'];
                } else {
                    geshi_dbg('', GESHI_DBG_PARSE);
                }
            } else {
                geshi_dbg('', GESHI_DBG_PARSE);
            }
        }
        // What do we need to know?
        // Well, assume that one of the child contexts can parse
        // Then, parseCode() is going to call parseCode() recursively on that object
        //
        if (false !== $earliest_pos) {
            return array('pos' => $earliest_pos, 'len' => $earliest_len, 'con' => $earliest_con, 'key' => $earliest_key, 'dlm' => $earliest_dlm);
        } else {
            return false;
        }
    }

    /**
     * Checks the context delimiters for this context against the passed
     * code to see if this context can help parse the code
     *
     * @param string  $code             The code to search
     * @param boolean $start_of_context Whether $code begins at the start of the parent context
     *                                  (not used by this implementation)
     * @return array Array with keys 'pos', 'len', 'key' and 'dlm'; 'pos' is -1
     *               if none of the start delimiters were found
     */
    function getContextStartData ($code, $start_of_context)
    {
        //geshi_dbg(' GeSHi::getContextStartInformation(' . $this->_contextName . ')', GESHI_DBG_PARSE | GESHI_DBG_API);
        geshi_dbg(' ' . $this->_contextName, GESHI_DBG_PARSE);

        $first_position = -1;
        $first_length = -1;
        $first_key = -1;
        $first_dlm = '';

        foreach ($this->_contextDelimiters as $key => $delim_array) {
            foreach ($delim_array[0] as $delimiter) {
                geshi_dbg(' Checking delimiter ' . $delimiter . '... ', GESHI_DBG_PARSE, false);
                $data = geshi_get_position($code, $delimiter, 0, $delim_array[2], true);
                geshi_dbg(print_r($data, true), GESHI_DBG_PARSE, false);
                $position = $data['pos'];
                $length = $data['len'];
                if (isset($data['tab'])) {
                    // A match table came back: remember it so that the enders can
                    // refer to it (see _substitutePlaceholders()), and use the
                    // text in its first entry as the delimiter
                    geshi_dbg('Table: ' . print_r($data['tab'], true), GESHI_DBG_PARSE);
                    $this->_startRegexTable = $data['tab'];
                    $delimiter = $data['tab'][0];
                }

                if (false !== $position) {
                    geshi_dbg('found at position ' . $position . ', checking... ', GESHI_DBG_PARSE, false);
                    if ((-1 == $first_position) || ($first_position > $position) ||
                        (($first_position == $position) && ($first_length < $length))) {
                        geshi_dbg('@bearliest! (length ' . $length . ')', GESHI_DBG_PARSE);
                        $first_position = $position;
                        $first_length = $length;
                        $first_key = $key;
                        $first_dlm = $delimiter;
                    }
                } else {
                    geshi_dbg('', GESHI_DBG_PARSE);
                }
            }
        }

        return array('pos' => $first_position, 'len' => $first_length,
            'key' => $first_key, 'dlm' => $first_dlm);
    }

    /**
     * Finds where this context can end in the given code
     *
     * The ender that occurs earliest wins; when two enders match at the same
     * position the one with the longer match wins.
     *
     * @param string  $code                 The code to search
     * @param int     $context_open_key     Key of the delimiter group that opened this context,
     *                                      -1 if the context has no opener (and so no ender)
     * @param string  $context_opener       The text that opened this context
     * @param boolean $beginning_of_context Whether $code begins at the start of this context
     * @return array|false Array with keys 'pos', 'len' and 'dlm', or false if
     *                     no ender was found
     */
    function _getContextEndData ($code, $context_open_key, $context_opener, $beginning_of_context)
    {
        geshi_dbg('GeSHiContext::_getContextEndData(' . $this->_contextName . ', ' . $context_open_key . ', '
            . $context_opener . ', ' . $beginning_of_context . ')', GESHI_DBG_API | GESHI_DBG_PARSE);
        $context_end_pos = false;
        $context_end_len = -1;
        $context_end_dlm = '';

        // Bail out if context open key tells us that there is no ender for this context
        if (-1 == $context_open_key) {
            geshi_dbg(' no opener so no ender', GESHI_DBG_PARSE);
            return false;
        }

        foreach ($this->_contextDelimiters[$context_open_key][1] as $ender) {
            geshi_dbg(' Checking ender: ' . str_replace("\n", '\n', $ender), GESHI_DBG_PARSE, false);
            $ender = $this->_substitutePlaceholders($ender);
            geshi_dbg(' converted to ' . $ender, GESHI_DBG_PARSE);

            $position = geshi_get_position($code, $ender);
            geshi_dbg(' Ender ' . $ender . ': ' . print_r($position, true), GESHI_DBG_PARSE);
            $length = $position['len'];
            $position = $position['pos'];

            // BUGFIX:skip around crap starters
            if (false === $position) {
                continue;
            }

            // Compare matched lengths. The stored length is a match length, so
            // comparing it against strlen($ender) would compare it with the length
            // of the pattern text whenever the ender is not a literal string.
            if ((false === $context_end_pos) || ($position < $context_end_pos) || ($position == $context_end_pos && $length > $context_end_len)) {
                $context_end_pos = $position;
                $context_end_len = $length;
                $context_end_dlm = $ender;
            }
        }
        geshi_dbg('Context ' . $this->_contextName . ' can finish at position ' . $context_end_pos, GESHI_DBG_PARSE);

        if (false !== $context_end_pos) {
            return array('pos' => $context_end_pos, 'len' => $context_end_len, 'dlm' => $context_end_dlm);
        } else {
            return false;
        }
    }

    /**
     * Adds parse data to the overall result
     *
     * This method is mainly designed so that subclasses of GeSHiContext can
     * override it to break the context up further - for example, GeSHiStringContext
     * uses it to add escape characters
     *
     * @param string The code to add
     * @param string The first character of the next context (used by GeSHiCodeContext)
     */
    function _addParseData ($code, $first_char_of_next_context = '')
    {
        $this->_styler->addParseData($code, $this->_contextName);
    }

    /**
     * Adds parse data for the start of a context to the overall result
     *
     * @param string The start delimiter text to add
     */
    function _addParseDataStart ($code)
    {
        $this->_styler->addParseDataStart($code, $this->_contextName, $this->_startName);
    }

    /**
     * Adds parse data for the end of a context to the overall result
     *
     * @param string The end delimiter text to add
     */
    function _addParseDataEnd ($code)
    {
        $this->_styler->addParseDataEnd($code, $this->_contextName, $this->_endName);
    }

    /**
     * Substitutes placeholders for values matched in opening regular expressions
     * for contexts with their actual values
     *
     * A placeholder is "!!!" followed by a key of the start regex table; it is
     * replaced by the quotemeta()'d text stored under that key.
     *
     * @param string $ender The ender, possibly containing placeholders
     * @return string The ender with any placeholders replaced
     */
    function _substitutePlaceholders ($ender)
    {
        if ($this->_startRegexTable) {
            foreach ($this->_startRegexTable as $key => $match) {
                $ender = str_replace('!!!' . $key, quotemeta($match), $ender);
            }
        }
        return $ender;
    }
}

?>
diff --git a/paste/include/geshi/classes/class.geshistringcontext.php b/paste/include/geshi/classes/class.geshistringcontext.php
new file mode 100644
index 0000000..1d712e5
--- /dev/null
+++ b/paste/include/geshi/classes/class.geshistringcontext.php
@@ -0,0 +1,190 @@
<?php
/**
 * GeSHi - Generic Syntax Highlighter
 *
 * For information on how to use GeSHi, please consult the documentation
 * found in the docs/ directory, or online at http://geshi.org/docs/
 *
 * This file is part of GeSHi.
 *
 * GeSHi is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GeSHi is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GeSHi; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * You can view a copy of the GNU GPL in the COPYING file that comes
 * with GeSHi, in the docs/ directory.
 *
 * @package core
 * @author Nigel McNie <nigel@geshi.org>
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL
 * @copyright (C) 2005 Nigel McNie
 * @version 1.1.0
 *
 */

/**
 * The GeSHiStringContext class. This class extends GeSHiContext to handle
 * the concept of escape characters that strings often use.
+ * + * @package core + * @author Nigel McNie <nigel@geshi.org> + * @since 1.1.0 + * @version 1.1.0 + * @see GeSHiContext + */ +class GeSHiStringContext extends GeSHiContext +{ + /**#@- + * @access private + */ + var $_escapeCharacters; + + // Characters that should be escaped + var $_charsToEscape; + + /** + * This is used by the 'DELIM' "character" in the _charsToEscape array. We + * abuse the fact that _addParseData will be called right after _getContextEndData + * if the context is to be passed + */ + var $_lastOpener; + + /**#@-*/ + + /** + * GetContextEndData + */ + function _getContextEndData ($code, $context_open_key, $context_opener) + { + geshi_dbg('GeSHiStringContext::_getContextEndData(' . $this->_contextName . ', ' . $context_open_key . ', ' . $context_opener . ')', GESHI_DBG_API | GESHI_DBG_PARSE); + $this->_lastOpener = $context_opener; + + foreach ($this->_contextDelimiters[$context_open_key][1] as $ender) { + geshi_dbg(' Checking ender: ' . $ender, GESHI_DBG_PARSE); + + // Prepare ender regexes if needed + $ender = $this->_substitutePlaceholders($ender); + geshi_dbg(' ender after substitution: ' . $ender, GESHI_DBG_PARSE); + + $pos = 0; + while (true) { + $pos = geshi_get_position($code, $ender, $pos); + if (false === $pos) { + break; + } + $len = $pos['len']; + $pos = $pos['pos']; + + $possible_string = substr($code, 0, $pos); + geshi_dbg(' String might be: ' . $possible_string, GESHI_DBG_PARSE); + + foreach ($this->_escapeCharacters as $escape_char) { + // remove escaped escape characters + $possible_string = str_replace($escape_char . $escape_char, '', $possible_string); + } + + geshi_dbg(' String with double escapes removed: ' . $possible_string, GESHI_DBG_PARSE); + + //@todo [blocking 1.1.1] possible bug: only last escape character checked here + if (substr($possible_string, -1) != $escape_char) { + // We may have found the correct ender. 
If we haven't, then this string + // never ends and we will set the end position to the length of the code + // substr($code, $pos, 1) == $ender + $endpos = geshi_get_position($code, $ender, $pos); + geshi_dbg(' position of ender: ' . $endpos['pos'], GESHI_DBG_PARSE); + $pos = ($pos && $endpos['pos'] === $pos) ? $pos : strlen($code); + return array('pos' => $pos, 'len' => $len, 'dlm' => $ender); + } + // else, start further up + ++$pos; + } + } + return false; + } + + /** + * Overrides addParseData to add escape characters also + */ + function _addParseData ($code, $first_char_of_next_context = '') + { + geshi_dbg('GeSHiStringContext::_addParseData(' . substr($code, 0, 15) . '...)', GESHI_DBG_PARSE); + + $length = strlen($code); + $string = ''; + for ($i = 0; $i < $length; $i++) { + $char = substr($code, $i, 1); + geshi_dbg('Char: ' . $char, GESHI_DBG_PARSE); + $skip = false; + + foreach ($this->_escapeCharacters as $escape_char) { + $len = 1; + if ($char == $escape_char && (false !== ($len = $this->_shouldBeEscaped(substr($code, $i + 1))))) { + geshi_dbg('Match: len = ' . $len, GESHI_DBG_PARSE); + if ($string) { + $this->_styler->addParseData($string, $this->_contextName); + $string = ''; + } + // Needs a better name than /esc + $this->_styler->addParseData($escape_char . substr($code, $i + 1, $len), $this->_contextName . '/esc'); + // FastForward + $i += $len; + $skip = true; + break; + } + } + + if (!$skip) { + $string .= $char; + } + } + if ($string) { + $this->_styler->addParseData($string, $this->_contextName); + } + } + + /** + * Checks whether the character(s) at the start of the parameter string are + * characters that should be escaped. 
+ * + * @param string The string to check the beginning of for escape characters + * @return int|false The length of the escape character sequence, else false + */ + function _shouldBeEscaped ($code) + { + // Feature: If 'DELIM' is one of the "characters" in the _charsToEscape array, then it is + // replaced by the context opener + $chars_to_escape = str_replace('DELIM', $this->_lastOpener, $this->_charsToEscape); + + geshi_dbg('Checking: ' . substr($code, 0, 15), GESHI_DBG_PARSE); + foreach ($chars_to_escape as $match) { + if ('REGEX' != substr($match, 0, 5)) { + geshi_dbg('Test: ' . $match, GESHI_DBG_PARSE); + if (substr($code, 0, 1) == $match) { + return 1; + } + } else { + geshi_dbg(' Testing via regex: ' . $match . '... ', GESHI_DBG_PARSE, false); + $data = geshi_get_position($code, $match, 0); + if (0 === $data['pos']) { + geshi_dbg('match, data = ' . print_r($data, true), GESHI_DBG_PARSE); + return $data['len']; + } + geshi_dbg('no match', GESHI_DBG_PARSE); + } + } + // No matches... + return false; + } +} + +?> diff --git a/paste/include/geshi/classes/class.geshistyler.php b/paste/include/geshi/classes/class.geshistyler.php new file mode 100644 index 0000000..fa56478 --- /dev/null +++ b/paste/include/geshi/classes/class.geshistyler.php @@ -0,0 +1,224 @@ +<?php +/** + * GeSHi - Generic Syntax Highlighter + * + * For information on how to use GeSHi, please consult the documentation + * found in the docs/ directory, or online at http://geshi.org/docs/ + * + * This file is part of GeSHi. + * + * GeSHi is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GeSHi is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GeSHi; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * You can view a copy of the GNU GPL in the COPYING file that comes
 * with GeSHi, in the docs/ directory.
 *
 * @package core
 * @author Nigel McNie <nigel@geshi.org>
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL
 * @copyright (C) 2005 Nigel McNie
 * @version 1.1.0
 *
 */

/**
 * The GeSHiStyler class
 *
 * Holds the style strings registered per context name, and collects the
 * parse data (pieces of code tagged with the name of the context they were
 * found in) that the contexts add while parsing.
 *
 * @package core
 * @author Nigel McNie <nigel@geshi.org>
 * @since 1.1.0
 * @version 1.1.0
 */
class GeSHiStyler
{
    /**
     * @var string
     */
    var $charset;

    /**
     * @var string
     */
    var $fileExtension;

    /**
     * Style strings, indexed by context name
     * @var array
     */
    var $_styleData = array();

    /**
     * Parse data entries, each one array(code, context name, url)
     * @var array
     */
    var $_parseData;

    /**
     * Index of the most recently added parse data entry
     * @var int
     */
    var $_parseDataPointer = 0;

    /**
     * Cache strings, indexed by cached file name
     * @var array
     */
    var $_contextCacheData = array();

    /**
     * Sets the style for a context
     *
     * @param string $context_name The name of the context
     * @param string $style        The style to use for it
     * @param string $start_name   Currently unused
     * @param string $end_name     Currently unused
     */
    function setStyle ($context_name, $style, $start_name = 'start', $end_name = 'end')
    {
        // @todo [blocking 1.1.1] Why is this called sometimes with blank data?
        geshi_dbg('GeSHiStyler::setStyle(' . $context_name . ', ' . $style . ')', GESHI_DBG_PARSE);
        $this->_styleData[$context_name] = $style;
        /*if (!isset($this->_styleData["$context_name/$start_name"])) {
            $this->_styleData["$context_name/$start_name"] = $style;
        }
        if (!isset($this->_styleData["$context_name/$end_name"])) {
            $this->_styleData["$context_name/$end_name"] = $style;
        }*/
    }

    /*function setStartStyle ($context_name, $style)
    {
        $this->_styleData["$context_name/$this->_startName"] = $style;
    }

    function setStartName ($name)
    {
        $this->_startName = $name;
    }*/

    /**
     * Removes the style data for a context and for its starter and ender
     *
     * @param string $context_name       The name of the context
     * @param string $context_start_name The name used for the context's starter
     * @param string $context_end_name   The name used for the context's ender
     */
    function removeStyleData ($context_name, $context_start_name = 'start', $context_end_name = 'end')
    {
        unset($this->_styleData[$context_name]);
        unset($this->_styleData["$context_name/$context_start_name"]);
        unset($this->_styleData["$context_name/$context_end_name"]);
        geshi_dbg(' removed style data for ' . $context_name, GESHI_DBG_PARSE);
    }

    /*function setEndStyle ($context_name, $style)
    {
        $this->_styleData["$context_name/$this->_endName"] = $style;
    }

    function setEndName ($name)
    {
        $this->_endName = $name;
    }*/

    /**
     * Gets the style for a context
     *
     * A name ending in "/end" or "/start" that has no style of its own gets
     * the style of the name without that suffix. Anything else that has no
     * style gets the default style. Whatever is returned is remembered for
     * the next call.
     *
     * @param string $context_name The name of the context
     * @return string The style for the context
     */
    function getStyle ($context_name)
    {
        if (isset($this->_styleData[$context_name])) {
            return $this->_styleData[$context_name];
        }
        // If style for starter/ender requested and we got here, use the style of
        // the context itself. Going through getStyle() means a context that has
        // no style either yields the default instead of an undefined index.
        foreach (array('/end', '/start') as $suffix) {
            $suffix_length = strlen($suffix);
            if ($suffix == substr($context_name, -$suffix_length)) {
                $this->_styleData[$context_name] = $this->getStyle(substr($context_name, 0, -$suffix_length));
                return $this->_styleData[$context_name];
            }
        }

        //@todo [blocking 1.1.5] Make the default style for otherwise unstyled elements configurable
        $this->_styleData[$context_name] = 'color:#000;';
        return 'color:#000;';
    }
    /*
    function getStyleStart ($context_name)
    {
        if (isset($this->_styleData["$context_name/$this->_startName"])) {
            return $this->_styleData["$context_name/$this->_startName"];
        }
        $this->_styleData["$context_name/$this->_startName"] = $this->getStyle($context_name);
        return $this->_styleData["$context_name/$this->_startName"];
    }

    function getStyleEnd ($context_name)
    {
        if (isset($this->_styleData["$context_name/$this->_endName"])) {
            return $this->_styleData["$context_name/$this->_endName"];
        }
        $this->_styleData["$context_name/$this->_endName"] = $this->getStyle($context_name);
        return $this->_styleData["$context_name/$this->_endName"];
    }*/
    /*
    function startIsUnique ($context_name)
    {
        return (isset($this->_styleData["$context_name/$this->_startName"])
            && '' != $this->_styleData["$context_name/$this->_startName"]
            && $this->_styleData["$context_name/$this->_startName"] != $this->_styleData[$context_name]);
    }

    function endIsUnique ($context_name)
    {
        $r = (isset($this->_styleData["$context_name/$this->_endName"])
            && '' != $this->_styleData["$context_name/$this->_endName"]
            && $this->_styleData["$context_name/$this->_endName"] != $this->_styleData[$context_name]);
        geshi_dbg('GeSHiStyler::endIsUnique(' . $context_name . ') = ' . $r, GESHI_DBG_PARSE);
        return $r;
    }
    */

    /**
     * Throws away all parse data collected so far
     */
    function resetParseData ()
    {
        $this->_parseData = null;
        $this->_parseDataPointer = 0;
    }

    /**
     * This method adds parse data. It tries to merge it also if two
     * consecutive contexts with the same name add parse data (which is
     * very possible).
     *
     * @param string $code         The code to add
     * @param string $context_name The name of the context the code is in
     * @param string $url          A URL to go with the code
     */
    function addParseData ($code, $context_name, $url = '')
    {
        $pointer = $this->_parseDataPointer;
        // There is nothing to merge with until the first entry has been added,
        // and entries are only merged when the URL is the same as well
        if (isset($this->_parseData[$pointer])
            && $context_name == $this->_parseData[$pointer][1]
            && $url == $this->_parseData[$pointer][2]) {
            // same context, same URL
            $this->_parseData[$pointer][0] .= $code;
        } else {
            $this->_parseData[++$this->_parseDataPointer] = array($code, $context_name, $url);
        }
    }

    /**
     * Adds parse data for the starter of a context
     *
     * @param string $code         The code to add
     * @param string $context_name The name of the context
     * @param string $start_name   The name used for the context's starter
     */
    function addParseDataStart ($code, $context_name, $start_name = 'start')
    {
        $this->addParseData($code, "$context_name/$start_name");
    }

    /**
     * Adds parse data for the ender of a context
     *
     * @param string $code         The code to add
     * @param string $context_name The name of the context
     * @param string $end_name     The name used for the context's ender
     */
    function addParseDataEnd ($code, $context_name, $end_name = 'end')
    {
        $this->addParseData($code, "$context_name/$end_name");
    }

    /**
     * @return array|null The parse data collected so far, null if there is none
     */
    function getParseData ()
    {
        return $this->_parseData;
    }

    /**
     * Sets cache data
     *
     * @param string $cached_file_name The name to store the data under
     * @param string $cache_str        The data to store
     */
    function setCacheData ($cached_file_name, $cache_str)
    {
        $this->_contextCacheData[$cached_file_name] = $cache_str;
    }

    /**
     * Gets cache data
     *
     * @param string $cached_file_name The name the data was stored under
     * @return string|null The stored data, null if there is none
     */
    function getCacheData ($cached_file_name)
    {
        return isset($this->_contextCacheData[$cached_file_name]) ?
            $this->_contextCacheData[$cached_file_name] : null;
    }
}

?>
diff --git a/paste/include/geshi/classes/css/class.geshicssinlinemediacontext.php b/paste/include/geshi/classes/css/class.geshicssinlinemediacontext.php
new file mode 100644
index 0000000..28bfb5e
--- /dev/null
+++ b/paste/include/geshi/classes/css/class.geshicssinlinemediacontext.php
@@ -0,0 +1,60 @@
<?php
/**
 * GeSHi - Generic Syntax Highlighter
 *
 * For information on how to use GeSHi, please consult the documentation
 * found in the docs/ directory, or online at http://geshi.org/docs/
 *
 * This file is part of GeSHi.
 *
 * GeSHi is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
+ * + * GeSHi is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GeSHi; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * You can view a copy of the GNU GPL in the COPYING file that comes + * with GeSHi, in the docs/ directory. + * + * @package lang + * @author Nigel McNie <nigel@geshi.org> + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL + * @copyright (C) 2005 Nigel McNie + * @version 1.1.0 + * + */ + +/** + * The GeSHiCSSInlineMediaContext class + * + * @package lang + * @author Nigel McNie <nigel@geshi.org> + * @since 1.1.0 + * @version 1.1.0 + * @see GeSHiContext + */ +class GeSHiCSSInlineMediaContext extends GeSHiContext +{ + /** + * Overrides {@link GeSHiContext::_addParseDataStart()} to + * highlight the start of the inline media context correctly + * + * @param string The code that is part of the start of this context + * @access private + */ + function _addParseDataStart ($code) + { + $this->_styler->addParseData('@media', $this->_contextName . '/starter'); + $this->_styler->addParseDataStart(substr($code, 6), $this->_contextName); + } +} + +?> diff --git a/paste/include/geshi/classes/php/class.geshiphpdoublestringcontext.php b/paste/include/geshi/classes/php/class.geshiphpdoublestringcontext.php new file mode 100644 index 0000000..c7532eb --- /dev/null +++ b/paste/include/geshi/classes/php/class.geshiphpdoublestringcontext.php @@ -0,0 +1,182 @@ +<?php +/** + * GeSHi - Generic Syntax Highlighter + * + * For information on how to use GeSHi, please consult the documentation + * found in the docs/ directory, or online at http://geshi.org/docs/ + * + * This file is part of GeSHi. 
+ * + * GeSHi is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GeSHi is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GeSHi; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * You can view a copy of the GNU GPL in the COPYING file that comes + * with GeSHi, in the docs/ directory. + * + * @package lang + * @author Nigel McNie <nigel@geshi.org> + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL + * @copyright (C) 2005 Nigel McNie + * @version 1.1.0 + * + */ + +/** + * The GeSHiPHPDoubleStringContext class represents a PHP double string + * + * @package lang + * @author Nigel McNie <nigel@geshi.org> + * @since 1.1.0 + * @version 1.1.0 + * @see GeSHiStringContext, GeSHiContext + */ +class GeSHiPHPDoubleStringContext extends GeSHiStringContext +{ + /** + * A cached copy of the parent name + * @var string + * @access private + */ + var $_parentName; + + /** + * The regular expressions used to match variables + * in this context. + * + * {@internal Do Not Change These! 
The code logic + * depends on them, they are just assigned here so + * that they aren't assigned every time the + * _addParseData method is called}} + * + * @var array + * @access private + */ + var $_regexes = array( + 'REGEX#(\{?\$\$?\{?[a-zA-Z_][a-zA-Z0-9_]*\}?)#', + 'REGEX#(\{?\$\$?\{?[a-zA-Z_][a-zA-Z0-9_]*\[[\$a-zA-Z0-9_\s\[\]\']*\]\}?)#', + 'REGEX#(\{?)(\$\$?\{?[a-zA-Z_][a-zA-Z0-9_]*)(\s*->\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\}?)#' + ); + + /** + * Loads data for a PHP Double String Context. + * + * @var GeSHiStyler The styler to be used for this context + */ + function load (&$styler) + { + parent::load($styler); + $this->_parentName = parent::getName(); + } + + /** + * Adds code detected as being in this context to the parse data + */ + function _addParseData ($code, $first_char_of_next_context = '') + { + geshi_dbg('GeSHiPHPDoubleStringContext::_addParseData(' . substr($code, 0, 15) . '...)', GESHI_DBG_PARSE); + + while (true) { + $earliest_data = array('pos' => false, 'len' => 0); + foreach ($this->_regexes as $regex) { + $data = geshi_get_position($code, $regex, 0, false, true); // request table + if ((false != $data['pos'] && false === $earliest_data['pos']) || + (false !== $data['pos']) && + (($data['pos'] < $earliest_data['pos']) || + ($data['pos'] == $earliest_data['pos'] && $data['len'] > $earliest_data['len']))) { + $earliest_data = $data; + } + } + + if (false === $earliest_data['pos']) { + // No more variables in this string + break; + } + + // bugfix: because we match a var, it might have been escaped. + // so only do to -1 so we can catch slash if it has been + $pos = ($earliest_data['pos']) ? $earliest_data['pos'] - 1 : 0; + $len = ($earliest_data['pos']) ? $earliest_data['len'] + 1 : $earliest_data['len']; + parent::_addParseData(substr($code, 0, $pos)); + + // Now the entire possible var is in: + $possible_var = substr($code, $pos, $len); + geshi_dbg('Found variable at position ' . $earliest_data['pos'] . '(' . $possible_var . 
')', GESHI_DBG_PARSE); + + // Check that the dollar sign that started this variable was not escaped + //$first_part = str_replace('\\\\', '', substr($code, 0, $pos)); + //if ('\\' == substr($first_part, -1)) { + // If \\ before var and { is not next character after that... + if ('\\' == substr($possible_var, 0, 1) && '{' != substr($possible_var, 1, 1)) { + // This variable has been escaped, so add the escaped dollar sign + // as the correct context, and the rest of the variable (recurse to catch + // other variables inside this possible variable) + geshi_dbg('Variable was escaped', GESHI_DBG_PARSE); + $this->_styler->addParseData(substr($possible_var, 0, 2), $this->_parentName . '/esc'); + $this->_addParseData(substr($possible_var, 2)); + } else { + // Add first character that might have been a \\ but in fact isn't to the parent + // but only do it if we had to modify the position + if ('$' != substr($possible_var, 0, 1)) { + parent::_addParseData(substr($possible_var, 0, 1)); + $possible_var = substr($possible_var, 1); + } + + // Many checks could go in here... + // @todo [blocking 1.1.5] check for ${foo} variables: start { matched by end } + // because at the moment ${foo is matched for example. + if ('{' == substr($possible_var, 0, 1)) { + if ('}' == substr($possible_var, -1)) { + $start_brace = '{'; + } else { + $start_brace = ''; + parent::_addParseData('{'); + // remove brace from $possible_var. This will only be used + // if the variable isn't an OO variable anyway... + $possible_var = substr($possible_var, 1); + } + } else { + $start_brace = ''; + } + + if (isset($earliest_data['tab'][5])) { + // Then we matched off the third regex - the one that does objects + // The first { if there is one, and $this (which is in index 2 + $this->_styler->addParseData($start_brace . $earliest_data['tab'][2], $this->_parentName . '/var'); + // The -> with any whitespace around it + $this->_styler->addParseData($earliest_data['tab'][3], $this->_parentName . 
'/sym0'); + // The method name + $this->_styler->addParseData($earliest_data['tab'][4], $this->_parentName . '/oodynamic'); + // The closing }, if any + if ($earliest_data['tab'][5]) { + if ($start_brace) { + $this->_styler->addParseData($earliest_data['tab'][5], $this->_parentName . '/var'); + } else { + parent::_addParseData('}'); + } + } + } else { + $this->_styler->addParseData($possible_var, $this->_parentName . '/var'); + } + + } + + // Chop off what we have done + $code = substr($code, $earliest_data['pos'] + $earliest_data['len']); + } + // Add the rest + parent::_addParseData($code, $first_char_of_next_context); + } +} + +?> |