diff options
author | Alexander Sulfrian <alexander@sulfrian.net> | 2009-12-11 01:16:01 +0100 |
---|---|---|
committer | Alexander Sulfrian <alexander@sulfrian.net> | 2009-12-11 01:16:20 +0100 |
commit | 48d7424647b146a66c5bde93ee836919933a4150 (patch) | |
tree | 3eb816e42a0cd857cf831c57baa59ad7cee478c7 /paste/include/geshi/classes/class.geshicodecontext.php | |
parent | 8242c982ebcdfc67274c8ab79a2f34aa451872d7 (diff) | |
download | rafb-nopaste-48d7424647b146a66c5bde93ee836919933a4150.tar.gz rafb-nopaste-48d7424647b146a66c5bde93ee836919933a4150.tar.xz rafb-nopaste-48d7424647b146a66c5bde93ee836919933a4150.zip |
added geshi syntax highlighter
Diffstat (limited to 'paste/include/geshi/classes/class.geshicodecontext.php')
-rw-r--r-- | paste/include/geshi/classes/class.geshicodecontext.php | 550 |
1 files changed, 550 insertions, 0 deletions
diff --git a/paste/include/geshi/classes/class.geshicodecontext.php b/paste/include/geshi/classes/class.geshicodecontext.php new file mode 100644 index 0000000..53ac023 --- /dev/null +++ b/paste/include/geshi/classes/class.geshicodecontext.php @@ -0,0 +1,550 @@ +<?php +/** + * GeSHi - Generic Syntax Highlighter + * + * For information on how to use GeSHi, please consult the documentation + * found in the docs/ directory, or online at http://geshi.org/docs/ + * + * This file is part of GeSHi. + * + * GeSHi is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GeSHi is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GeSHi; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * You can view a copy of the GNU GPL in the COPYING file that comes + * with GeSHi, in the docs/ directory. + * + * @package core + * @author Nigel McNie <nigel@geshi.org> + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL + * @copyright (C) 2005 Nigel McNie + * @version 1.1.0 + * + */ + +/** + * This class represents a "Code" context - one where keywords and + * regular expressions can be used to highlight part of the context. + * + * If the context you are in requires keyword or regular expression + * support, then GeSHiCodeContext is the context type that you need. + * + * <b>Usage:</b> + * + * Use this class in a context or language file, to define a code + * context: + * + * <pre> 'CHILD_CONTEXTS' => array( + * ... + * new GeSHiCodeContext([params]) + * ... + * ),</pre> + * + * <pre> 'CONTEXTS' => array( + * ... + * new GeSHiCodeContext([params]) + * ... + * ),</pre> + * + * @package core + * @author Nigel McNie <nigel@geshi.org> + * @since 1.1.0 + * @version 1.1.0 + * @see GeSHiContext + * + */ +class GeSHiCodeContext extends GeSHiContext +{ + /**#@+ + * @var array + * @access private + */ + + /** + * Keywords for this code context + * @var array + */ + var $_contextKeywords = array(); + + /** + * Characters that cannot appear before a keyword + * @var array + */ + var $_contextCharactersDisallowedBeforeKeywords = array(); + + /** + * Characters that cannot appear after a keyword + * @var array + */ + var $_contextCharactersDisallowedAfterKeywords = array(); + + /** + * A lookup table for use with regex matched starters/enders + * @var array + */ + var $_contextKeywordLookup; + + /** + * A symbol array + * @var array + */ + var $_contextSymbols = array(); + + /** + * A regex array + * @var array + */ + var $_contextRegexps = array(); + + /** + * An array of object "splitters" + */ + var $_objectSplitters = array(); + + /** + * Whether this code context has finished loading yet + * @todo [blocking 1.1.1] Do this by static variable? + */ + var $_codeContextLoaded = false; + + /**#@-*/ + + + /** + * Redefinition of {@link GeSHiContext::load()} in order to also + * load keywords, regular expressions etc. + * + */ + function load (&$styler) + { + parent::load($styler); + + if ($this->_codeContextLoaded) { + return; + } + $this->_codeContextLoaded = true; + + // Add regex for methods + foreach ($this->_objectSplitters as $data) { + $splitter_match = ''; + foreach ($data[0] as $splitter) { + $splitter_match .= preg_quote($splitter) . '|'; + } + + $this->_contextRegexps[] = array( + 0 => array( + "#(" . substr($splitter_match, 0, -1) . ")(\s*)([a-zA-Z\*\(_][a-zA-Z0-9_\*]*)#" + ), + 1 => '', // char to check for + 2 => array( + 1 => true, + 2 => true, // highlight splitter + 3 => array($data[1], $data[2], $data[3]) // $data[3] says whether to give code a go at the match first + ) + ); + } + } + + /** + * Overrides GeSHiContext::loadStyleData to load style data + */ + function loadStyleData () + { + // @todo [blocking 1.1.1] Skip if already loaded??? + // Set styles for keywords + //geshi_dbg('Loading style data for context ' . $this->getName(), GESHI_DBG_PARSE); + // @todo [blocking 1.1.1] Style data for infectious context loaded many times, could be reduced to one? + //@todo [blocking 1.1.1] array_keys loop construct if possible + foreach ($this->_contextKeywords as $keyword_group_array) { + geshi_dbg($keyword_group_array[1] . ' ' . $keyword_group_array[2], GESHI_DBG_PARSE); + $this->_styler->setStyle($keyword_group_array[1], $keyword_group_array[2]); + } + + // Set styles for regex groups + foreach ($this->_contextRegexps as $data) { + foreach ($data[2] as $group) { + $this->_styler->setStyle($group[0], $group[1]); + } + } + + // Set styles for symbols + foreach ($this->_contextSymbols as $data) { + $this->_styler->setStyle($data[1], $data[2]); + } + + parent::loadStyleData(); + } + + /** + * Overrides {@link GeSHiContext::_addParseData()} to highlight a code context, including + * keywords, symbols and regular expression matches + * + * @param string The code to add as parse data + * @param string The first character of the context after this + */ + function _addParseData ($code, $first_char_of_next_context = '') + { + //$first_char_of_next_context = ''; + geshi_dbg('GeSHiCodeContext::_addParseData(' . substr($code, 0, 15) . ', ' . $first_char_of_next_context . ')', GESHI_DBG_PARSE); + + $regex_matches = array(); + foreach ($this->_contextRegexps as $regex_group_key => $regex_data) { + geshi_dbg(' Regex group: ' . $regex_group_key, GESHI_DBG_PARSE); + // Set style of this group + // $regex_data = array( + // 0 => regex (with brackets to signify groupings + // 1 => a string that if not matched, this part ain't done (speeds stuff up) + // 2 => array( + // 1 => array(name of first group, default style of first group) + // 2 => array(name of second group, ... + // ... + if (!$regex_data[1] || false !== strpos($code, $regex_data[1])) { + foreach ($regex_data[0] as $regex) { + geshi_dbg(' Trying regex ' . $regex . '... ', GESHI_DBG_PARSE, false); + $matches = array(); + preg_match_all($regex, $code, $matches); + geshi_dbg('found ' . count($matches[0]) . ' matches', GESHI_DBG_PARSE); + + // If there are matches... + if (count($matches[0])) { + foreach ($matches[0] as $key => $match) { + // $match is the full match of the regex. We need to get it out of the string, + // although we also need its position in the string + $pos = strpos($code, $match); + // neat splicey jobbie to get rid of the keyword (can't do str_replace...) + // ADDED SPACE FILLERS + $code = substr($code, 0, $pos) . str_repeat("\0", strlen($match)) . substr($code, $pos + strlen($match)); + + // make an array of data for this regex + $data = array(); + foreach ($matches as $match_data) { + $data[] = $match_data[$key]; + } + $regex_matches[] = array(0 => $pos, 1 => $regex_group_key, 2 => $data); + } + } + } + } + } + geshi_dbg(' Regex matches: ' . str_replace("\n", "\r", print_r($regex_matches, true)), GESHI_DBG_PARSE); + + $regex_replacements = array(); + foreach ($regex_matches as $data) { + // $data[0] is the pos + // $data[1] is the key + // $data[2][0] contains the full match + // $data[2][1] contains what is in the first brackets + // $data[2][2] contains what is in the second brackets... + foreach ($data[2] as $key => $match) { + // skip the full match which is in $data[2][0] + if ($key) { + // If there is a name for this bracket group ($key) in this regex group ($data[1])... + if (isset($this->_contextRegexps[$data[1]][2][$key]) && is_array($this->_contextRegexps[$data[1]][2][$key])) { + // If we should be attempting to have a go at code highlighting first... + if (/*isset($this->_contextRegexps[$data[1]][2][$key][2]) && */ + true === $this->_contextRegexps[$data[1]][2][$key][2]) { + // Highlight the match, and put the code into the result + $highlighted_matches = $this->_codeContextHighlight($match); + foreach ($highlighted_matches as $stuff) { + if ($stuff[1] == $this->_contextName) { + $regex_replacements[$data[0]][] = array($stuff[0], $this->_contextRegexps[$data[1]][2][$key][0]); + } else { + $regex_replacements[$data[0]][] = $stuff; + } + } + } else { + $regex_replacements[$data[0]][] = array($match, + $this->_contextRegexps[$data[1]][2][$key][0]); //name in [0], s in [1] + } + // Else, perhaps it is simply set. If so, we highlight it as if it were + // part of the code context + } elseif (isset($this->_contextRegexps[$data[1]][2][$key])) { + // this may end up as array(array(match,name),array(match,name),array..) + //@todo [blocking 1.1.1] may need to pass the first char of next context here if it's at the end... + $parse_data = $this->_codeContextHighlight($match); + foreach ($parse_data as $pdata) { + $regex_replacements[$data[0]][] = $pdata; + } + } + // Else, don't add it at all... + } + } + } + geshi_dbg(' Regex replacements: ' . str_replace("\n", "\r", print_r($regex_replacements, true)), GESHI_DBG_PARSE); + // Now what we do is make an array that looks like this: + // array( + // [position] => [replacement for regex] + // [position] => [replacement for regex] + // ... + // ) + // so we can put them back in as we build the result + + + // The aim is to end up with an array( + // 0 => array(code, contextname) + // 1 => array(code, contextname) + // 2 => ... + // + // $regex_replacements is an array( + // pos => array of arrays like the above, in order + // pos => ... + // + // codeContextHighlight should return something similar + + $parse_data = $this->_codeContextHighlight($code, $regex_replacements, $first_char_of_next_context); + foreach ($parse_data as $data) { + if (!isset($data[2])) { + $this->_styler->addParseData($data[0], $data[1]); + } else { + $this->_styler->addParseData($data[0], $data[1], $data[2]); + } + } + } + + + /** + * Given code, returns an array of context data about it + */ + function _codeContextHighlight ($code, $regex_replacements = array(), $first_char_of_next_context = '') + { + geshi_dbg('GeSHiCodeContext::_codeContextHighlight(' . substr($code, 0, 15) . ', ' . + (($regex_replacements) ? 'array(...)' : 'null') . ', ' . $first_char_of_next_context . ')', GESHI_DBG_PARSE); + //$first_char_of_next_context = ''; + + if (!is_array($this->_contextKeywordLookup)) { + $this->_createContextKeywordLookup(); + } + + $result = array(0 => array('', '')); + $result_pointer = 0; + $length = strlen($code); + $keyword_match_allowed = true; + $earliest_pos = false; + $earliest_keyword = ''; + $earliest_keyword_group = 0; + + // For each character + for ($i = 0; $i < $length; $i++) { + if (isset($regex_replacements[$i])) { + geshi_dbg(' Regex replacements available at position ' . $i . ': ' . $regex_replacements[$i][0][0] . '...', GESHI_DBG_PARSE); + // There's regular expressions expected to go here + foreach ($regex_replacements[$i] as $replacement) { + $result[++$result_pointer] = $replacement; + } + // Allow keyword matching immediately after regular expressions + $keyword_match_allowed = true; + } + + $char = substr($code, $i, 1); + if ("\0" == $char) { + // Not interested in null characters inserted by regex replacements + continue; + } + + // Take symbols into account before doing this + if (!$this->_contextKeywordLookup) { + $this->_checkForSymbol($char, $result, $result_pointer); + continue; + } + + geshi_dbg('@b Current char is: ' . str_replace("\n", '\n', $char), GESHI_DBG_PARSE); + + if ($keyword_match_allowed && isset($this->_contextKeywordLookup[$char])) { + foreach ($this->_contextKeywordLookup[$char] as $keyword_array) { + // keyword array is 0 => keyword, 1 => kwgroup + if (strlen($keyword_array[0]) < $earliest_keyword) { + // We can skip keywords that are shorter than the best + // earliest we can currently do + geshi_dbg(' [skipping ' . $keyword_array[0], GESHI_DBG_PARSE); + continue; + } + geshi_dbg(' Checking code for ' . $keyword_array[0], GESHI_DBG_PARSE); + // If case sensitive + if ($this->_contextKeywords[$keyword_array[1]][3]) { + $next_part_is_keyword = ($keyword_array[0] == substr($code, $i, strlen($keyword_array[0]))); + } else { + $next_part_is_keyword = (strtolower($keyword_array[0]) == strtolower(substr($code, $i, strlen($keyword_array[0])))); + } + + geshi_dbg(" next part is keyword: $next_part_is_keyword", GESHI_DBG_PARSE); + // OPTIMIZE (use lookup to remember for length $foo(1 => false, 2 => false) so if kw is length 1 or 2 then don't need to check + //$after_allowed = ( !in_array(substr($code, $i + strlen($keyword_array[0]), 1), array_diff($this->_context_characters_disallowed_after_keywords, $this->_context_keywords[$keyword_array[1]][4])) ); + // the first char of the keyword is always $char??? + $after_char = substr($code, $i + strlen($keyword_array[0]), 1); + // if '' == $after_char, it's at the end of the context so we need + // the first char from the next context... + if ( '' == $after_char ) $after_char = $first_char_of_next_context; + + geshi_dbg(" after char to check: |$after_char|", GESHI_DBG_PARSE); + $after_allowed = ('' == $after_char || !ctype_alnum($after_char) || + (ctype_alnum($after_char) && + !ctype_alnum($char)) ); + $after_allowed = ($after_allowed && + !in_array($after_char, $this->_contextCharactersDisallowedAfterKeywords)); + // Disallow underscores after keywords + $after_allowed = ($after_allowed && ($after_char != '_')); + + // If where we are up to is a keyword, and it's allowed to be here (before was already + // tested by $keyword_match_allowed) + if ($next_part_is_keyword && $after_allowed) { + //if ( false === $earliest_pos || $pos < $earliest_pos || ($pos == $earliest_pos && strlen($keyword_array[0]) > strlen($earliest_keyword)) ) + if (strlen($keyword_array[0]) > strlen($earliest_keyword)) { + geshi_dbg('@bfound', GESHI_DBG_PARSE); + // what is _pos for? + // What are any of them for?? + $earliest_pos = true;//$pos; + // BUGFIX: just in case case sensitive matching used, get data from string + // instead of from data array + $earliest_keyword = substr($code, $i, strlen($keyword_array[0])); + $earliest_keyword_group = $keyword_array[1]; + } + } + } + } + + // reset matching of keywords + //$keyword_match_allowed = false; + + //echo "Current pos = $i, earliest keyword is " . htmlspecialchars($earliest_keyword) . ' at ' . $earliest_pos . "\n"; + //echo "Symbol string is |$current_symbols|\n"; + + if (false !== $earliest_pos) { + geshi_dbg('Keyword matched: ' . $earliest_keyword, GESHI_DBG_PARSE); + // there's a keyword match! + + $result[++$result_pointer] = array($earliest_keyword, + $this->_contextKeywords[$earliest_keyword_group][1], + $this->_getURL($earliest_keyword, $earliest_keyword_group)); + $i += strlen($earliest_keyword) - 1; + geshi_dbg("strlen of earliest keyword is " . strlen($earliest_keyword) . " (pos is $i)", GESHI_DBG_PARSE); + // doesn't help + $earliest_pos = false; + $earliest_keyword = ''; + } else { + // Check for a symbol instead + $this->_checkForSymbol($char, $result, $result_pointer); + } + + /// If we move this to the end we might be able to get rid of the last one [DONE] + /// The second test on the first line is a little contentious - allows functions that don't + /// start with an alpha character to be within other words, e.g abc<?php, where <?php is a kw + $before_char = substr($code, $i, 1); + $before_char_is_alnum = ctype_alnum($before_char); + $keyword_match_allowed = (!$before_char_is_alnum || ($before_char_is_alnum && !ctype_alnum($char))); + $keyword_match_allowed = ($keyword_match_allowed && !in_array($before_char, + $this->_contextCharactersDisallowedBeforeKeywords)); + // Disallow underscores before keywords + $keyword_match_allowed = ($keyword_match_allowed && ('_' != $before_char)); + geshi_dbg(' Keyword matching allowed: ' . $keyword_match_allowed, GESHI_DBG_PARSE); + geshi_dbg(' [checked ' . substr($code, $i, 1) . ' against ' . print_r($this->_contextCharactersDisallowedBeforeKeywords, true), GESHI_DBG_PARSE); + } + + unset($result[0]); + //geshi_dbg('@b Resultant Parse Data:', GESHI_DBG_PARSE); + //geshi_dbg(str_replace("\n", "\r", print_r($result, true)), GESHI_DBG_PARSE); + //return array(array($code, $this->_contextName)); + return $result; + } + + + /** + * Checks the specified character to see if it is a symbol, and + * adds it to the result array according to its findings. + * + * @param string The possible symbol to check + * @param array The current result data that will be appended to + * @param int The pointer to the current result record + */ + function _checkForSymbol($possible_symbol, &$result,&$result_pointer) + { + $skip = false; + geshi_dbg('Checking ' . $possible_symbol . ' for symbol match', GESHI_DBG_PARSE); + foreach ($this->_contextSymbols as $symbol_data) { + if (in_array($possible_symbol, $symbol_data[0])) { + // we've matched the symbol in $symbol_group + // start the current symbols string + if ($result[$result_pointer][1] == $symbol_data[1]) { + $result[$result_pointer][0] .= $possible_symbol; + } else { + $result[++$result_pointer] = array($possible_symbol, $symbol_data[1]); + } + $skip = true; + break; + } + } + if (!$skip) { + if ($result[$result_pointer][1] == $this->_contextName) { + $result[$result_pointer][0] .= $possible_symbol; + } else { + $result[++$result_pointer] = array($possible_symbol, $this->_contextName); + } + } + } + + /// THIS FUNCTION NEEDS TO DIE!!! + /// When language files are able to be compiled, they should list their keywords + /// in this form already. + function _createContextKeywordLookup () + { + geshi_dbg('GeSHiCodeContext::_createContextKeywordLookup()', GESHI_DBG_PARSE); + + $this->_contextKeywordLookup = array(); + foreach ($this->_contextKeywords as $keyword_group_key => $keyword_group_array) { + geshi_dbg(" keyword group key: $keyword_group_key", GESHI_DBG_PARSE); + + foreach ($keyword_group_array[0] as $keyword) { + // If keywords are case sensitive, add them straight in. + // Otherwise, if they're not and the first char of the lookup is alphabetical, + // add it to both parts of the lookup (a and A for example). + $key = substr($keyword, 0, 1); + if (ctype_alpha($key) && !$keyword_group_array[3]) { + $this->_contextKeywordLookup[strtoupper(substr($keyword, 0, 1))][] = + array(0 => $keyword, 1 => $keyword_group_key /*$keyword_group_array[1]*/); + $this->_contextKeywordLookup[strtolower(substr($keyword, 0, 1))][] = + array(0 => $keyword, 1 => $keyword_group_key /*$keyword_group_array[1]*/); + } else { + $this->_contextKeywordLookup[$key][] = + array(0 => $keyword, 1 => $keyword_group_key /*$keyword_group_array[1]*/); + } + } + } + if (isset($key)) { + geshi_dbg(' Lookup created, first entry: ' . print_r($this->_contextKeywordLookup[$key][0], true), GESHI_DBG_PARSE); + } else { + geshi_dbg(' Lookup created with no entries', GESHI_DBG_PARSE); + } + } + + + /** + * Turns keywords into <a href="url">>keyword<</a> if needed + * + * @todo [blocking 1.1.5] This method still needs to listen to set_link_target, set_link_styles etc + */ + function _getURL ($keyword, $earliest_keyword_group) + { + if ($this->_contextKeywords[$earliest_keyword_group][4] != '') { + // Remove function_exists() call? Valid language files will define functions required... + if (substr($this->_contextKeywords[$earliest_keyword_group][4], -2) == '()' && + function_exists(substr($this->_contextKeywords[$earliest_keyword_group][4], 0, -2))) { + $href = call_user_func(substr($this->_contextKeywords[$earliest_keyword_group][4], 0, -2), $keyword); + } else { + $href = str_replace('{FNAME}', $keyword, $this->_contextKeywords[$earliest_keyword_group][4]); + } + return $href; + } + return ''; + } +} + +?> |