diff options
author | Elliot Kroo <kroo@appjet.com> | 2010-03-11 15:21:30 -0800 |
---|---|---|
committer | Elliot Kroo <kroo@appjet.com> | 2010-03-11 15:21:30 -0800 |
commit | 98e2821b38a775737e42a2479a6bc65107210859 (patch) | |
tree | 55939a8ba1dce4f4e48ebb13b658061d62bf1b9a /infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp | |
parent | c1894c8e0a52f4e3d2f89fa92f0066bbf0fcf1b1 (diff) | |
download | etherpad-98e2821b38a775737e42a2479a6bc65107210859.tar.gz etherpad-98e2821b38a775737e42a2479a6bc65107210859.tar.xz etherpad-98e2821b38a775737e42a2479a6bc65107210859.zip |
reorganizing the first level of folders (trunk/branch folders are not the git way :)
Diffstat (limited to 'infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp')
4 files changed, 3687 insertions, 0 deletions
diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java new file mode 100644 index 0000000..a893841 --- /dev/null +++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java @@ -0,0 +1,2782 @@ +/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Rhino code, released + * May 6, 1998. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1997-1999 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Norris Boyd + * Igor Bukanov + * Brendan Eich + * Matthias Radestock + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License Version 2 or later (the "GPL"), in which + * case the provisions of the GPL are applicable instead of those above. If + * you wish to allow use of your version of this file only under the terms of + * the GPL and not to allow others to use your version of this file under the + * MPL, indicate your decision by deleting the provisions above and replacing + * them with the notice and other provisions required by the GPL. 
If you do + * not delete the provisions above, a recipient may use your version of this + * file under either the MPL or the GPL. + * + * ***** END LICENSE BLOCK ***** */ + +package org.mozilla.javascript.regexp; + +import java.io.Serializable; + +import org.mozilla.javascript.Context; +import org.mozilla.javascript.Function; +import org.mozilla.javascript.IdFunctionObject; +import org.mozilla.javascript.IdScriptableObject; +import org.mozilla.javascript.Kit; +import org.mozilla.javascript.ScriptRuntime; +import org.mozilla.javascript.Scriptable; +import org.mozilla.javascript.ScriptableObject; +import org.mozilla.javascript.Undefined; + +/** + * This class implements the RegExp native object. + * + * Revision History: + * Implementation in C by Brendan Eich + * Initial port to Java by Norris Boyd from jsregexp.c version 1.36 + * Merged up to version 1.38, which included Unicode support. + * Merged bug fixes in version 1.39. + * Merged JSFUN13_BRANCH changes up to 1.32.2.13 + * + * @author Brendan Eich + * @author Norris Boyd + */ + + + +public class NativeRegExp extends IdScriptableObject implements Function +{ + static final long serialVersionUID = 4965263491464903264L; + + private static final Object REGEXP_TAG = new Object(); + + public static final int JSREG_GLOB = 0x1; // 'g' flag: global + public static final int JSREG_FOLD = 0x2; // 'i' flag: fold + public static final int JSREG_MULTILINE = 0x4; // 'm' flag: multiline + + //type of match to perform + public static final int TEST = 0; + public static final int MATCH = 1; + public static final int PREFIX = 2; + + private static final boolean debug = false; + + private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. 
*/ + private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */ + private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */ + private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */ + private static final byte REOP_WBDRY = 4; /* match "" at word boundary */ + private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */ + private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */ + private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */ + private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */ + private static final byte REOP_OPT = 9; /* optional subexpression in kid */ + private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */ + private static final byte REOP_RPAREN = 11; /* right paren bytecode */ + private static final byte REOP_DOT = 12; /* stands for any character */ +// private static final byte REOP_CCLASS = 13; /* character class: [a-f] */ + private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */ + private static final byte REOP_NONDIGIT = 15; /* match a non-digit char: [^0-9] */ + private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */ + private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */ + private static final byte REOP_SPACE = 18; /* match a whitespace char */ + private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */ + private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */ + private static final byte REOP_FLAT = 21; /* match a flat string */ + private static final byte REOP_FLAT1 = 22; /* match a single char */ + private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */ +// private static final byte REOP_DOTSTAR = 24; /* optimize .* to use a single opcode */ +// private 
static final byte REOP_ANCHOR = 25; /* like .* but skips left context to unanchored r.e. */ +// private static final byte REOP_EOLONLY = 26; /* $ not preceded by any pattern */ +// private static final byte REOP_UCFLAT = 27; /* flat Unicode string; len immediate counts chars */ + private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */ +// private static final byte REOP_UCCLASS = 29; /* Unicode character class, vector of chars to match */ +// private static final byte REOP_NUCCLASS = 30; /* negated Unicode character class */ +// private static final byte REOP_BACKREFi = 31; /* case-independent REOP_BACKREF */ + private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */ + private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */ +// private static final byte REOP_UCFLATi = 34; /* case-independent REOP_UCFLAT */ + private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */ +// private static final byte REOP_ANCHOR1 = 36; /* first-char discriminating REOP_ANCHOR */ +// private static final byte REOP_NCCLASS = 37; /* negated 8-bit character class */ +// private static final byte REOP_DOTSTARMIN = 38; /* ungreedy version of REOP_DOTSTAR */ +// private static final byte REOP_LPARENNON = 39; /* non-capturing version of REOP_LPAREN */ +// private static final byte REOP_RPARENNON = 40; /* non-capturing version of REOP_RPAREN */ + private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */ + private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */ + private static final byte REOP_ASSERTTEST = 43; /* sentinel at end of assertion child */ + private static final byte REOP_ASSERTNOTTEST = 44; /* sentinel at end of !assertion child */ + private static final byte REOP_MINIMALSTAR = 45; /* non-greedy version of * */ + private static final byte REOP_MINIMALPLUS = 46; /* non-greedy version of + */ + private static final byte REOP_MINIMALOPT = 47; 
/* non-greedy version of ? */ + private static final byte REOP_MINIMALQUANT = 48; /* non-greedy version of {} */ + private static final byte REOP_ENDCHILD = 49; /* sentinel at end of quantifier child */ + private static final byte REOP_CLASS = 50; /* character class with index */ + private static final byte REOP_REPEAT = 51; /* directs execution of greedy quantifier */ + private static final byte REOP_MINIMALREPEAT = 52; /* directs execution of non-greedy quantifier */ + private static final byte REOP_END = 53; + + + + public static void init(Context cx, Scriptable scope, boolean sealed) + { + + NativeRegExp proto = new NativeRegExp(); + proto.re = (RECompiled)compileRE(cx, "", null, false); + proto.activatePrototypeMap(MAX_PROTOTYPE_ID); + proto.setParentScope(scope); + proto.setPrototype(getObjectPrototype(scope)); + + NativeRegExpCtor ctor = new NativeRegExpCtor(); + // Bug #324006: ECMA-262 15.10.6.1 says "The initial value of + // RegExp.prototype.constructor is the builtin RegExp constructor." 
+ proto.put("constructor", proto, ctor); + + ScriptRuntime.setFunctionProtoAndParent(ctor, scope); + + ctor.setImmunePrototypeProperty(proto); + + if (sealed) { + proto.sealObject(); + ctor.sealObject(); + } + + defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM); + } + + NativeRegExp(Scriptable scope, Object regexpCompiled) + { + this.re = (RECompiled)regexpCompiled; + this.lastIndex = 0; + ScriptRuntime.setObjectProtoAndParent(this, scope); + } + + public String getClassName() + { + return "RegExp"; + } + + public Object call(Context cx, Scriptable scope, Scriptable thisObj, + Object[] args) + { + return execSub(cx, scope, args, MATCH); + } + + public Scriptable construct(Context cx, Scriptable scope, Object[] args) + { + return (Scriptable)execSub(cx, scope, args, MATCH); + } + + Scriptable compile(Context cx, Scriptable scope, Object[] args) + { + if (args.length > 0 && args[0] instanceof NativeRegExp) { + if (args.length > 1 && args[1] != Undefined.instance) { + // report error + throw ScriptRuntime.typeError0("msg.bad.regexp.compile"); + } + NativeRegExp thatObj = (NativeRegExp) args[0]; + this.re = thatObj.re; + this.lastIndex = thatObj.lastIndex; + return this; + } + String s = args.length == 0 ? "" : ScriptRuntime.toString(args[0]); + String global = args.length > 1 && args[1] != Undefined.instance + ? 
ScriptRuntime.toString(args[1]) + : null; + this.re = (RECompiled)compileRE(cx, s, global, false); + this.lastIndex = 0; + return this; + } + + public String toString() + { + StringBuffer buf = new StringBuffer(); + buf.append('/'); + if (re.source.length != 0) { + buf.append(re.source); + } else { + // See bugzilla 226045 + buf.append("(?:)"); + } + buf.append('/'); + if ((re.flags & JSREG_GLOB) != 0) + buf.append('g'); + if ((re.flags & JSREG_FOLD) != 0) + buf.append('i'); + if ((re.flags & JSREG_MULTILINE) != 0) + buf.append('m'); + return buf.toString(); + } + + NativeRegExp() { } + + private static RegExpImpl getImpl(Context cx) + { + return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx); + } + + private Object execSub(Context cx, Scriptable scopeObj, + Object[] args, int matchType) + { + RegExpImpl reImpl = getImpl(cx); + String str; + if (args.length == 0) { + str = reImpl.input; + if (str == null) { + reportError("msg.no.re.input.for", toString()); + } + } else { + str = ScriptRuntime.toString(args[0]); + } + double d = ((re.flags & JSREG_GLOB) != 0) ? lastIndex : 0; + + Object rval; + if (d < 0 || str.length() < d) { + lastIndex = 0; + rval = null; + } + else { + int indexp[] = { (int)d }; + rval = executeRegExp(cx, scopeObj, reImpl, str, indexp, matchType); + if ((re.flags & JSREG_GLOB) != 0) { + lastIndex = (rval == null || rval == Undefined.instance) + ? 
0 : indexp[0]; + } + } + return rval; + } + + static Object compileRE(Context cx, String str, String global, boolean flat) + { + RECompiled regexp = new RECompiled(); + regexp.source = str.toCharArray(); + int length = str.length(); + + int flags = 0; + if (global != null) { + for (int i = 0; i < global.length(); i++) { + char c = global.charAt(i); + if (c == 'g') { + flags |= JSREG_GLOB; + } else if (c == 'i') { + flags |= JSREG_FOLD; + } else if (c == 'm') { + flags |= JSREG_MULTILINE; + } else { + reportError("msg.invalid.re.flag", String.valueOf(c)); + } + } + } + regexp.flags = flags; + + CompilerState state = new CompilerState(cx, regexp.source, length, flags); + if (flat && length > 0) { +if (debug) { +System.out.println("flat = \"" + str + "\""); +} + state.result = new RENode(REOP_FLAT); + state.result.chr = state.cpbegin[0]; + state.result.length = length; + state.result.flatIndex = 0; + state.progLength += 5; + } + else + if (!parseDisjunction(state)) + return null; + + regexp.program = new byte[state.progLength + 1]; + if (state.classCount != 0) { + regexp.classList = new RECharSet[state.classCount]; + regexp.classCount = state.classCount; + } + int endPC = emitREBytecode(state, regexp, 0, state.result); + regexp.program[endPC++] = REOP_END; + +if (debug) { +System.out.println("Prog. 
length = " + endPC); +for (int i = 0; i < endPC; i++) { + System.out.print(regexp.program[i]); + if (i < (endPC - 1)) System.out.print(", "); +} +System.out.println(); +} + regexp.parenCount = state.parenCount; + + // If re starts with literal, init anchorCh accordingly + switch (regexp.program[0]) { + case REOP_UCFLAT1: + case REOP_UCFLAT1i: + regexp.anchorCh = (char)getIndex(regexp.program, 1); + break; + case REOP_FLAT1: + case REOP_FLAT1i: + regexp.anchorCh = (char)(regexp.program[1] & 0xFF); + break; + case REOP_FLAT: + case REOP_FLATi: + int k = getIndex(regexp.program, 1); + regexp.anchorCh = regexp.source[k]; + break; + } + +if (debug) { +if (regexp.anchorCh >= 0) { + System.out.println("Anchor ch = '" + (char)regexp.anchorCh + "'"); +} +} + return regexp; + } + + static boolean isDigit(char c) + { + return '0' <= c && c <= '9'; + } + + private static boolean isWord(char c) + { + return Character.isLetter(c) || isDigit(c) || c == '_'; + } + + private static boolean isLineTerm(char c) + { + return ScriptRuntime.isJSLineTerminator(c); + } + + private static boolean isREWhiteSpace(int c) + { + return (c == '\u0020' || c == '\u0009' + || c == '\n' || c == '\r' + || c == 0x2028 || c == 0x2029 + || c == '\u000C' || c == '\u000B' + || c == '\u00A0' + || Character.getType((char)c) == Character.SPACE_SEPARATOR); + } + + /* + * + * 1. If IgnoreCase is false, return ch. + * 2. Let u be ch converted to upper case as if by calling + * String.prototype.toUpperCase on the one-character string ch. + * 3. If u does not consist of a single character, return ch. + * 4. Let cu be u's character. + * 5. If ch's code point value is greater than or equal to decimal 128 and cu's + * code point value is less than decimal 128, then return ch. + * 6. Return cu. 
+ */ + private static char upcase(char ch) + { + if (ch < 128) { + if ('a' <= ch && ch <= 'z') { + return (char)(ch + ('A' - 'a')); + } + return ch; + } + char cu = Character.toUpperCase(ch); + if ((ch >= 128) && (cu < 128)) return ch; + return cu; + } + + private static char downcase(char ch) + { + if (ch < 128) { + if ('A' <= ch && ch <= 'Z') { + return (char)(ch + ('a' - 'A')); + } + return ch; + } + char cl = Character.toLowerCase(ch); + if ((ch >= 128) && (cl < 128)) return ch; + return cl; + } + +/* + * Validates and converts hex ascii value. + */ + private static int toASCIIHexDigit(int c) + { + if (c < '0') + return -1; + if (c <= '9') { + return c - '0'; + } + c |= 0x20; + if ('a' <= c && c <= 'f') { + return c - 'a' + 10; + } + return -1; + } + +/* + * Top-down regular expression grammar, based closely on Perl4. + * + * regexp: altern A regular expression is one or more + * altern '|' regexp alternatives separated by vertical bar. + */ + private static boolean parseDisjunction(CompilerState state) + { + if (!parseAlternative(state)) + return false; + char[] source = state.cpbegin; + int index = state.cp; + if (index != source.length && source[index] == '|') { + RENode altResult; + ++state.cp; + altResult = new RENode(REOP_ALT); + altResult.kid = state.result; + if (!parseDisjunction(state)) + return false; + altResult.kid2 = state.result; + state.result = altResult; + /* ALT, <next>, ..., JUMP, <end> ... JUMP <end> */ + state.progLength += 9; + } + return true; + } + +/* + * altern: item An alternative is one or more items, + * item altern concatenated together. 
+ */ + private static boolean parseAlternative(CompilerState state) + { + RENode headTerm = null; + RENode tailTerm = null; + char[] source = state.cpbegin; + while (true) { + if (state.cp == state.cpend || source[state.cp] == '|' + || (state.parenNesting != 0 && source[state.cp] == ')')) + { + if (headTerm == null) { + state.result = new RENode(REOP_EMPTY); + } + else + state.result = headTerm; + return true; + } + if (!parseTerm(state)) + return false; + if (headTerm == null) + headTerm = state.result; + else { + if (tailTerm == null) { + headTerm.next = state.result; + tailTerm = state.result; + while (tailTerm.next != null) tailTerm = tailTerm.next; + } + else { + tailTerm.next = state.result; + tailTerm = tailTerm.next; + while (tailTerm.next != null) tailTerm = tailTerm.next; + } + } + } + } + + /* calculate the total size of the bitmap required for a class expression */ + private static boolean + calculateBitmapSize(CompilerState state, RENode target, char[] src, + int index, int end) + { + char rangeStart = 0; + char c; + int n; + int nDigits; + int i; + int max = 0; + boolean inRange = false; + + target.bmsize = 0; + + if (index == end) + return true; + + if (src[index] == '^') + ++index; + + while (index != end) { + int localMax = 0; + nDigits = 2; + switch (src[index]) { + case '\\': + ++index; + c = src[index++]; + switch (c) { + case 'b': + localMax = 0x8; + break; + case 'f': + localMax = 0xC; + break; + case 'n': + localMax = 0xA; + break; + case 'r': + localMax = 0xD; + break; + case 't': + localMax = 0x9; + break; + case 'v': + localMax = 0xB; + break; + case 'c': + if (((index + 1) < end) && Character.isLetter(src[index + 1])) + localMax = (char)(src[index++] & 0x1F); + else + localMax = '\\'; + break; + case 'u': + nDigits += 2; + // fall thru... 
+ case 'x': + n = 0; + for (i = 0; (i < nDigits) && (index < end); i++) { + c = src[index++]; + n = Kit.xDigitToInt(c, n); + if (n < 0) { + // Back off to accepting the original + // '\' as a literal + index -= (i + 1); + n = '\\'; + break; + } + } + localMax = n; + break; + case 'd': + if (inRange) { + reportError("msg.bad.range", ""); + return false; + } + localMax = '9'; + break; + case 'D': + case 's': + case 'S': + case 'w': + case 'W': + if (inRange) { + reportError("msg.bad.range", ""); + return false; + } + target.bmsize = 65535; + return true; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* + * This is a non-ECMA extension - decimal escapes (in this + * case, octal!) are supposed to be an error inside class + * ranges, but supported here for backwards compatibility. + * + */ + n = (c - '0'); + c = src[index]; + if ('0' <= c && c <= '7') { + index++; + n = 8 * n + (c - '0'); + c = src[index]; + if ('0' <= c && c <= '7') { + index++; + i = 8 * n + (c - '0'); + if (i <= 0377) + n = i; + else + index--; + } + } + localMax = n; + break; + + default: + localMax = c; + break; + } + break; + default: + localMax = src[index++]; + break; + } + if (inRange) { + if (rangeStart > localMax) { + reportError("msg.bad.range", ""); + return false; + } + inRange = false; + } + else { + if (index < (end - 1)) { + if (src[index] == '-') { + ++index; + inRange = true; + rangeStart = (char)localMax; + continue; + } + } + } + if ((state.flags & JSREG_FOLD) != 0){ + char cu = upcase((char)localMax); + char cd = downcase((char)localMax); + localMax = (cu >= cd) ? cu : cd; + } + if (localMax > max) + max = localMax; + } + target.bmsize = max; + return true; + } + + /* + * item: assertion An item is either an assertion or + * quantatom a quantified atom. + * + * assertion: '^' Assertions match beginning of string + * (or line if the class static property + * RegExp.multiline is true). 
+ * '$' End of string (or line if the class + * static property RegExp.multiline is + * true). + * '\b' Word boundary (between \w and \W). + * '\B' Word non-boundary. + * + * quantatom: atom An unquantified atom. + * quantatom '{' n ',' m '}' + * Atom must occur between n and m times. + * quantatom '{' n ',' '}' Atom must occur at least n times. + * quantatom '{' n '}' Atom must occur exactly n times. + * quantatom '*' Zero or more times (same as {0,}). + * quantatom '+' One or more times (same as {1,}). + * quantatom '?' Zero or one time (same as {0,1}). + * + * any of which can be optionally followed by '?' for ungreedy + * + * atom: '(' regexp ')' A parenthesized regexp (what matched + * can be addressed using a backreference, + * see '\' n below). + * '.' Matches any char except '\n'. + * '[' classlist ']' A character class. + * '[' '^' classlist ']' A negated character class. + * '\f' Form Feed. + * '\n' Newline (Line Feed). + * '\r' Carriage Return. + * '\t' Horizontal Tab. + * '\v' Vertical Tab. + * '\d' A digit (same as [0-9]). + * '\D' A non-digit. + * '\w' A word character, [0-9a-z_A-Z]. + * '\W' A non-word character. + * '\s' A whitespace character, [ \f\n\r\t\v]. + * '\S' A non-whitespace character. + * '\' n A backreference to the nth (n decimal + * and positive) parenthesized expression. + * '\' octal An octal escape sequence (octal must be + * two or three digits long, unless it is + * 0 for the null character). + * '\x' hex A hex escape (hex must be two digits). + * '\c' ctrl A control character, ctrl is a letter. + * '\' literalatomchar Any character except one of the above + * that follow '\' in an atom. + * otheratomchar Any character not first among the other + * atom right-hand sides. 
+ */ + + private static void doFlat(CompilerState state, char c) + { + state.result = new RENode(REOP_FLAT); + state.result.chr = c; + state.result.length = 1; + state.result.flatIndex = -1; + state.progLength += 3; + } + + private static int + getDecimalValue(char c, CompilerState state, int maxValue, + String overflowMessageId) + { + boolean overflow = false; + int start = state.cp; + char[] src = state.cpbegin; + int value = c - '0'; + for (; state.cp != state.cpend; ++state.cp) { + c = src[state.cp]; + if (!isDigit(c)) { + break; + } + if (!overflow) { + int digit = c - '0'; + if (value < (maxValue - digit) / 10) { + value = value * 10 + digit; + } else { + overflow = true; + value = maxValue; + } + } + } + if (overflow) { + reportError(overflowMessageId, + String.valueOf(src, start, state.cp - start)); + } + return value; + } + + private static boolean + parseTerm(CompilerState state) + { + char[] src = state.cpbegin; + char c = src[state.cp++]; + int nDigits = 2; + int parenBaseCount = state.parenCount; + int num, tmp; + RENode term; + int termStart; + + switch (c) { + /* assertions and atoms */ + case '^': + state.result = new RENode(REOP_BOL); + state.progLength++; + return true; + case '$': + state.result = new RENode(REOP_EOL); + state.progLength++; + return true; + case '\\': + if (state.cp < state.cpend) { + c = src[state.cp++]; + switch (c) { + /* assertion escapes */ + case 'b' : + state.result = new RENode(REOP_WBDRY); + state.progLength++; + return true; + case 'B': + state.result = new RENode(REOP_WNONBDRY); + state.progLength++; + return true; + /* Decimal escape */ + case '0': +/* + * Under 'strict' ECMA 3, we interpret \0 as NUL and don't accept octal. + * However, (XXX and since Rhino doesn't have a 'strict' mode) we'll just + * behave the old way for compatibility reasons. 
+ * (see http://bugzilla.mozilla.org/show_bug.cgi?id=141078) + * + */ + reportWarning(state.cx, "msg.bad.backref", ""); + /* octal escape */ + num = 0; + while (state.cp < state.cpend) { + c = src[state.cp]; + if ((c >= '0') && (c <= '7')) { + state.cp++; + tmp = 8 * num + (c - '0'); + if (tmp > 0377) + break; + num = tmp; + } + else + break; + } + c = (char)(num); + doFlat(state, c); + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + termStart = state.cp - 1; + num = getDecimalValue(c, state, 0xFFFF, + "msg.overlarge.backref"); + if (num > state.parenCount) + reportWarning(state.cx, "msg.bad.backref", ""); + /* + * n > 9 or > count of parentheses, + * then treat as octal instead. + */ + if ((num > 9) && (num > state.parenCount)) { + state.cp = termStart; + num = 0; + while (state.cp < state.cpend) { + c = src[state.cp]; + if ((c >= '0') && (c <= '7')) { + state.cp++; + tmp = 8 * num + (c - '0'); + if (tmp > 0377) + break; + num = tmp; + } + else + break; + } + c = (char)(num); + doFlat(state, c); + break; + } + /* otherwise, it's a back-reference */ + state.result = new RENode(REOP_BACKREF); + state.result.parenIndex = num - 1; + state.progLength += 3; + break; + /* Control escape */ + case 'f': + c = 0xC; + doFlat(state, c); + break; + case 'n': + c = 0xA; + doFlat(state, c); + break; + case 'r': + c = 0xD; + doFlat(state, c); + break; + case 't': + c = 0x9; + doFlat(state, c); + break; + case 'v': + c = 0xB; + doFlat(state, c); + break; + /* Control letter */ + case 'c': + if (((state.cp + 1) < state.cpend) && + Character.isLetter(src[state.cp + 1])) + c = (char)(src[state.cp++] & 0x1F); + else { + /* back off to accepting the original '\' as a literal */ + --state.cp; + c = '\\'; + } + doFlat(state, c); + break; + /* UnicodeEscapeSequence */ + case 'u': + nDigits += 2; + // fall thru... 
+ /* HexEscapeSequence */ + case 'x': + { + int n = 0; + int i; + for (i = 0; (i < nDigits) + && (state.cp < state.cpend); i++) { + c = src[state.cp++]; + n = Kit.xDigitToInt(c, n); + if (n < 0) { + // Back off to accepting the original + // 'u' or 'x' as a literal + state.cp -= (i + 2); + n = src[state.cp++]; + break; + } + } + c = (char)(n); + } + doFlat(state, c); + break; + /* Character class escapes */ + case 'd': + state.result = new RENode(REOP_DIGIT); + state.progLength++; + break; + case 'D': + state.result = new RENode(REOP_NONDIGIT); + state.progLength++; + break; + case 's': + state.result = new RENode(REOP_SPACE); + state.progLength++; + break; + case 'S': + state.result = new RENode(REOP_NONSPACE); + state.progLength++; + break; + case 'w': + state.result = new RENode(REOP_ALNUM); + state.progLength++; + break; + case 'W': + state.result = new RENode(REOP_NONALNUM); + state.progLength++; + break; + /* IdentityEscape */ + default: + state.result = new RENode(REOP_FLAT); + state.result.chr = c; + state.result.length = 1; + state.result.flatIndex = state.cp - 1; + state.progLength += 3; + break; + } + break; + } + else { + /* a trailing '\' is an error */ + reportError("msg.trail.backslash", ""); + return false; + } + case '(': { + RENode result = null; + termStart = state.cp; + if (state.cp + 1 < state.cpend && src[state.cp] == '?' + && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':')) + { + state.cp += 2; + if (c == '=') { + result = new RENode(REOP_ASSERT); + /* ASSERT, <next>, ... ASSERTTEST */ + state.progLength += 4; + } else if (c == '!') { + result = new RENode(REOP_ASSERT_NOT); + /* ASSERTNOT, <next>, ... ASSERTNOTTEST */ + state.progLength += 4; + } + } else { + result = new RENode(REOP_LPAREN); + /* LPAREN, <index>, ... 
RPAREN, <index> */ + state.progLength += 6; + result.parenIndex = state.parenCount++; + } + ++state.parenNesting; + if (!parseDisjunction(state)) + return false; + if (state.cp == state.cpend || src[state.cp] != ')') { + reportError("msg.unterm.paren", "in regular expression"/*APPJET*/); + return false; + } + ++state.cp; + --state.parenNesting; + if (result != null) { + result.kid = state.result; + state.result = result; + } + break; + } + case ')': + reportError("msg.re.unmatched.right.paren", ""); + return false; + case '[': + state.result = new RENode(REOP_CLASS); + termStart = state.cp; + state.result.startIndex = termStart; + while (true) { + if (state.cp == state.cpend) { + reportError("msg.unterm.class", ""); + return false; + } + if (src[state.cp] == '\\') + state.cp++; + else { + if (src[state.cp] == ']') { + state.result.kidlen = state.cp - termStart; + break; + } + } + state.cp++; + } + state.result.index = state.classCount++; + /* + * Call calculateBitmapSize now as we want any errors it finds + * to be reported during the parse phase, not at execution. + */ + if (!calculateBitmapSize(state, state.result, src, termStart, state.cp++)) + return false; + state.progLength += 3; /* CLASS, <index> */ + break; + + case '.': + state.result = new RENode(REOP_DOT); + state.progLength++; + break; + case '*': + case '+': + case '?': + reportError("msg.bad.quant", String.valueOf(src[state.cp - 1])); + return false; + default: + state.result = new RENode(REOP_FLAT); + state.result.chr = c; + state.result.length = 1; + state.result.flatIndex = state.cp - 1; + state.progLength += 3; + break; + } + + term = state.result; + if (state.cp == state.cpend) { + return true; + } + boolean hasQ = false; + switch (src[state.cp]) { + case '+': + state.result = new RENode(REOP_QUANT); + state.result.min = 1; + state.result.max = -1; + /* <PLUS>, <parencount>, <parenindex>, <next> ... 
<ENDCHILD> */ + state.progLength += 8; + hasQ = true; + break; + case '*': + state.result = new RENode(REOP_QUANT); + state.result.min = 0; + state.result.max = -1; + /* <STAR>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */ + state.progLength += 8; + hasQ = true; + break; + case '?': + state.result = new RENode(REOP_QUANT); + state.result.min = 0; + state.result.max = 1; + /* <OPT>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */ + state.progLength += 8; + hasQ = true; + break; + case '{': /* balance '}' */ + { + int min = 0; + int max = -1; + int leftCurl = state.cp; + + /* For Perl etc. compatibility, if quntifier does not match + * \{\d+(,\d*)?\} exactly back off from it + * being a quantifier, and chew it up as a literal + * atom next time instead. + */ + + c = src[++state.cp]; + if (isDigit(c)) { + ++state.cp; + min = getDecimalValue(c, state, 0xFFFF, + "msg.overlarge.min"); + c = src[state.cp]; + if (c == ',') { + c = src[++state.cp]; + if (isDigit(c)) { + ++state.cp; + max = getDecimalValue(c, state, 0xFFFF, + "msg.overlarge.max"); + c = src[state.cp]; + if (min > max) { + reportError("msg.max.lt.min", + String.valueOf(src[state.cp])); + return false; + } + } + } else { + max = min; + } + /* balance '{' */ + if (c == '}') { + state.result = new RENode(REOP_QUANT); + state.result.min = min; + state.result.max = max; + // QUANT, <min>, <max>, <parencount>, + // <parenindex>, <next> ... 
<ENDCHILD> + state.progLength += 12; + hasQ = true; + } + } + if (!hasQ) { + state.cp = leftCurl; + } + break; + } + } + if (!hasQ) + return true; + + ++state.cp; + state.result.kid = term; + state.result.parenIndex = parenBaseCount; + state.result.parenCount = state.parenCount - parenBaseCount; + if ((state.cp < state.cpend) && (src[state.cp] == '?')) { + ++state.cp; + state.result.greedy = false; + } + else + state.result.greedy = true; + return true; + } + + private static void resolveForwardJump(byte[] array, int from, int pc) + { + if (from > pc) throw Kit.codeBug(); + addIndex(array, from, pc - from); + } + + private static int getOffset(byte[] array, int pc) + { + return getIndex(array, pc); + } + + private static int addIndex(byte[] array, int pc, int index) + { + if (index < 0) throw Kit.codeBug(); + if (index > 0xFFFF) + throw Context.reportRuntimeError("Too complex regexp"); + array[pc] = (byte)(index >> 8); + array[pc + 1] = (byte)(index); + return pc + 2; + } + + private static int getIndex(byte[] array, int pc) + { + return ((array[pc] & 0xFF) << 8) | (array[pc + 1] & 0xFF); + } + + private static final int OFFSET_LEN = 2; + private static final int INDEX_LEN = 2; + + private static int + emitREBytecode(CompilerState state, RECompiled re, int pc, RENode t) + { + RENode nextAlt; + int nextAltFixup, nextTermFixup; + byte[] program = re.program; + + while (t != null) { + program[pc++] = t.op; + switch (t.op) { + case REOP_EMPTY: + --pc; + break; + case REOP_ALT: + nextAlt = t.kid2; + nextAltFixup = pc; /* address of next alternate */ + pc += OFFSET_LEN; + pc = emitREBytecode(state, re, pc, t.kid); + program[pc++] = REOP_JUMP; + nextTermFixup = pc; /* address of following term */ + pc += OFFSET_LEN; + resolveForwardJump(program, nextAltFixup, pc); + pc = emitREBytecode(state, re, pc, nextAlt); + + program[pc++] = REOP_JUMP; + nextAltFixup = pc; + pc += OFFSET_LEN; + + resolveForwardJump(program, nextTermFixup, pc); + resolveForwardJump(program, 
nextAltFixup, pc); + break; + case REOP_FLAT: + /* + * Consecutize FLAT's if possible. + */ + if (t.flatIndex != -1) { + while ((t.next != null) && (t.next.op == REOP_FLAT) + && ((t.flatIndex + t.length) + == t.next.flatIndex)) { + t.length += t.next.length; + t.next = t.next.next; + } + } + if ((t.flatIndex != -1) && (t.length > 1)) { + if ((state.flags & JSREG_FOLD) != 0) + program[pc - 1] = REOP_FLATi; + else + program[pc - 1] = REOP_FLAT; + pc = addIndex(program, pc, t.flatIndex); + pc = addIndex(program, pc, t.length); + } + else { + if (t.chr < 256) { + if ((state.flags & JSREG_FOLD) != 0) + program[pc - 1] = REOP_FLAT1i; + else + program[pc - 1] = REOP_FLAT1; + program[pc++] = (byte)(t.chr); + } + else { + if ((state.flags & JSREG_FOLD) != 0) + program[pc - 1] = REOP_UCFLAT1i; + else + program[pc - 1] = REOP_UCFLAT1; + pc = addIndex(program, pc, t.chr); + } + } + break; + case REOP_LPAREN: + pc = addIndex(program, pc, t.parenIndex); + pc = emitREBytecode(state, re, pc, t.kid); + program[pc++] = REOP_RPAREN; + pc = addIndex(program, pc, t.parenIndex); + break; + case REOP_BACKREF: + pc = addIndex(program, pc, t.parenIndex); + break; + case REOP_ASSERT: + nextTermFixup = pc; + pc += OFFSET_LEN; + pc = emitREBytecode(state, re, pc, t.kid); + program[pc++] = REOP_ASSERTTEST; + resolveForwardJump(program, nextTermFixup, pc); + break; + case REOP_ASSERT_NOT: + nextTermFixup = pc; + pc += OFFSET_LEN; + pc = emitREBytecode(state, re, pc, t.kid); + program[pc++] = REOP_ASSERTNOTTEST; + resolveForwardJump(program, nextTermFixup, pc); + break; + case REOP_QUANT: + if ((t.min == 0) && (t.max == -1)) + program[pc - 1] = (t.greedy) ? REOP_STAR : REOP_MINIMALSTAR; + else + if ((t.min == 0) && (t.max == 1)) + program[pc - 1] = (t.greedy) ? REOP_OPT : REOP_MINIMALOPT; + else + if ((t.min == 1) && (t.max == -1)) + program[pc - 1] = (t.greedy) ? 
REOP_PLUS : REOP_MINIMALPLUS; + else { + if (!t.greedy) program[pc - 1] = REOP_MINIMALQUANT; + pc = addIndex(program, pc, t.min); + // max can be -1 which addIndex does not accept + pc = addIndex(program, pc, t.max + 1); + } + pc = addIndex(program, pc, t.parenCount); + pc = addIndex(program, pc, t.parenIndex); + nextTermFixup = pc; + pc += OFFSET_LEN; + pc = emitREBytecode(state, re, pc, t.kid); + program[pc++] = REOP_ENDCHILD; + resolveForwardJump(program, nextTermFixup, pc); + break; + case REOP_CLASS: + pc = addIndex(program, pc, t.index); + re.classList[t.index] = new RECharSet(t.bmsize, t.startIndex, + t.kidlen); + break; + default: + break; + } + t = t.next; + } + return pc; + } + + private static void + pushProgState(REGlobalData gData, int min, int max, + REBackTrackData backTrackLastToSave, + int continuation_pc, int continuation_op) + { + gData.stateStackTop = new REProgState(gData.stateStackTop, min, max, + gData.cp, backTrackLastToSave, + continuation_pc, + continuation_op); + } + + private static REProgState + popProgState(REGlobalData gData) + { + REProgState state = gData.stateStackTop; + gData.stateStackTop = state.previous; + return state; + } + + private static void + pushBackTrackState(REGlobalData gData, byte op, int target) + { + gData.backTrackStackTop = new REBackTrackData(gData, op, target); + } + + /* + * Consecutive literal characters. 
+ */ + private static boolean + flatNMatcher(REGlobalData gData, int matchChars, + int length, char[] chars, int end) + { + if ((gData.cp + length) > end) + return false; + for (int i = 0; i < length; i++) { + if (gData.regexp.source[matchChars + i] != chars[gData.cp + i]) { + return false; + } + } + gData.cp += length; + return true; + } + + private static boolean + flatNIMatcher(REGlobalData gData, int matchChars, + int length, char[] chars, int end) + { + if ((gData.cp + length) > end) + return false; + for (int i = 0; i < length; i++) { + if (upcase(gData.regexp.source[matchChars + i]) + != upcase(chars[gData.cp + i])) + { + return false; + } + } + gData.cp += length; + return true; + } + + /* + 1. Evaluate DecimalEscape to obtain an EscapeValue E. + 2. If E is not a character then go to step 6. + 3. Let ch be E's character. + 4. Let A be a one-element RECharSet containing the character ch. + 5. Call CharacterSetMatcher(A, false) and return its Matcher result. + 6. E must be an integer. Let n be that integer. + 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception. + 8. Return an internal Matcher closure that takes two arguments, a State x + and a Continuation c, and performs the following: + 1. Let cap be x's captures internal array. + 2. Let s be cap[n]. + 3. If s is undefined, then call c(x) and return its result. + 4. Let e be x's endIndex. + 5. Let len be s's length. + 6. Let f be e+len. + 7. If f>InputLength, return failure. + 8. If there exists an integer i between 0 (inclusive) and len (exclusive) + such that Canonicalize(s[i]) is not the same character as + Canonicalize(Input [e+i]), then return failure. + 9. Let y be the State (f, cap). + 10. Call c(y) and return its result. 
+ */ + private static boolean + backrefMatcher(REGlobalData gData, int parenIndex, + char[] chars, int end) + { + int len; + int i; + int parenContent = gData.parens_index(parenIndex); + if (parenContent == -1) + return true; + + len = gData.parens_length(parenIndex); + if ((gData.cp + len) > end) + return false; + + if ((gData.regexp.flags & JSREG_FOLD) != 0) { + for (i = 0; i < len; i++) { + if (upcase(chars[parenContent + i]) != upcase(chars[gData.cp + i])) + return false; + } + } + else { + for (i = 0; i < len; i++) { + if (chars[parenContent + i] != chars[gData.cp + i]) + return false; + } + } + gData.cp += len; + return true; + } + + + /* Add a single character to the RECharSet */ + private static void + addCharacterToCharSet(RECharSet cs, char c) + { + int byteIndex = (c / 8); + if (c > cs.length) + throw new RuntimeException(); + cs.bits[byteIndex] |= 1 << (c & 0x7); + } + + + /* Add a character range, c1 to c2 (inclusive) to the RECharSet */ + private static void + addCharacterRangeToCharSet(RECharSet cs, char c1, char c2) + { + int i; + + int byteIndex1 = (c1 / 8); + int byteIndex2 = (c2 / 8); + + if ((c2 > cs.length) || (c1 > c2)) + throw new RuntimeException(); + + c1 &= 0x7; + c2 &= 0x7; + + if (byteIndex1 == byteIndex2) { + cs.bits[byteIndex1] |= ((0xFF) >> (7 - (c2 - c1))) << c1; + } + else { + cs.bits[byteIndex1] |= 0xFF << c1; + for (i = byteIndex1 + 1; i < byteIndex2; i++) + cs.bits[i] = (byte)0xFF; + cs.bits[byteIndex2] |= (0xFF) >> (7 - c2); + } + } + + /* Compile the source of the class into a RECharSet */ + private static void + processCharSet(REGlobalData gData, RECharSet charSet) + { + synchronized (charSet) { + if (!charSet.converted) { + processCharSetImpl(gData, charSet); + charSet.converted = true; + } + } + } + + + private static void + processCharSetImpl(REGlobalData gData, RECharSet charSet) + { + int src = charSet.startIndex; + int end = src + charSet.strlength; + + char rangeStart = 0, thisCh; + int byteLength; + char c; + int n; + 
int nDigits; + int i; + boolean inRange = false; + + charSet.sense = true; + byteLength = (charSet.length / 8) + 1; + charSet.bits = new byte[byteLength]; + + if (src == end) + return; + + if (gData.regexp.source[src] == '^') { + charSet.sense = false; + ++src; + } + + while (src != end) { + nDigits = 2; + switch (gData.regexp.source[src]) { + case '\\': + ++src; + c = gData.regexp.source[src++]; + switch (c) { + case 'b': + thisCh = 0x8; + break; + case 'f': + thisCh = 0xC; + break; + case 'n': + thisCh = 0xA; + break; + case 'r': + thisCh = 0xD; + break; + case 't': + thisCh = 0x9; + break; + case 'v': + thisCh = 0xB; + break; + case 'c': + if (((src + 1) < end) && isWord(gData.regexp.source[src + 1])) + thisCh = (char)(gData.regexp.source[src++] & 0x1F); + else { + --src; + thisCh = '\\'; + } + break; + case 'u': + nDigits += 2; + // fall thru + case 'x': + n = 0; + for (i = 0; (i < nDigits) && (src < end); i++) { + c = gData.regexp.source[src++]; + int digit = toASCIIHexDigit(c); + if (digit < 0) { + /* back off to accepting the original '\' + * as a literal + */ + src -= (i + 1); + n = '\\'; + break; + } + n = (n << 4) | digit; + } + thisCh = (char)(n); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* + * This is a non-ECMA extension - decimal escapes (in this + * case, octal!) are supposed to be an error inside class + * ranges, but supported here for backwards compatibility. 
+ * + */ + n = (c - '0'); + c = gData.regexp.source[src]; + if ('0' <= c && c <= '7') { + src++; + n = 8 * n + (c - '0'); + c = gData.regexp.source[src]; + if ('0' <= c && c <= '7') { + src++; + i = 8 * n + (c - '0'); + if (i <= 0377) + n = i; + else + src--; + } + } + thisCh = (char)(n); + break; + + case 'd': + addCharacterRangeToCharSet(charSet, '0', '9'); + continue; /* don't need range processing */ + case 'D': + addCharacterRangeToCharSet(charSet, (char)0, (char)('0' - 1)); + addCharacterRangeToCharSet(charSet, (char)('9' + 1), + (char)(charSet.length)); + continue; + case 's': + for (i = charSet.length; i >= 0; i--) + if (isREWhiteSpace(i)) + addCharacterToCharSet(charSet, (char)(i)); + continue; + case 'S': + for (i = charSet.length; i >= 0; i--) + if (!isREWhiteSpace(i)) + addCharacterToCharSet(charSet, (char)(i)); + continue; + case 'w': + for (i = charSet.length; i >= 0; i--) + if (isWord((char)i)) + addCharacterToCharSet(charSet, (char)(i)); + continue; + case 'W': + for (i = charSet.length; i >= 0; i--) + if (!isWord((char)i)) + addCharacterToCharSet(charSet, (char)(i)); + continue; + default: + thisCh = c; + break; + + } + break; + + default: + thisCh = gData.regexp.source[src++]; + break; + + } + if (inRange) { + if ((gData.regexp.flags & JSREG_FOLD) != 0) { + addCharacterRangeToCharSet(charSet, + upcase(rangeStart), + upcase(thisCh)); + addCharacterRangeToCharSet(charSet, + downcase(rangeStart), + downcase(thisCh)); + } else { + addCharacterRangeToCharSet(charSet, rangeStart, thisCh); + } + inRange = false; + } + else { + if ((gData.regexp.flags & JSREG_FOLD) != 0) { + addCharacterToCharSet(charSet, upcase(thisCh)); + addCharacterToCharSet(charSet, downcase(thisCh)); + } else { + addCharacterToCharSet(charSet, thisCh); + } + if (src < (end - 1)) { + if (gData.regexp.source[src] == '-') { + ++src; + inRange = true; + rangeStart = thisCh; + } + } + } + } + } + + + /* + * Initialize the character set if it this is the first call. 
+ * Test the bit - if the ^ flag was specified, non-inclusion is a success + */ + private static boolean + classMatcher(REGlobalData gData, RECharSet charSet, char ch) + { + if (!charSet.converted) { + processCharSet(gData, charSet); + } + + int byteIndex = ch / 8; + if (charSet.sense) { + if ((charSet.length == 0) || + ( (ch > charSet.length) + || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0) )) + return false; + } else { + if (! ((charSet.length == 0) || + ( (ch > charSet.length) + || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0) ))) + return false; + } + return true; + } + + private static boolean + executeREBytecode(REGlobalData gData, char[] chars, int end) + { + int pc = 0; + byte program[] = gData.regexp.program; + int currentContinuation_op; + int currentContinuation_pc; + boolean result = false; + + currentContinuation_pc = 0; + currentContinuation_op = REOP_END; +if (debug) { +System.out.println("Input = \"" + new String(chars) + "\", start at " + gData.cp); +} + int op = program[pc++]; + for (;;) { +if (debug) { +System.out.println("Testing at " + gData.cp + ", op = " + op); +} + switch (op) { + case REOP_EMPTY: + result = true; + break; + case REOP_BOL: + if (gData.cp != 0) { + if (gData.multiline || + ((gData.regexp.flags & JSREG_MULTILINE) != 0)) { + if (!isLineTerm(chars[gData.cp - 1])) { + result = false; + break; + } + } + else { + result = false; + break; + } + } + result = true; + break; + case REOP_EOL: + if (gData.cp != end) { + if (gData.multiline || + ((gData.regexp.flags & JSREG_MULTILINE) != 0)) { + if (!isLineTerm(chars[gData.cp])) { + result = false; + break; + } + } + else { + result = false; + break; + } + } + result = true; + break; + case REOP_WBDRY: + result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1])) + ^ !((gData.cp < end) && isWord(chars[gData.cp]))); + break; + case REOP_WNONBDRY: + result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1])) + ^ ((gData.cp < end) && isWord(chars[gData.cp]))); + break; + case 
REOP_DOT: + result = (gData.cp != end && !isLineTerm(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_DIGIT: + result = (gData.cp != end && isDigit(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_NONDIGIT: + result = (gData.cp != end && !isDigit(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_SPACE: + result = (gData.cp != end && isREWhiteSpace(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_NONSPACE: + result = (gData.cp != end && !isREWhiteSpace(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_ALNUM: + result = (gData.cp != end && isWord(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_NONALNUM: + result = (gData.cp != end && !isWord(chars[gData.cp])); + if (result) { + gData.cp++; + } + break; + case REOP_FLAT: + { + int offset = getIndex(program, pc); + pc += INDEX_LEN; + int length = getIndex(program, pc); + pc += INDEX_LEN; + result = flatNMatcher(gData, offset, length, chars, end); + } + break; + case REOP_FLATi: + { + int offset = getIndex(program, pc); + pc += INDEX_LEN; + int length = getIndex(program, pc); + pc += INDEX_LEN; + result = flatNIMatcher(gData, offset, length, chars, end); + } + break; + case REOP_FLAT1: + { + char matchCh = (char)(program[pc++] & 0xFF); + result = (gData.cp != end && chars[gData.cp] == matchCh); + if (result) { + gData.cp++; + } + } + break; + case REOP_FLAT1i: + { + char matchCh = (char)(program[pc++] & 0xFF); + result = (gData.cp != end + && upcase(chars[gData.cp]) == upcase(matchCh)); + if (result) { + gData.cp++; + } + } + break; + case REOP_UCFLAT1: + { + char matchCh = (char)getIndex(program, pc); + pc += INDEX_LEN; + result = (gData.cp != end && chars[gData.cp] == matchCh); + if (result) { + gData.cp++; + } + } + break; + case REOP_UCFLAT1i: + { + char matchCh = (char)getIndex(program, pc); + pc += INDEX_LEN; + result = (gData.cp != end + && upcase(chars[gData.cp]) == 
upcase(matchCh)); + if (result) { + gData.cp++; + } + } + break; + case REOP_ALT: + { + int nextpc; + byte nextop; + pushProgState(gData, 0, 0, null, + currentContinuation_pc, + currentContinuation_op); + nextpc = pc + getOffset(program, pc); + nextop = program[nextpc++]; + pushBackTrackState(gData, nextop, nextpc); + pc += INDEX_LEN; + op = program[pc++]; + } + continue; + + case REOP_JUMP: + { + int offset; + REProgState state = popProgState(gData); + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + offset = getOffset(program, pc); + pc += offset; + op = program[pc++]; + } + continue; + + + case REOP_LPAREN: + { + int parenIndex = getIndex(program, pc); + pc += INDEX_LEN; + gData.set_parens(parenIndex, gData.cp, 0); + op = program[pc++]; + } + continue; + case REOP_RPAREN: + { + int cap_index; + int parenIndex = getIndex(program, pc); + pc += INDEX_LEN; + cap_index = gData.parens_index(parenIndex); + gData.set_parens(parenIndex, cap_index, + gData.cp - cap_index); + if (parenIndex > gData.lastParen) + gData.lastParen = parenIndex; + op = program[pc++]; + } + continue; + case REOP_BACKREF: + { + int parenIndex = getIndex(program, pc); + pc += INDEX_LEN; + result = backrefMatcher(gData, parenIndex, chars, end); + } + break; + + case REOP_CLASS: + { + int index = getIndex(program, pc); + pc += INDEX_LEN; + if (gData.cp != end) { + if (classMatcher(gData, gData.regexp.classList[index], + chars[gData.cp])) + { + gData.cp++; + result = true; + break; + } + } + result = false; + } + break; + + case REOP_ASSERT: + case REOP_ASSERT_NOT: + { + byte testOp; + pushProgState(gData, 0, 0, gData.backTrackStackTop, + currentContinuation_pc, + currentContinuation_op); + if (op == REOP_ASSERT) { + testOp = REOP_ASSERTTEST; + } else { + testOp = REOP_ASSERTNOTTEST; + } + pushBackTrackState(gData, testOp, + pc + getOffset(program, pc)); + pc += INDEX_LEN; + op = program[pc++]; + } + continue; + + case REOP_ASSERTTEST: + case 
REOP_ASSERTNOTTEST: + { + REProgState state = popProgState(gData); + gData.cp = state.index; + gData.backTrackStackTop = state.backTrack; + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + if (result) { + if (op == REOP_ASSERTTEST) { + result = true; + } else { + result = false; + } + } else { + if (op == REOP_ASSERTTEST) { + // Do nothing + } else { + result = true; + } + } + } + break; + + case REOP_STAR: + case REOP_PLUS: + case REOP_OPT: + case REOP_QUANT: + case REOP_MINIMALSTAR: + case REOP_MINIMALPLUS: + case REOP_MINIMALOPT: + case REOP_MINIMALQUANT: + { + int min, max; + boolean greedy = false; + switch (op) { + case REOP_STAR: + greedy = true; + // fallthrough + case REOP_MINIMALSTAR: + min = 0; + max = -1; + break; + case REOP_PLUS: + greedy = true; + // fallthrough + case REOP_MINIMALPLUS: + min = 1; + max = -1; + break; + case REOP_OPT: + greedy = true; + // fallthrough + case REOP_MINIMALOPT: + min = 0; + max = 1; + break; + case REOP_QUANT: + greedy = true; + // fallthrough + case REOP_MINIMALQUANT: + min = getOffset(program, pc); + pc += INDEX_LEN; + // See comments in emitREBytecode for " - 1" reason + max = getOffset(program, pc) - 1; + pc += INDEX_LEN; + break; + default: + throw Kit.codeBug(); + } + pushProgState(gData, min, max, null, + currentContinuation_pc, + currentContinuation_op); + if (greedy) { + currentContinuation_op = REOP_REPEAT; + currentContinuation_pc = pc; + pushBackTrackState(gData, REOP_REPEAT, pc); + /* Step over <parencount>, <parenindex> & <next> */ + pc += 3 * INDEX_LEN; + op = program[pc++]; + } else { + if (min != 0) { + currentContinuation_op = REOP_MINIMALREPEAT; + currentContinuation_pc = pc; + /* <parencount> <parenindex> & <next> */ + pc += 3 * INDEX_LEN; + op = program[pc++]; + } else { + pushBackTrackState(gData, REOP_MINIMALREPEAT, pc); + popProgState(gData); + pc += 2 * INDEX_LEN; // <parencount> & <parenindex> + pc = pc + getOffset(program, pc); + op = 
program[pc++]; + } + } + } + continue; + + case REOP_ENDCHILD: + // Use the current continuation. + pc = currentContinuation_pc; + op = currentContinuation_op; + continue; + + case REOP_REPEAT: + { + REProgState state = popProgState(gData); + if (!result) { + // + // There's been a failure, see if we have enough + // children. + // + if (state.min == 0) + result = true; + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + pc += 2 * INDEX_LEN; /* <parencount> & <parenindex> */ + pc = pc + getOffset(program, pc); + break; + } + else { + if (state.min == 0 && gData.cp == state.index) { + // matched an empty string, that'll get us nowhere + result = false; + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + pc += 2 * INDEX_LEN; + pc = pc + getOffset(program, pc); + break; + } + int new_min = state.min, new_max = state.max; + if (new_min != 0) new_min--; + if (new_max != -1) new_max--; + if (new_max == 0) { + result = true; + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + pc += 2 * INDEX_LEN; + pc = pc + getOffset(program, pc); + break; + } + pushProgState(gData, new_min, new_max, null, + state.continuation_pc, + state.continuation_op); + currentContinuation_op = REOP_REPEAT; + currentContinuation_pc = pc; + pushBackTrackState(gData, REOP_REPEAT, pc); + int parenCount = getIndex(program, pc); + pc += INDEX_LEN; + int parenIndex = getIndex(program, pc); + pc += 2 * INDEX_LEN; + op = program[pc++]; + for (int k = 0; k < parenCount; k++) { + gData.set_parens(parenIndex + k, -1, 0); + } + } + } + continue; + + case REOP_MINIMALREPEAT: + { + REProgState state = popProgState(gData); + if (!result) { + // + // Non-greedy failure - try to consume another child. 
+ // + if (state.max == -1 || state.max > 0) { + pushProgState(gData, state.min, state.max, null, + state.continuation_pc, + state.continuation_op); + currentContinuation_op = REOP_MINIMALREPEAT; + currentContinuation_pc = pc; + int parenCount = getIndex(program, pc); + pc += INDEX_LEN; + int parenIndex = getIndex(program, pc); + pc += 2 * INDEX_LEN; + for (int k = 0; k < parenCount; k++) { + gData.set_parens(parenIndex + k, -1, 0); + } + op = program[pc++]; + continue; + } else { + // Don't need to adjust pc since we're going to pop. + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + break; + } + } else { + if (state.min == 0 && gData.cp == state.index) { + // Matched an empty string, that'll get us nowhere. + result = false; + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + break; + } + int new_min = state.min, new_max = state.max; + if (new_min != 0) new_min--; + if (new_max != -1) new_max--; + pushProgState(gData, new_min, new_max, null, + state.continuation_pc, + state.continuation_op); + if (new_min != 0) { + currentContinuation_op = REOP_MINIMALREPEAT; + currentContinuation_pc = pc; + int parenCount = getIndex(program, pc); + pc += INDEX_LEN; + int parenIndex = getIndex(program, pc); + pc += 2 * INDEX_LEN; + for (int k = 0; k < parenCount; k++) { + gData.set_parens(parenIndex + k, -1, 0); + } + op = program[pc++]; + } else { + currentContinuation_pc = state.continuation_pc; + currentContinuation_op = state.continuation_op; + pushBackTrackState(gData, REOP_MINIMALREPEAT, pc); + popProgState(gData); + pc += 2 * INDEX_LEN; + pc = pc + getOffset(program, pc); + op = program[pc++]; + } + continue; + } + } + + case REOP_END: + return true; + + default: + throw Kit.codeBug(); + + } + /* + * If the match failed and there's a backtrack option, take it. + * Otherwise this is a complete and utter failure. 
+ */ + if (!result) { + REBackTrackData backTrackData = gData.backTrackStackTop; + if (backTrackData != null) { + gData.backTrackStackTop = backTrackData.previous; + + gData.lastParen = backTrackData.lastParen; + + // XXX: If backTrackData will no longer be used, then + // there is no need to clone backTrackData.parens + if (backTrackData.parens != null) { + gData.parens = backTrackData.parens.clone(); + } + + gData.cp = backTrackData.cp; + + gData.stateStackTop = backTrackData.stateStackTop; + + currentContinuation_op + = gData.stateStackTop.continuation_op; + currentContinuation_pc + = gData.stateStackTop.continuation_pc; + pc = backTrackData.continuation_pc; + op = backTrackData.continuation_op; + continue; + } + else + return false; + } + + op = program[pc++]; + } + + } + + private static boolean + matchRegExp(REGlobalData gData, RECompiled re, + char[] chars, int start, int end, boolean multiline) + { + if (re.parenCount != 0) { + gData.parens = new long[re.parenCount]; + } else { + gData.parens = null; + } + + gData.backTrackStackTop = null; + + gData.stateStackTop = null; + + gData.multiline = multiline; + gData.regexp = re; + gData.lastParen = 0; + + int anchorCh = gData.regexp.anchorCh; + // + // have to include the position beyond the last character + // in order to detect end-of-input/line condition + // + for (int i = start; i <= end; ++i) { + // + // If the first node is a literal match, step the index into + // the string until that match is made, or fail if it can't be + // found at all. 
+ // + if (anchorCh >= 0) { + for (;;) { + if (i == end) { + return false; + } + char matchCh = chars[i]; + if (matchCh == anchorCh || + ((gData.regexp.flags & JSREG_FOLD) != 0 + && upcase(matchCh) == upcase((char)anchorCh))) + { + break; + } + ++i; + } + } + gData.cp = i; + for (int j = 0; j < re.parenCount; j++) { + gData.set_parens(j, -1, 0); + } + boolean result = executeREBytecode(gData, chars, end); + + gData.backTrackStackTop = null; + gData.stateStackTop = null; + if (result) { + gData.skipped = i - start; + return true; + } + } + return false; + } + + /* + * indexp is assumed to be an array of length 1 + */ + Object executeRegExp(Context cx, Scriptable scopeObj, RegExpImpl res, + String str, int indexp[], int matchType) + { + REGlobalData gData = new REGlobalData(); + + int start = indexp[0]; + char[] charArray = str.toCharArray(); + int end = charArray.length; + if (start > end) + start = end; + // + // Call the recursive matcher to do the real work. + // + boolean matches = matchRegExp(gData, re, charArray, start, end, + res.multiline); + if (!matches) { + if (matchType != PREFIX) return null; + return Undefined.instance; + } + int index = gData.cp; + int i = index; + indexp[0] = i; + int matchlen = i - (start + gData.skipped); + int ep = index; + index -= matchlen; + Object result; + Scriptable obj; + + if (matchType == TEST) { + /* + * Testing for a match and updating cx.regExpImpl: don't allocate + * an array object, do return true. + */ + result = Boolean.TRUE; + obj = null; + } + else { + /* + * The array returned on match has element 0 bound to the matched + * string, elements 1 through re.parenCount bound to the paren + * matches, an index property telling the length of the left context, + * and an input property referring to the input string. 
+ */ + Scriptable scope = getTopLevelScope(scopeObj); + result = ScriptRuntime.newObject(cx, scope, "Array", null); + obj = (Scriptable) result; + + String matchstr = new String(charArray, index, matchlen); + obj.put(0, obj, matchstr); + } + + if (re.parenCount == 0) { + res.parens = null; + res.lastParen = SubString.emptySubString; + } else { + SubString parsub = null; + int num; + res.parens = new SubString[re.parenCount]; + for (num = 0; num < re.parenCount; num++) { + int cap_index = gData.parens_index(num); + String parstr; + if (cap_index != -1) { + int cap_length = gData.parens_length(num); + parsub = new SubString(charArray, cap_index, cap_length); + res.parens[num] = parsub; + if (matchType == TEST) continue; + parstr = parsub.toString(); + obj.put(num+1, obj, parstr); + } + else { + if (matchType != TEST) + obj.put(num+1, obj, Undefined.instance); + } + } + res.lastParen = parsub; + } + + if (! (matchType == TEST)) { + /* + * Define the index and input properties last for better for/in loop + * order (so they come after the elements). + */ + obj.put("index", obj, new Integer(start + gData.skipped)); + obj.put("input", obj, str); + } + + if (res.lastMatch == null) { + res.lastMatch = new SubString(); + res.leftContext = new SubString(); + res.rightContext = new SubString(); + } + res.lastMatch.charArray = charArray; + res.lastMatch.index = index; + res.lastMatch.length = matchlen; + + res.leftContext.charArray = charArray; + if (cx.getLanguageVersion() == Context.VERSION_1_2) { + /* + * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used + * in scalar contexts, and unintentionally for the string.match "list" + * psuedo-context. 
On "hi there bye", the following would result: + * + * Language while(/ /g){print("$`");} s/ /$`/g + * perl4.036 "hi", "there" "hihitherehi therebye" + * perl5 "hi", "hi there" "hihitherehi therebye" + * js1.2 "hi", "there" "hihitheretherebye" + * + * Insofar as JS1.2 always defined $` as "left context from the last + * match" for global regexps, it was more consistent than perl4. + */ + res.leftContext.index = start; + res.leftContext.length = gData.skipped; + } else { + /* + * For JS1.3 and ECMAv2, emulate Perl5 exactly: + * + * js1.3 "hi", "hi there" "hihitherehi therebye" + */ + res.leftContext.index = 0; + res.leftContext.length = start + gData.skipped; + } + + res.rightContext.charArray = charArray; + res.rightContext.index = ep; + res.rightContext.length = end - ep; + + return result; + } + + int getFlags() + { + return re.flags; + } + + private static void reportWarning(Context cx, String messageId, String arg) + { + if (cx.hasFeature(Context.FEATURE_STRICT_MODE)) { + String msg = ScriptRuntime.getMessage1(messageId, arg); + Context.reportWarning(msg); + } + } + + private static void reportError(String messageId, String arg) + { + String msg = ScriptRuntime.getMessage1(messageId, arg); + throw ScriptRuntime.constructError("SyntaxError", msg); + } + +// #string_id_map# + + private static final int + Id_lastIndex = 1, + Id_source = 2, + Id_global = 3, + Id_ignoreCase = 4, + Id_multiline = 5, + + MAX_INSTANCE_ID = 5; + + protected int getMaxInstanceId() + { + return MAX_INSTANCE_ID; + } + + protected int findInstanceIdInfo(String s) + { + int id; +// #generated# Last update: 2007-05-09 08:16:24 EDT + L0: { id = 0; String X = null; int c; + int s_length = s.length(); + if (s_length==6) { + c=s.charAt(0); + if (c=='g') { X="global";id=Id_global; } + else if (c=='s') { X="source";id=Id_source; } + } + else if (s_length==9) { + c=s.charAt(0); + if (c=='l') { X="lastIndex";id=Id_lastIndex; } + else if (c=='m') { X="multiline";id=Id_multiline; } + } + else if 
(s_length==10) { X="ignoreCase";id=Id_ignoreCase; } + if (X!=null && X!=s && !X.equals(s)) id = 0; + break L0; + } +// #/generated# +// #/string_id_map# + + if (id == 0) return super.findInstanceIdInfo(s); + + int attr; + switch (id) { + case Id_lastIndex: + attr = PERMANENT | DONTENUM; + break; + case Id_source: + case Id_global: + case Id_ignoreCase: + case Id_multiline: + attr = PERMANENT | READONLY | DONTENUM; + break; + default: + throw new IllegalStateException(); + } + return instanceIdInfo(attr, id); + } + + protected String getInstanceIdName(int id) + { + switch (id) { + case Id_lastIndex: return "lastIndex"; + case Id_source: return "source"; + case Id_global: return "global"; + case Id_ignoreCase: return "ignoreCase"; + case Id_multiline: return "multiline"; + } + return super.getInstanceIdName(id); + } + + protected Object getInstanceIdValue(int id) + { + switch (id) { + case Id_lastIndex: + return ScriptRuntime.wrapNumber(lastIndex); + case Id_source: + return new String(re.source); + case Id_global: + return ScriptRuntime.wrapBoolean((re.flags & JSREG_GLOB) != 0); + case Id_ignoreCase: + return ScriptRuntime.wrapBoolean((re.flags & JSREG_FOLD) != 0); + case Id_multiline: + return ScriptRuntime.wrapBoolean((re.flags & JSREG_MULTILINE) != 0); + } + return super.getInstanceIdValue(id); + } + + protected void setInstanceIdValue(int id, Object value) + { + if (id == Id_lastIndex) { + lastIndex = ScriptRuntime.toNumber(value); + return; + } + super.setInstanceIdValue(id, value); + } + + protected void initPrototypeId(int id) + { + String s; + int arity; + switch (id) { + case Id_compile: arity=1; s="compile"; break; + case Id_toString: arity=0; s="toString"; break; + case Id_toSource: arity=0; s="toSource"; break; + case Id_exec: arity=1; s="exec"; break; + case Id_test: arity=1; s="test"; break; + case Id_prefix: arity=1; s="prefix"; break; + default: throw new IllegalArgumentException(String.valueOf(id)); + } + initPrototypeMethod(REGEXP_TAG, id, s, 
arity); + } + + public Object execIdCall(IdFunctionObject f, Context cx, Scriptable scope, + Scriptable thisObj, Object[] args) + { + if (!f.hasTag(REGEXP_TAG)) { + return super.execIdCall(f, cx, scope, thisObj, args); + } + int id = f.methodId(); + switch (id) { + case Id_compile: + return realThis(thisObj, f).compile(cx, scope, args); + + case Id_toString: + case Id_toSource: + return realThis(thisObj, f).toString(); + + case Id_exec: + return realThis(thisObj, f).execSub(cx, scope, args, MATCH); + + case Id_test: { + Object x = realThis(thisObj, f).execSub(cx, scope, args, TEST); + return Boolean.TRUE.equals(x) ? Boolean.TRUE : Boolean.FALSE; + } + + case Id_prefix: + return realThis(thisObj, f).execSub(cx, scope, args, PREFIX); + } + throw new IllegalArgumentException(String.valueOf(id)); + } + + private static NativeRegExp realThis(Scriptable thisObj, IdFunctionObject f) + { + if (!(thisObj instanceof NativeRegExp)) + throw incompatibleCallError(f); + return (NativeRegExp)thisObj; + } + +// #string_id_map# + protected int findPrototypeId(String s) + { + int id; +// #generated# Last update: 2007-05-09 08:16:24 EDT + L0: { id = 0; String X = null; int c; + L: switch (s.length()) { + case 4: c=s.charAt(0); + if (c=='e') { X="exec";id=Id_exec; } + else if (c=='t') { X="test";id=Id_test; } + break L; + case 6: X="prefix";id=Id_prefix; break L; + case 7: X="compile";id=Id_compile; break L; + case 8: c=s.charAt(3); + if (c=='o') { X="toSource";id=Id_toSource; } + else if (c=='t') { X="toString";id=Id_toString; } + break L; + } + if (X!=null && X!=s && !X.equals(s)) id = 0; + break L0; + } +// #/generated# + return id; + } + + private static final int + Id_compile = 1, + Id_toString = 2, + Id_toSource = 3, + Id_exec = 4, + Id_test = 5, + Id_prefix = 6, + + MAX_PROTOTYPE_ID = 6; + +// #/string_id_map# + + private RECompiled re; + double lastIndex; /* index after last match, for //g iterator */ + +} // class NativeRegExp + +class RECompiled implements Serializable +{ + 
static final long serialVersionUID = -6144956577595844213L; + + char []source; /* locked source string, sans // */ + int parenCount; /* number of parenthesized submatches */ + int flags; /* flags */ + byte[] program; /* regular expression bytecode */ + int classCount; /* count [...] bitmaps */ + RECharSet[] classList; /* list of [...] bitmaps */ + int anchorCh = -1; /* if >= 0, then re starts with this literal char */ +} + +class RENode { + + RENode(byte op) + { + this.op = op; + } + + byte op; /* r.e. op bytecode */ + RENode next; /* next in concatenation order */ + RENode kid; /* first operand */ + + RENode kid2; /* second operand */ + int num; /* could be a number */ + int parenIndex; /* or a parenthesis index */ + + /* or a range */ + int min; + int max; + int parenCount; + boolean greedy; + + /* or a character class */ + int startIndex; + int kidlen; /* length of string at kid, in chars */ + int bmsize; /* bitmap size, based on max char code */ + int index; /* index into class list */ + + /* or a literal sequence */ + char chr; /* of one character */ + int length; /* or many (via the index) */ + int flatIndex; /* which is -1 if not sourced */ + +} + +class CompilerState { + + CompilerState(Context cx, char[] source, int length, int flags) + { + this.cx = cx; + this.cpbegin = source; + this.cp = 0; + this.cpend = length; + this.flags = flags; + this.parenCount = 0; + this.classCount = 0; + this.progLength = 0; + } + + Context cx; + char cpbegin[]; + int cpend; + int cp; + int flags; + int parenCount; + int parenNesting; + int classCount; /* number of [] encountered */ + int progLength; /* estimated bytecode length */ + RENode result; +} + +class REProgState +{ + REProgState(REProgState previous, int min, int max, int index, + REBackTrackData backTrack, + int continuation_pc, int continuation_op) + { + this.previous = previous; + this.min = min; + this.max = max; + this.index = index; + this.continuation_op = continuation_op; + this.continuation_pc = 
continuation_pc; + this.backTrack = backTrack; + } + + REProgState previous; // previous state in stack + + int min; /* current quantifier min */ + int max; /* current quantifier max */ + int index; /* progress in text */ + int continuation_op; + int continuation_pc; + REBackTrackData backTrack; // used by ASSERT_ to recover state +} + +class REBackTrackData { + + REBackTrackData(REGlobalData gData, int op, int pc) + { + previous = gData.backTrackStackTop; + continuation_op = op; + continuation_pc = pc; + lastParen = gData.lastParen; + if (gData.parens != null) { + parens = gData.parens.clone(); + } + cp = gData.cp; + stateStackTop = gData.stateStackTop; + } + + REBackTrackData previous; + + int continuation_op; /* where to backtrack to */ + int continuation_pc; + int lastParen; + long[] parens; /* parenthesis captures */ + int cp; /* char buffer index */ + REProgState stateStackTop; /* state of op that backtracked */ +} + +class REGlobalData { + boolean multiline; + RECompiled regexp; /* the RE in execution */ + int lastParen; /* highest paren set so far */ + int skipped; /* chars skipped anchoring this r.e. */ + + int cp; /* char buffer index */ + long[] parens; /* parens captures */ + + REProgState stateStackTop; /* stack of state of current ancestors */ + + REBackTrackData backTrackStackTop; /* last matched-so-far position */ + + + /** + * Get start of parenthesis capture contents, -1 for empty. + */ + int parens_index(int i) + { + return (int)(parens[i]); + } + + /** + * Get length of parenthesis capture contents. + */ + int parens_length(int i) + { + return (int)(parens[i] >>> 32); + } + + void set_parens(int i, int index, int length) + { + parens[i] = (index & 0xffffffffL) | ((long)length << 32); + } + +} + +/* + * This struct holds a bitmap representation of a class from a regexp. + * There's a list of these referenced by the classList field in the NativeRegExp + * struct below. 
/**
 * Bitmap representation of one character class ([...]) taken from a
 * regular expression.
 *
 * Per the original notes for this type: an instance starts out holding only
 * {@link #startIndex}, the offset in the regexp source at which the class
 * contents begin; the first use of the class converts that source
 * representation into a bitmap.  The three bitmap-related fields are
 * transient, so they are not written out when an instance is serialized.
 */
final class RECharSet implements Serializable {

    static final long serialVersionUID = 7931787979395898394L;

    /* Values supplied at construction time. */
    int length;
    int startIndex;     /* offset of the class contents in the regexp source */
    int strlength;

    /* Conversion state; all three start at their Java defaults. */
    volatile transient boolean converted;
    volatile transient boolean sense;
    volatile transient byte[] bits;

    /**
     * Records the three construction-time values; no conversion to a bitmap
     * happens here.
     */
    RECharSet(int length, int startIndex, int strlength) {
        this.strlength = strlength;
        this.startIndex = startIndex;
        this.length = length;
    }
}
+ * + * Contributor(s): + * Norris Boyd + * Igor Bukanov + * Brendan Eich + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License Version 2 or later (the "GPL"), in which + * case the provisions of the GPL are applicable instead of those above. If + * you wish to allow use of your version of this file only under the terms of + * the GPL and not to allow others to use your version of this file under the + * MPL, indicate your decision by deleting the provisions above and replacing + * them with the notice and other provisions required by the GPL. If you do + * not delete the provisions above, a recipient may use your version of this + * file under either the MPL or the GPL. + * + * ***** END LICENSE BLOCK ***** */ + +package org.mozilla.javascript.regexp; + +import org.mozilla.javascript.*; + +/** + * This class implements the RegExp constructor native object. + * + * Revision History: + * Implementation in C by Brendan Eich + * Initial port to Java by Norris Boyd from jsregexp.c version 1.36 + * Merged up to version 1.38, which included Unicode support. + * Merged bug fixes in version 1.39. 
+ * Merged JSFUN13_BRANCH changes up to 1.32.2.11 + * + * @author Brendan Eich + * @author Norris Boyd + */ +class NativeRegExpCtor extends BaseFunction +{ + static final long serialVersionUID = -5733330028285400526L; + + NativeRegExpCtor() + { + } + + public String getFunctionName() + { + return "RegExp"; + } + + public Object call(Context cx, Scriptable scope, Scriptable thisObj, + Object[] args) + { + if (args.length > 0 && args[0] instanceof NativeRegExp && + (args.length == 1 || args[1] == Undefined.instance)) + { + return args[0]; + } + return construct(cx, scope, args); + } + + public Scriptable construct(Context cx, Scriptable scope, Object[] args) + { + NativeRegExp re = new NativeRegExp(); + re.compile(cx, scope, args); + ScriptRuntime.setObjectProtoAndParent(re, scope); + return re; + } + + private static RegExpImpl getImpl() + { + Context cx = Context.getCurrentContext(); + return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx); + } + +// #string_id_map# + + private static final int + Id_multiline = 1, + Id_STAR = 2, // #string=$*# + + Id_input = 3, + Id_UNDERSCORE = 4, // #string=$_# + + Id_lastMatch = 5, + Id_AMPERSAND = 6, // #string=$&# + + Id_lastParen = 7, + Id_PLUS = 8, // #string=$+# + + Id_leftContext = 9, + Id_BACK_QUOTE = 10, // #string=$`# + + Id_rightContext = 11, + Id_QUOTE = 12, // #string=$'# + + DOLLAR_ID_BASE = 12; + + private static final int + Id_DOLLAR_1 = DOLLAR_ID_BASE + 1, // #string=$1# + Id_DOLLAR_2 = DOLLAR_ID_BASE + 2, // #string=$2# + Id_DOLLAR_3 = DOLLAR_ID_BASE + 3, // #string=$3# + Id_DOLLAR_4 = DOLLAR_ID_BASE + 4, // #string=$4# + Id_DOLLAR_5 = DOLLAR_ID_BASE + 5, // #string=$5# + Id_DOLLAR_6 = DOLLAR_ID_BASE + 6, // #string=$6# + Id_DOLLAR_7 = DOLLAR_ID_BASE + 7, // #string=$7# + Id_DOLLAR_8 = DOLLAR_ID_BASE + 8, // #string=$8# + Id_DOLLAR_9 = DOLLAR_ID_BASE + 9, // #string=$9# + + MAX_INSTANCE_ID = DOLLAR_ID_BASE + 9; + + protected int getMaxInstanceId() + { + return super.getMaxInstanceId() + MAX_INSTANCE_ID; + } + + 
protected int findInstanceIdInfo(String s) { + int id; +// #generated# Last update: 2001-05-24 16:09:31 GMT+02:00 + L0: { id = 0; String X = null; int c; + L: switch (s.length()) { + case 2: switch (s.charAt(1)) { + case '&': if (s.charAt(0)=='$') {id=Id_AMPERSAND; break L0;} break L; + case '\'': if (s.charAt(0)=='$') {id=Id_QUOTE; break L0;} break L; + case '*': if (s.charAt(0)=='$') {id=Id_STAR; break L0;} break L; + case '+': if (s.charAt(0)=='$') {id=Id_PLUS; break L0;} break L; + case '1': if (s.charAt(0)=='$') {id=Id_DOLLAR_1; break L0;} break L; + case '2': if (s.charAt(0)=='$') {id=Id_DOLLAR_2; break L0;} break L; + case '3': if (s.charAt(0)=='$') {id=Id_DOLLAR_3; break L0;} break L; + case '4': if (s.charAt(0)=='$') {id=Id_DOLLAR_4; break L0;} break L; + case '5': if (s.charAt(0)=='$') {id=Id_DOLLAR_5; break L0;} break L; + case '6': if (s.charAt(0)=='$') {id=Id_DOLLAR_6; break L0;} break L; + case '7': if (s.charAt(0)=='$') {id=Id_DOLLAR_7; break L0;} break L; + case '8': if (s.charAt(0)=='$') {id=Id_DOLLAR_8; break L0;} break L; + case '9': if (s.charAt(0)=='$') {id=Id_DOLLAR_9; break L0;} break L; + case '_': if (s.charAt(0)=='$') {id=Id_UNDERSCORE; break L0;} break L; + case '`': if (s.charAt(0)=='$') {id=Id_BACK_QUOTE; break L0;} break L; + } break L; + case 5: X="input";id=Id_input; break L; + case 9: c=s.charAt(4); + if (c=='M') { X="lastMatch";id=Id_lastMatch; } + else if (c=='P') { X="lastParen";id=Id_lastParen; } + else if (c=='i') { X="multiline";id=Id_multiline; } + break L; + case 11: X="leftContext";id=Id_leftContext; break L; + case 12: X="rightContext";id=Id_rightContext; break L; + } + if (X!=null && X!=s && !X.equals(s)) id = 0; + } +// #/generated# + + if (id == 0) return super.findInstanceIdInfo(s); + + int attr; + switch (id) { + case Id_multiline: + case Id_STAR: + case Id_input: + case Id_UNDERSCORE: + attr = PERMANENT; + break; + default: + attr = PERMANENT | READONLY; + break; + } + + return instanceIdInfo(attr, 
super.getMaxInstanceId() + id); + } + +// #/string_id_map# + + protected String getInstanceIdName(int id) + { + int shifted = id - super.getMaxInstanceId(); + if (1 <= shifted && shifted <= MAX_INSTANCE_ID) { + switch (shifted) { + case Id_multiline: return "multiline"; + case Id_STAR: return "$*"; + + case Id_input: return "input"; + case Id_UNDERSCORE: return "$_"; + + case Id_lastMatch: return "lastMatch"; + case Id_AMPERSAND: return "$&"; + + case Id_lastParen: return "lastParen"; + case Id_PLUS: return "$+"; + + case Id_leftContext: return "leftContext"; + case Id_BACK_QUOTE: return "$`"; + + case Id_rightContext: return "rightContext"; + case Id_QUOTE: return "$'"; + } + // Must be one of $1..$9, convert to 0..8 + int substring_number = shifted - DOLLAR_ID_BASE - 1; + char[] buf = { '$', (char)('1' + substring_number) }; + return new String(buf); + } + return super.getInstanceIdName(id); + } + + protected Object getInstanceIdValue(int id) + { + int shifted = id - super.getMaxInstanceId(); + if (1 <= shifted && shifted <= MAX_INSTANCE_ID) { + RegExpImpl impl = getImpl(); + Object stringResult; + switch (shifted) { + case Id_multiline: + case Id_STAR: + return ScriptRuntime.wrapBoolean(impl.multiline); + + case Id_input: + case Id_UNDERSCORE: + stringResult = impl.input; + break; + + case Id_lastMatch: + case Id_AMPERSAND: + stringResult = impl.lastMatch; + break; + + case Id_lastParen: + case Id_PLUS: + stringResult = impl.lastParen; + break; + + case Id_leftContext: + case Id_BACK_QUOTE: + stringResult = impl.leftContext; + break; + + case Id_rightContext: + case Id_QUOTE: + stringResult = impl.rightContext; + break; + + default: + { + // Must be one of $1..$9, convert to 0..8 + int substring_number = shifted - DOLLAR_ID_BASE - 1; + stringResult = impl.getParenSubString(substring_number); + break; + } + } + return (stringResult == null) ? 
"" : stringResult.toString(); + } + return super.getInstanceIdValue(id); + } + + protected void setInstanceIdValue(int id, Object value) + { + int shifted = id - super.getMaxInstanceId(); + switch (shifted) { + case Id_multiline: + case Id_STAR: + getImpl().multiline = ScriptRuntime.toBoolean(value); + return; + + case Id_input: + case Id_UNDERSCORE: + getImpl().input = ScriptRuntime.toString(value); + return; + } + super.setInstanceIdValue(id, value); + } + +} diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java new file mode 100644 index 0000000..4b0a303 --- /dev/null +++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java @@ -0,0 +1,541 @@ +/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Rhino code, released + * May 6, 1998. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1997-1999 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License Version 2 or later (the "GPL"), in which + * case the provisions of the GPL are applicable instead of those above. 
If + * you wish to allow use of your version of this file only under the terms of + * the GPL and not to allow others to use your version of this file under the + * MPL, indicate your decision by deleting the provisions above and replacing + * them with the notice and other provisions required by the GPL. If you do + * not delete the provisions above, a recipient may use your version of this + * file under either the MPL or the GPL. + * + * ***** END LICENSE BLOCK ***** */ + +package org.mozilla.javascript.regexp; + +import org.mozilla.javascript.*; + +/** + * + */ +public class RegExpImpl implements RegExpProxy { + + public boolean isRegExp(Scriptable obj) { + return obj instanceof NativeRegExp; + } + + public Object compileRegExp(Context cx, String source, String flags) + { + return NativeRegExp.compileRE(cx, source, flags, false); + } + + public Scriptable wrapRegExp(Context cx, Scriptable scope, + Object compiled) + { + return new NativeRegExp(scope, compiled); + } + + public Object action(Context cx, Scriptable scope, + Scriptable thisObj, Object[] args, + int actionType) + { + GlobData data = new GlobData(); + data.mode = actionType; + + switch (actionType) { + case RA_MATCH: + { + Object rval; + data.optarg = 1; + rval = matchOrReplace(cx, scope, thisObj, args, + this, data, false); + return data.arrayobj == null ? rval : data.arrayobj; + } + + case RA_SEARCH: + data.optarg = 1; + return matchOrReplace(cx, scope, thisObj, args, + this, data, false); + + case RA_REPLACE: + { + Object arg1 = args.length < 2 ? Undefined.instance : args[1]; + String repstr = null; + Function lambda = null; + if (arg1 instanceof Function) { + lambda = (Function) arg1; + } else { + repstr = ScriptRuntime.toString(arg1); + } + + data.optarg = 2; + data.lambda = lambda; + data.repstr = repstr; + data.dollar = repstr == null ? 
-1 : repstr.indexOf('$'); + data.charBuf = null; + data.leftIndex = 0; + Object val = matchOrReplace(cx, scope, thisObj, args, + this, data, true); + SubString rc = this.rightContext; + + if (data.charBuf == null) { + if (data.global || val == null + || !val.equals(Boolean.TRUE)) + { + /* Didn't match even once. */ + return data.str; + } + SubString lc = this.leftContext; + replace_glob(data, cx, scope, this, lc.index, lc.length); + } + data.charBuf.append(rc.charArray, rc.index, rc.length); + return data.charBuf.toString(); + } + + default: + throw Kit.codeBug(); + } + } + + /** + * Analog of C match_or_replace. + */ + private static Object matchOrReplace(Context cx, Scriptable scope, + Scriptable thisObj, Object[] args, + RegExpImpl reImpl, + GlobData data, boolean forceFlat) + { + NativeRegExp re; + + String str = ScriptRuntime.toString(thisObj); + data.str = str; + Scriptable topScope = ScriptableObject.getTopLevelScope(scope); + + if (args.length == 0) { + Object compiled = NativeRegExp.compileRE(cx, "", "", false); + re = new NativeRegExp(topScope, compiled); + } else if (args[0] instanceof NativeRegExp) { + re = (NativeRegExp) args[0]; + } else { + String src = ScriptRuntime.toString(args[0]); + String opt; + if (data.optarg < args.length) { + args[0] = src; + opt = ScriptRuntime.toString(args[data.optarg]); + } else { + opt = null; + } + Object compiled = NativeRegExp.compileRE(cx, src, opt, forceFlat); + re = new NativeRegExp(topScope, compiled); + } + data.regexp = re; + + data.global = (re.getFlags() & NativeRegExp.JSREG_GLOB) != 0; + int[] indexp = { 0 }; + Object result = null; + if (data.mode == RA_SEARCH) { + result = re.executeRegExp(cx, scope, reImpl, + str, indexp, NativeRegExp.TEST); + if (result != null && result.equals(Boolean.TRUE)) + result = new Integer(reImpl.leftContext.length); + else + result = new Integer(-1); + } else if (data.global) { + re.lastIndex = 0; + for (int count = 0; indexp[0] <= str.length(); count++) { + result = 
re.executeRegExp(cx, scope, reImpl, + str, indexp, NativeRegExp.TEST); + if (result == null || !result.equals(Boolean.TRUE)) + break; + if (data.mode == RA_MATCH) { + match_glob(data, cx, scope, count, reImpl); + } else { + if (data.mode != RA_REPLACE) Kit.codeBug(); + SubString lastMatch = reImpl.lastMatch; + int leftIndex = data.leftIndex; + int leftlen = lastMatch.index - leftIndex; + data.leftIndex = lastMatch.index + lastMatch.length; + replace_glob(data, cx, scope, reImpl, leftIndex, leftlen); + } + if (reImpl.lastMatch.length == 0) { + if (indexp[0] == str.length()) + break; + indexp[0]++; + } + } + } else { + result = re.executeRegExp(cx, scope, reImpl, str, indexp, + ((data.mode == RA_REPLACE) + ? NativeRegExp.TEST + : NativeRegExp.MATCH)); + } + + return result; + } + + + + public int find_split(Context cx, Scriptable scope, String target, + String separator, Scriptable reObj, + int[] ip, int[] matchlen, + boolean[] matched, String[][] parensp) + { + int i = ip[0]; + int length = target.length(); + int result; + + int version = cx.getLanguageVersion(); + NativeRegExp re = (NativeRegExp) reObj; + again: + while (true) { // imitating C label + /* JS1.2 deviated from Perl by never matching at end of string. */ + int ipsave = ip[0]; // reuse ip to save object creation + ip[0] = i; + Object ret = re.executeRegExp(cx, scope, this, target, ip, + NativeRegExp.TEST); + if (ret != Boolean.TRUE) { + // Mismatch: ensure our caller advances i past end of string. + ip[0] = ipsave; + matchlen[0] = 1; + matched[0] = false; + return length; + } + i = ip[0]; + ip[0] = ipsave; + matched[0] = true; + + SubString sep = this.lastMatch; + matchlen[0] = sep.length; + if (matchlen[0] == 0) { + /* + * Empty string match: never split on an empty + * match at the start of a find_split cycle. Same + * rule as for an empty global match in + * match_or_replace. 
+ */ + if (i == ip[0]) { + /* + * "Bump-along" to avoid sticking at an empty + * match, but don't bump past end of string -- + * our caller must do that by adding + * sep->length to our return value. + */ + if (i == length) { + if (version == Context.VERSION_1_2) { + matchlen[0] = 1; + result = i; + } + else + result = -1; + break; + } + i++; + continue again; // imitating C goto + } + } + // PR_ASSERT((size_t)i >= sep->length); + result = i - matchlen[0]; + break; + } + int size = (parens == null) ? 0 : parens.length; + parensp[0] = new String[size]; + for (int num = 0; num < size; num++) { + SubString parsub = getParenSubString(num); + parensp[0][num] = parsub.toString(); + } + return result; + } + + /** + * Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h. + * Assumes zero-based; i.e., for $3, i==2 + */ + SubString getParenSubString(int i) + { + if (parens != null && i < parens.length) { + SubString parsub = parens[i]; + if (parsub != null) { + return parsub; + } + } + return SubString.emptySubString; + } + + /* + * Analog of match_glob() in jsstr.c + */ + private static void match_glob(GlobData mdata, Context cx, + Scriptable scope, int count, + RegExpImpl reImpl) + { + if (mdata.arrayobj == null) { + Scriptable s = ScriptableObject.getTopLevelScope(scope); + mdata.arrayobj = ScriptRuntime.newObject(cx, s, "Array", null); + } + SubString matchsub = reImpl.lastMatch; + String matchstr = matchsub.toString(); + mdata.arrayobj.put(count, mdata.arrayobj, matchstr); + } + + /* + * Analog of replace_glob() in jsstr.c + */ + private static void replace_glob(GlobData rdata, Context cx, + Scriptable scope, RegExpImpl reImpl, + int leftIndex, int leftlen) + { + int replen; + String lambdaStr; + if (rdata.lambda != null) { + // invoke lambda function with args lastMatch, $1, $2, ... $n, + // leftContext.length, whole string. + SubString[] parens = reImpl.parens; + int parenCount = (parens == null) ? 
0 : parens.length; + Object[] args = new Object[parenCount + 3]; + args[0] = reImpl.lastMatch.toString(); + for (int i=0; i < parenCount; i++) { + SubString sub = parens[i]; + if (sub != null) { + args[i+1] = sub.toString(); + } else { + args[i+1] = Undefined.instance; + } + } + args[parenCount+1] = new Integer(reImpl.leftContext.length); + args[parenCount+2] = rdata.str; + // This is a hack to prevent expose of reImpl data to + // JS function which can run new regexps modifing + // regexp that are used later by the engine. + // TODO: redesign is necessary + if (reImpl != ScriptRuntime.getRegExpProxy(cx)) Kit.codeBug(); + RegExpImpl re2 = new RegExpImpl(); + re2.multiline = reImpl.multiline; + re2.input = reImpl.input; + ScriptRuntime.setRegExpProxy(cx, re2); + try { + Scriptable parent = ScriptableObject.getTopLevelScope(scope); + Object result = rdata.lambda.call(cx, parent, parent, args); + lambdaStr = ScriptRuntime.toString(result); + } finally { + ScriptRuntime.setRegExpProxy(cx, reImpl); + } + replen = lambdaStr.length(); + } else { + lambdaStr = null; + replen = rdata.repstr.length(); + if (rdata.dollar >= 0) { + int[] skip = new int[1]; + int dp = rdata.dollar; + do { + SubString sub = interpretDollar(cx, reImpl, rdata.repstr, + dp, skip); + if (sub != null) { + replen += sub.length - skip[0]; + dp += skip[0]; + } else { + ++dp; + } + dp = rdata.repstr.indexOf('$', dp); + } while (dp >= 0); + } + } + + int growth = leftlen + replen + reImpl.rightContext.length; + StringBuffer charBuf = rdata.charBuf; + if (charBuf == null) { + charBuf = new StringBuffer(growth); + rdata.charBuf = charBuf; + } else { + charBuf.ensureCapacity(rdata.charBuf.length() + growth); + } + + charBuf.append(reImpl.leftContext.charArray, leftIndex, leftlen); + if (rdata.lambda != null) { + charBuf.append(lambdaStr); + } else { + do_replace(rdata, cx, reImpl); + } + } + + private static SubString interpretDollar(Context cx, RegExpImpl res, + String da, int dp, int[] skip) + { + char dc; 
+ int num, tmp; + + if (da.charAt(dp) != '$') Kit.codeBug(); + + /* Allow a real backslash (literal "\\") to escape "$1" etc. */ + int version = cx.getLanguageVersion(); + if (version != Context.VERSION_DEFAULT + && version <= Context.VERSION_1_4) + { + if (dp > 0 && da.charAt(dp - 1) == '\\') + return null; + } + int daL = da.length(); + if (dp + 1 >= daL) + return null; + /* Interpret all Perl match-induced dollar variables. */ + dc = da.charAt(dp + 1); + if (NativeRegExp.isDigit(dc)) { + int cp; + if (version != Context.VERSION_DEFAULT + && version <= Context.VERSION_1_4) + { + if (dc == '0') + return null; + /* Check for overflow to avoid gobbling arbitrary decimal digits. */ + num = 0; + cp = dp; + while (++cp < daL && NativeRegExp.isDigit(dc = da.charAt(cp))) + { + tmp = 10 * num + (dc - '0'); + if (tmp < num) + break; + num = tmp; + } + } + else { /* ECMA 3, 1-9 or 01-99 */ + int parenCount = (res.parens == null) ? 0 : res.parens.length; + num = dc - '0'; + if (num > parenCount) + return null; + cp = dp + 2; + if ((dp + 2) < daL) { + dc = da.charAt(dp + 2); + if (NativeRegExp.isDigit(dc)) { + tmp = 10 * num + (dc - '0'); + if (tmp <= parenCount) { + cp++; + num = tmp; + } + } + } + if (num == 0) return null; /* $0 or $00 is not valid */ + } + /* Adjust num from 1 $n-origin to 0 array-index-origin. */ + num--; + skip[0] = cp - dp; + return res.getParenSubString(num); + } + + skip[0] = 2; + switch (dc) { + case '$': + return new SubString("$"); + case '&': + return res.lastMatch; + case '+': + return res.lastParen; + case '`': + if (version == Context.VERSION_1_2) { + /* + * JS1.2 imitated the Perl4 bug where left context at each step + * in an iterative use of a global regexp started from last match, + * not from the start of the target string. But Perl4 does start + * $` at the beginning of the target string when it is used in a + * substitution, so we emulate that special case here. 
+ */ + res.leftContext.index = 0; + res.leftContext.length = res.lastMatch.index; + } + return res.leftContext; + case '\'': + return res.rightContext; + } + return null; + } + + /** + * Analog of do_replace in jsstr.c + */ + private static void do_replace(GlobData rdata, Context cx, + RegExpImpl regExpImpl) + { + StringBuffer charBuf = rdata.charBuf; + int cp = 0; + String da = rdata.repstr; + int dp = rdata.dollar; + if (dp != -1) { + int[] skip = new int[1]; + do { + int len = dp - cp; + charBuf.append(da.substring(cp, dp)); + cp = dp; + SubString sub = interpretDollar(cx, regExpImpl, da, + dp, skip); + if (sub != null) { + len = sub.length; + if (len > 0) { + charBuf.append(sub.charArray, sub.index, len); + } + cp += skip[0]; + dp += skip[0]; + } else { + ++dp; + } + dp = da.indexOf('$', dp); + } while (dp >= 0); + } + int daL = da.length(); + if (daL > cp) { + charBuf.append(da.substring(cp, daL)); + } + } + + String input; /* input string to match (perl $_, GC root) */ + boolean multiline; /* whether input contains newlines (perl $*) */ + SubString[] parens; /* Vector of SubString; last set of parens + matched (perl $1, $2) */ + SubString lastMatch; /* last string matched (perl $&) */ + SubString lastParen; /* last paren matched (perl $+) */ + SubString leftContext; /* input to left of last match (perl $`) */ + SubString rightContext; /* input to right of last match (perl $') */ +} + + +final class GlobData +{ + int mode; /* input: return index, match object, or void */ + int optarg; /* input: index of optional flags argument */ + boolean global; /* output: whether regexp was global */ + String str; /* output: 'this' parameter object as string */ + NativeRegExp regexp;/* output: regexp parameter object private data */ + + // match-specific data + + Scriptable arrayobj; + + // replace-specific data + + Function lambda; /* replacement function object or null */ + String repstr; /* replacement string */ + int dollar = -1; /* -1 or index of first $ in repstr */ + 
StringBuffer charBuf; /* result characters, null initially */ + int leftIndex; /* leftContext index, always 0 for JS1.2 */ +} diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java new file mode 100644 index 0000000..00905ca --- /dev/null +++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java @@ -0,0 +1,75 @@ +/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Rhino code, released + * May 6, 1998. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1997-1999 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License Version 2 or later (the "GPL"), in which + * case the provisions of the GPL are applicable instead of those above. If + * you wish to allow use of your version of this file only under the terms of + * the GPL and not to allow others to use your version of this file under the + * MPL, indicate your decision by deleting the provisions above and replacing + * them with the notice and other provisions required by the GPL. 
/**
 * A (char array, start index, length) triple describing a piece of text.
 *
 * The fields are read and written directly by the other classes of this
 * package.  An instance made by the no-argument constructor has a null
 * <code>charArray</code> and converts to the empty string.
 */
class SubString {

    /** Creates an empty substring: null array, index 0, length 0. */
    public SubString()
    {
    }

    /** Creates a substring covering all chars of <code>str</code>. */
    public SubString(String str)
    {
        index = 0;
        charArray = str.toCharArray();
        length = str.length();
    }

    /**
     * Creates a substring holding a private copy of <code>len</code> chars
     * of <code>source</code>, starting at <code>start</code>.  The copy is
     * stored from index 0, so later changes to <code>source</code> do not
     * affect this object.
     */
    public SubString(char[] source, int start, int len)
    {
        index = 0;
        length = len;
        charArray = new char[len];
        // Nothing is read from source when len is 0, so a zero-length
        // request never fails on a null or too-short source array.
        if (len > 0) {
            System.arraycopy(source, start, charArray, 0, len);
        }
    }

    /** Returns the described text, or "" when no char array is set. */
    @Override
    public String toString() {
        return charArray == null
               ? ""
               : new String(charArray, index, length);
    }

    /** Shared instance that converts to the empty string. */
    static final SubString emptySubString = new SubString();

    char[] charArray;
    int    index;
    int    length;
}