aboutsummaryrefslogtreecommitdiffstats
path: root/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp
diff options
context:
space:
mode:
authorElliot Kroo <kroo@appjet.com>2010-03-11 15:21:30 -0800
committerElliot Kroo <kroo@appjet.com>2010-03-11 15:21:30 -0800
commit98e2821b38a775737e42a2479a6bc65107210859 (patch)
tree55939a8ba1dce4f4e48ebb13b658061d62bf1b9a /infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp
parentc1894c8e0a52f4e3d2f89fa92f0066bbf0fcf1b1 (diff)
downloadetherpad-98e2821b38a775737e42a2479a6bc65107210859.tar.gz
etherpad-98e2821b38a775737e42a2479a6bc65107210859.tar.xz
etherpad-98e2821b38a775737e42a2479a6bc65107210859.zip
reorganizing the first level of folders (trunk/branch folders are not the git way :)
Diffstat (limited to 'infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp')
-rw-r--r--infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java2782
-rw-r--r--infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExpCtor.java289
-rw-r--r--infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java541
-rw-r--r--infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java75
4 files changed, 3687 insertions, 0 deletions
diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java
new file mode 100644
index 0000000..a893841
--- /dev/null
+++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java
@@ -0,0 +1,2782 @@
+/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Rhino code, released
+ * May 6, 1998.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1997-1999
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Norris Boyd
+ * Igor Bukanov
+ * Brendan Eich
+ * Matthias Radestock
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License Version 2 or later (the "GPL"), in which
+ * case the provisions of the GPL are applicable instead of those above. If
+ * you wish to allow use of your version of this file only under the terms of
+ * the GPL and not to allow others to use your version of this file under the
+ * MPL, indicate your decision by deleting the provisions above and replacing
+ * them with the notice and other provisions required by the GPL. If you do
+ * not delete the provisions above, a recipient may use your version of this
+ * file under either the MPL or the GPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package org.mozilla.javascript.regexp;
+
+import java.io.Serializable;
+
+import org.mozilla.javascript.Context;
+import org.mozilla.javascript.Function;
+import org.mozilla.javascript.IdFunctionObject;
+import org.mozilla.javascript.IdScriptableObject;
+import org.mozilla.javascript.Kit;
+import org.mozilla.javascript.ScriptRuntime;
+import org.mozilla.javascript.Scriptable;
+import org.mozilla.javascript.ScriptableObject;
+import org.mozilla.javascript.Undefined;
+
+/**
+ * This class implements the RegExp native object.
+ *
+ * Revision History:
+ * Implementation in C by Brendan Eich
+ * Initial port to Java by Norris Boyd from jsregexp.c version 1.36
+ * Merged up to version 1.38, which included Unicode support.
+ * Merged bug fixes in version 1.39.
+ * Merged JSFUN13_BRANCH changes up to 1.32.2.13
+ *
+ * @author Brendan Eich
+ * @author Norris Boyd
+ */
+
+
+
+public class NativeRegExp extends IdScriptableObject implements Function
+{
+ // Serialization version identifier of this class.
+ static final long serialVersionUID = 4965263491464903264L;
+
+ // NOTE(review): the uses of this tag are outside this part of the file;
+ // presumably it identifies the RegExp prototype's id-functions -- confirm.
+ private static final Object REGEXP_TAG = new Object();
+
+ // Bit flags kept in RECompiled.flags; compileRE sets them from the
+ // characters of the flag string.
+ public static final int JSREG_GLOB = 0x1; // 'g' flag: global
+ public static final int JSREG_FOLD = 0x2; // 'i' flag: fold
+ public static final int JSREG_MULTILINE = 0x4; // 'm' flag: multiline
+
+ // Type of match to perform: the matchType argument that execSub hands on
+ // to executeRegExp (call() and construct() use MATCH).
+ public static final int TEST = 0;
+ public static final int MATCH = 1;
+ public static final int PREFIX = 2;
+
+ // When true, compileRE prints the compiled program and the anchor
+ // character to System.out.
+ private static final boolean debug = false;
+
+ /*
+ * Opcodes. The parser stores one of these in each RENode it creates, and
+ * emitREBytecode writes the node's opcode byte into the compiled program.
+ * The numbers that are commented out are opcodes of the original C
+ * implementation that this class does not define.
+ */
+ private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. */
+ private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */
+ private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */
+ private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */
+ private static final byte REOP_WBDRY = 4; /* match "" at word boundary */
+ private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */
+ private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */
+ private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */
+ private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */
+ private static final byte REOP_OPT = 9; /* optional subexpression in kid */
+ private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */
+ private static final byte REOP_RPAREN = 11; /* right paren bytecode */
+ private static final byte REOP_DOT = 12; /* stands for any character */
+// private static final byte REOP_CCLASS = 13; /* character class: [a-f] */
+ private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */
+ private static final byte REOP_NONDIGIT = 15; /* match a non-digit char: [^0-9] */
+ private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */
+ private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
+ private static final byte REOP_SPACE = 18; /* match a whitespace char */
+ private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */
+ private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */
+ private static final byte REOP_FLAT = 21; /* match a flat string */
+ private static final byte REOP_FLAT1 = 22; /* match a single char */
+ private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */
+// private static final byte REOP_DOTSTAR = 24; /* optimize .* to use a single opcode */
+// private static final byte REOP_ANCHOR = 25; /* like .* but skips left context to unanchored r.e. */
+// private static final byte REOP_EOLONLY = 26; /* $ not preceded by any pattern */
+// private static final byte REOP_UCFLAT = 27; /* flat Unicode string; len immediate counts chars */
+ private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */
+// private static final byte REOP_UCCLASS = 29; /* Unicode character class, vector of chars to match */
+// private static final byte REOP_NUCCLASS = 30; /* negated Unicode character class */
+// private static final byte REOP_BACKREFi = 31; /* case-independent REOP_BACKREF */
+ private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */
+ private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */
+// private static final byte REOP_UCFLATi = 34; /* case-independent REOP_UCFLAT */
+ private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */
+// private static final byte REOP_ANCHOR1 = 36; /* first-char discriminating REOP_ANCHOR */
+// private static final byte REOP_NCCLASS = 37; /* negated 8-bit character class */
+// private static final byte REOP_DOTSTARMIN = 38; /* ungreedy version of REOP_DOTSTAR */
+// private static final byte REOP_LPARENNON = 39; /* non-capturing version of REOP_LPAREN */
+// private static final byte REOP_RPARENNON = 40; /* non-capturing version of REOP_RPAREN */
+ private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */
+ private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */
+ private static final byte REOP_ASSERTTEST = 43; /* sentinel at end of assertion child */
+ private static final byte REOP_ASSERTNOTTEST = 44; /* sentinel at end of !assertion child */
+ private static final byte REOP_MINIMALSTAR = 45; /* non-greedy version of * */
+ private static final byte REOP_MINIMALPLUS = 46; /* non-greedy version of + */
+ private static final byte REOP_MINIMALOPT = 47; /* non-greedy version of ? */
+ private static final byte REOP_MINIMALQUANT = 48; /* non-greedy version of {} */
+ private static final byte REOP_ENDCHILD = 49; /* sentinel at end of quantifier child */
+ private static final byte REOP_CLASS = 50; /* character class with index */
+ private static final byte REOP_REPEAT = 51; /* directs execution of greedy quantifier */
+ private static final byte REOP_MINIMALREPEAT = 52; /* directs execution of non-greedy quantifier */
+ private static final byte REOP_END = 53; /* last byte of a program; appended by compileRE */
+
+
+
+ /**
+ * Creates the RegExp prototype and constructor objects and defines the
+ * constructor in the given scope under the name "RegExp".
+ *
+ * @param cx the context passed on to compileRE for the prototype's pattern
+ * @param scope the scope that receives the "RegExp" property and becomes
+ * the parent scope of the prototype
+ * @param sealed when true, both the prototype and the constructor are
+ * sealed before the constructor is published
+ */
+ public static void init(Context cx, Scriptable scope, boolean sealed)
+ {
+
+ NativeRegExp proto = new NativeRegExp();
+ // The prototype is itself a regexp, compiled from the empty pattern
+ // with no flags.
+ proto.re = (RECompiled)compileRE(cx, "", null, false);
+ proto.activatePrototypeMap(MAX_PROTOTYPE_ID);
+ proto.setParentScope(scope);
+ proto.setPrototype(getObjectPrototype(scope));
+
+ NativeRegExpCtor ctor = new NativeRegExpCtor();
+ // Bug #324006: ECMA-262 15.10.6.1 says "The initial value of
+ // RegExp.prototype.constructor is the builtin RegExp constructor."
+ proto.put("constructor", proto, ctor);
+
+ ScriptRuntime.setFunctionProtoAndParent(ctor, scope);
+
+ ctor.setImmunePrototypeProperty(proto);
+
+ // Sealing happens only after both objects are fully wired up.
+ if (sealed) {
+ proto.sealObject();
+ ctor.sealObject();
+ }
+
+ defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM);
+ }
+
+    /**
+     * Wraps an already compiled regular expression in a new RegExp object.
+     *
+     * @param scope scope used to set up this object's prototype and parent
+     * @param regexpCompiled the compiled program; must be an RECompiled
+     *        (for example the value returned by compileRE)
+     */
+    NativeRegExp(Scriptable scope, Object regexpCompiled)
+    {
+        RECompiled compiled = (RECompiled)regexpCompiled;
+        re = compiled;
+        lastIndex = 0;
+        ScriptRuntime.setObjectProtoAndParent(this, scope);
+    }
+
+    /**
+     * Returns the class name of this object.
+     *
+     * @return always the string "RegExp"
+     */
+    public String getClassName()
+    {
+        final String className = "RegExp";
+        return className;
+    }
+
+ public Object call(Context cx, Scriptable scope, Scriptable thisObj,
+ Object[] args)
+ {
+ return execSub(cx, scope, args, MATCH);
+ }
+
+    /**
+     * Invokes this regexp as a constructor: performs the same MATCH-type
+     * execution as call() and returns the result cast to Scriptable.
+     *
+     * @return the execSub result as a Scriptable (may be null)
+     */
+    public Scriptable construct(Context cx, Scriptable scope, Object[] args)
+    {
+        Object matchResult = execSub(cx, scope, args, MATCH);
+        return (Scriptable)matchResult;
+    }
+
+    /**
+     * Re-initializes this object, either by copying another RegExp or by
+     * compiling a new pattern.
+     *
+     * If args[0] is a NativeRegExp, its compiled program and lastIndex are
+     * copied; supplying a defined second argument in that case is a
+     * TypeError ("msg.bad.regexp.compile"). Otherwise args[0] (default "")
+     * is converted to the pattern string, args[1] (when present and not
+     * undefined) to the flag string, the pattern is compiled and lastIndex
+     * is reset to 0.
+     *
+     * @return this object
+     */
+    Scriptable compile(Context cx, Scriptable scope, Object[] args)
+    {
+        boolean hasFlags = args.length > 1 && args[1] != Undefined.instance;
+
+        if (args.length > 0 && args[0] instanceof NativeRegExp) {
+            if (hasFlags) {
+                // flags cannot be combined with an existing RegExp
+                throw ScriptRuntime.typeError0("msg.bad.regexp.compile");
+            }
+            NativeRegExp other = (NativeRegExp) args[0];
+            re = other.re;
+            lastIndex = other.lastIndex;
+        } else {
+            String pattern = "";
+            if (args.length != 0) {
+                pattern = ScriptRuntime.toString(args[0]);
+            }
+            String flagString = null;
+            if (hasFlags) {
+                flagString = ScriptRuntime.toString(args[1]);
+            }
+            re = (RECompiled)compileRE(cx, pattern, flagString, false);
+            lastIndex = 0;
+        }
+        return this;
+    }
+
+ public String toString()
+ {
+ StringBuffer buf = new StringBuffer();
+ buf.append('/');
+ if (re.source.length != 0) {
+ buf.append(re.source);
+ } else {
+ // See bugzilla 226045
+ buf.append("(?:)");
+ }
+ buf.append('/');
+ if ((re.flags & JSREG_GLOB) != 0)
+ buf.append('g');
+ if ((re.flags & JSREG_FOLD) != 0)
+ buf.append('i');
+ if ((re.flags & JSREG_MULTILINE) != 0)
+ buf.append('m');
+ return buf.toString();
+ }
+
+ // Creates an instance with no compiled program; init() uses this for the
+ // prototype object and assigns its 're' field right afterwards.
+ NativeRegExp() { }
+
+    /**
+     * Fetches the regexp proxy registered for the context and returns it
+     * as a RegExpImpl.
+     */
+    private static RegExpImpl getImpl(Context cx)
+    {
+        Object proxy = ScriptRuntime.getRegExpProxy(cx);
+        return (RegExpImpl) proxy;
+    }
+
+    /**
+     * Common implementation behind call() and construct(): picks the input
+     * string, runs the regexp and maintains lastIndex.
+     *
+     * The input is args[0] converted to a string or, when no argument was
+     * given, the 'input' field of the context's RegExpImpl; a missing input
+     * is reported with "msg.no.re.input.for". For a global regexp matching
+     * starts at lastIndex, otherwise at 0. A start position that is
+     * negative or beyond the end of the input resets lastIndex to 0 and
+     * yields null without running the match.
+     *
+     * @param matchType match type passed through to executeRegExp
+     * @return the value produced by executeRegExp, or null as described
+     */
+    private Object execSub(Context cx, Scriptable scopeObj,
+                           Object[] args, int matchType)
+    {
+        RegExpImpl impl = getImpl(cx);
+
+        String input;
+        if (args.length != 0) {
+            input = ScriptRuntime.toString(args[0]);
+        } else {
+            input = impl.input;
+            if (input == null) {
+                reportError("msg.no.re.input.for", toString());
+            }
+        }
+
+        double start = 0;
+        if ((re.flags & JSREG_GLOB) != 0) {
+            start = lastIndex;
+        }
+
+        if (start < 0 || input.length() < start) {
+            lastIndex = 0;
+            return null;
+        }
+
+        // executeRegExp receives the start position in a one-element array;
+        // the element is read back below as the new lastIndex
+        int[] position = { (int)start };
+        Object result = executeRegExp(cx, scopeObj, impl, input, position,
+                                      matchType);
+        if ((re.flags & JSREG_GLOB) != 0) {
+            boolean failed = (result == null || result == Undefined.instance);
+            lastIndex = failed ? 0 : position[0];
+        }
+        return result;
+    }
+
+ /**
+ * Compiles a pattern into an RECompiled program.
+ *
+ * @param cx context handed to the CompilerState
+ * @param str the pattern source
+ * @param global the flag string, or null for no flags; 'g', 'i' and 'm'
+ * are accepted, any other character is reported with
+ * "msg.invalid.re.flag"
+ * @param flat when true and the pattern is non-empty, the pattern is not
+ * parsed but taken as one literal string (a single REOP_FLAT
+ * node covering the whole source)
+ * @return the RECompiled (typed as Object), or null when parsing fails
+ */
+ static Object compileRE(Context cx, String str, String global, boolean flat)
+ {
+ RECompiled regexp = new RECompiled();
+ regexp.source = str.toCharArray();
+ int length = str.length();
+
+ // Translate the flag letters into the JSREG_* bit mask.
+ int flags = 0;
+ if (global != null) {
+ for (int i = 0; i < global.length(); i++) {
+ char c = global.charAt(i);
+ if (c == 'g') {
+ flags |= JSREG_GLOB;
+ } else if (c == 'i') {
+ flags |= JSREG_FOLD;
+ } else if (c == 'm') {
+ flags |= JSREG_MULTILINE;
+ } else {
+ reportError("msg.invalid.re.flag", String.valueOf(c));
+ }
+ }
+ }
+ regexp.flags = flags;
+
+ CompilerState state = new CompilerState(cx, regexp.source, length, flags);
+ if (flat && length > 0) {
+if (debug) {
+System.out.println("flat = \"" + str + "\"");
+}
+ // Literal mode: one FLAT node that starts at source index 0 and
+ // spans the whole pattern.
+ state.result = new RENode(REOP_FLAT);
+ state.result.chr = state.cpbegin[0];
+ state.result.length = length;
+ state.result.flatIndex = 0;
+ state.progLength += 5;
+ }
+ else
+ if (!parseDisjunction(state))
+ return null;
+
+ // state.progLength is the size accumulated while parsing; the extra
+ // byte holds the REOP_END stored after the emitted code below.
+ regexp.program = new byte[state.progLength + 1];
+ if (state.classCount != 0) {
+ regexp.classList = new RECharSet[state.classCount];
+ regexp.classCount = state.classCount;
+ }
+ int endPC = emitREBytecode(state, regexp, 0, state.result);
+ regexp.program[endPC++] = REOP_END;
+
+if (debug) {
+System.out.println("Prog. length = " + endPC);
+for (int i = 0; i < endPC; i++) {
+ System.out.print(regexp.program[i]);
+ if (i < (endPC - 1)) System.out.print(", ");
+}
+System.out.println();
+}
+ regexp.parenCount = state.parenCount;
+
+ // If re starts with literal, init anchorCh accordingly
+ switch (regexp.program[0]) {
+ case REOP_UCFLAT1:
+ case REOP_UCFLAT1i:
+ // operand is the character itself, stored as a two-byte index
+ regexp.anchorCh = (char)getIndex(regexp.program, 1);
+ break;
+ case REOP_FLAT1:
+ case REOP_FLAT1i:
+ // operand is a single byte holding the character
+ regexp.anchorCh = (char)(regexp.program[1] & 0xFF);
+ break;
+ case REOP_FLAT:
+ case REOP_FLATi:
+ // operand is an index into the source; the anchor is the character
+ // found there
+ int k = getIndex(regexp.program, 1);
+ regexp.anchorCh = regexp.source[k];
+ break;
+ }
+
+if (debug) {
+if (regexp.anchorCh >= 0) {
+ System.out.println("Anchor ch = '" + (char)regexp.anchorCh + "'");
+}
+}
+ return regexp;
+ }
+
+    /**
+     * Tells whether c is one of the ASCII decimal digits '0' through '9'.
+     */
+    static boolean isDigit(char c)
+    {
+        return c >= '0' && c <= '9';
+    }
+
+    /**
+     * Tells whether c is a word character in the regular expression sense,
+     * i.e. one of [0-9a-z_A-Z].
+     *
+     * ECMA-262 (15.10.2.6, IsWordChar) limits word characters to the ASCII
+     * letters, the decimal digits and '_'. Character.isLetter must not be
+     * used here because it also accepts every non-ASCII Unicode letter.
+     */
+    private static boolean isWord(char c)
+    {
+        return ('a' <= c && c <= 'z')
+            || ('A' <= c && c <= 'Z')
+            || isDigit(c)
+            || c == '_';
+    }
+
+    /**
+     * Tells whether c is a line terminator, as decided by
+     * ScriptRuntime.isJSLineTerminator.
+     */
+    private static boolean isLineTerm(char c)
+    {
+        boolean terminator = ScriptRuntime.isJSLineTerminator(c);
+        return terminator;
+    }
+
+    /**
+     * Tells whether c counts as white space for the regexp engine: space,
+     * tab, LF, CR, line/paragraph separator, form feed, vertical tab,
+     * no-break space, or any character whose Unicode category is
+     * SPACE_SEPARATOR.
+     */
+    private static boolean isREWhiteSpace(int c)
+    {
+        switch (c) {
+        case 0x20:      // space
+        case 0x09:      // horizontal tab
+        case '\n':
+        case '\r':
+        case 0x2028:    // line separator
+        case 0x2029:    // paragraph separator
+        case 0x0C:      // form feed
+        case 0x0B:      // vertical tab
+        case 0xA0:      // no-break space
+            return true;
+        default:
+            return Character.getType((char)c) == Character.SPACE_SEPARATOR;
+        }
+    }
+
+    /*
+     * Upper-cases a character for case-insensitive matching:
+     *
+     * - an ASCII character is converted only if it is a lower-case letter;
+     * - any other character is converted with Character.toUpperCase, but
+     *   the result is discarded (and ch returned unchanged) when it would
+     *   turn a character at or above 128 into one below 128.
+     */
+    private static char upcase(char ch)
+    {
+        if (ch >= 128) {
+            char upper = Character.toUpperCase(ch);
+            // never let a non-ASCII character fold into the ASCII range
+            return (upper < 128) ? ch : upper;
+        }
+        boolean asciiLower = (ch >= 'a' && ch <= 'z');
+        return asciiLower ? (char)(ch + ('A' - 'a')) : ch;
+    }
+
+    /*
+     * Lower-cases a character for case-insensitive matching; the mirror
+     * image of upcase: ASCII upper-case letters are converted directly,
+     * other characters go through Character.toLowerCase unless that would
+     * turn a character at or above 128 into one below 128.
+     */
+    private static char downcase(char ch)
+    {
+        if (ch >= 128) {
+            char lower = Character.toLowerCase(ch);
+            // never let a non-ASCII character fold into the ASCII range
+            return (lower < 128) ? ch : lower;
+        }
+        boolean asciiUpper = (ch >= 'A' && ch <= 'Z');
+        return asciiUpper ? (char)(ch + ('a' - 'A')) : ch;
+    }
+
+/*
+ * Converts an ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
+ * numeric value; returns -1 for any other character.
+ */
+    private static int toASCIIHexDigit(int c)
+    {
+        if (c < '0') {
+            return -1;
+        }
+        if (c <= '9') {
+            return c - '0';
+        }
+        // setting bit 0x20 maps 'A'-'F' onto 'a'-'f'
+        int lower = c | 0x20;
+        boolean hexLetter = (lower >= 'a' && lower <= 'f');
+        return hexLetter ? (lower - 'a' + 10) : -1;
+    }
+
+/*
+ * Top-down regular expression grammar, based closely on Perl4.
+ *
+ *  regexp:     altern                  A regular expression is one or more
+ *              altern '|' regexp       alternatives separated by vertical bar.
+ *
+ * On success state.result holds the parsed alternative or, when a '|'
+ * follows it, an REOP_ALT node whose kid is the left alternative and whose
+ * kid2 is the (recursively parsed) remainder.
+ */
+    private static boolean parseDisjunction(CompilerState state)
+    {
+        if (!parseAlternative(state)) {
+            return false;
+        }
+
+        char[] pattern = state.cpbegin;
+        int pos = state.cp;
+        if (pos == pattern.length || pattern[pos] != '|') {
+            // a single alternative: state.result is already set
+            return true;
+        }
+
+        ++state.cp;                         // skip the '|'
+        RENode alternation = new RENode(REOP_ALT);
+        alternation.kid = state.result;
+        if (!parseDisjunction(state)) {
+            return false;
+        }
+        alternation.kid2 = state.result;
+        state.result = alternation;
+        /* ALT, <next>, ..., JUMP, <end> ... JUMP <end> */
+        state.progLength += 9;
+        return true;
+    }
+
+/*
+ *  altern:     item                    An alternative is one or more items,
+ *              item altern             concatenated together.
+ *
+ * Terms are parsed until the end of the pattern, a '|', or (inside
+ * parentheses) a ')' is reached, and are chained through their 'next'
+ * links. state.result receives the first term, or an REOP_EMPTY node when
+ * the alternative contains no term at all.
+ */
+    private static boolean parseAlternative(CompilerState state)
+    {
+        char[] pattern = state.cpbegin;
+        RENode first = null;    // head of the chain
+        RENode last = null;     // current end of the chain, once it has >1 term
+
+        for (;;) {
+            boolean atEnd = (state.cp == state.cpend);
+            if (atEnd || pattern[state.cp] == '|'
+                || (state.parenNesting != 0 && pattern[state.cp] == ')'))
+            {
+                state.result = (first != null) ? first
+                                               : new RENode(REOP_EMPTY);
+                return true;
+            }
+
+            if (!parseTerm(state)) {
+                return false;
+            }
+
+            if (first == null) {
+                first = state.result;
+                continue;
+            }
+
+            // append the new term, then advance to the end of whatever
+            // chain it brought along
+            RENode attachTo = (last != null) ? last : first;
+            attachTo.next = state.result;
+            last = state.result;
+            while (last.next != null) {
+                last = last.next;
+            }
+        }
+    }
+
+ /*
+ * calculate the total size of the bitmap required for a class expression
+ *
+ * Scans the class body src[index..end) (the text between '[' and ']') and
+ * stores in target.bmsize the largest character code the class can
+ * mention. An empty body gives 0; a \D, \s, \S, \w or \W escape gives
+ * 65535 straight away. With the JSREG_FOLD flag the larger of the upper-
+ * and lower-case forms of each character is used.
+ *
+ * Returns false after reporting "msg.bad.range" when a class escape is
+ * used as the end of a range or a range's start is greater than its end;
+ * returns true otherwise.
+ */
+ private static boolean
+ calculateBitmapSize(CompilerState state, RENode target, char[] src,
+ int index, int end)
+ {
+ char rangeStart = 0;
+ char c;
+ int n;
+ int nDigits;
+ int i;
+ int max = 0;
+ boolean inRange = false;
+
+ target.bmsize = 0;
+
+ if (index == end)
+ return true;
+
+ // a leading '^' only negates the class; it is not a member
+ if (src[index] == '^')
+ ++index;
+
+ while (index != end) {
+ // localMax: code of the character (or escape) parsed in this round
+ int localMax = 0;
+ nDigits = 2;
+ switch (src[index]) {
+ case '\\':
+ ++index;
+ c = src[index++];
+ switch (c) {
+ case 'b':
+ localMax = 0x8;
+ break;
+ case 'f':
+ localMax = 0xC;
+ break;
+ case 'n':
+ localMax = 0xA;
+ break;
+ case 'r':
+ localMax = 0xD;
+ break;
+ case 't':
+ localMax = 0x9;
+ break;
+ case 'v':
+ localMax = 0xB;
+ break;
+ case 'c':
+ // NOTE(review): the test looks at src[index + 1] while the
+ // character consumed is src[index]; this looks like an
+ // off-by-one, but it must stay in step with the code that fills
+ // the class bitmap (not visible here) -- confirm before changing.
+ if (((index + 1) < end) && Character.isLetter(src[index + 1]))
+ localMax = (char)(src[index++] & 0x1F);
+ else
+ localMax = '\\';
+ break;
+ case 'u':
+ nDigits += 2;
+ // fall thru...
+ case 'x':
+ // \xHH or \uHHHH: accumulate up to nDigits hex digits
+ n = 0;
+ for (i = 0; (i < nDigits) && (index < end); i++) {
+ c = src[index++];
+ n = Kit.xDigitToInt(c, n);
+ if (n < 0) {
+ // Back off to accepting the original
+ // '\' as a literal
+ index -= (i + 1);
+ n = '\\';
+ break;
+ }
+ }
+ localMax = n;
+ break;
+ case 'd':
+ if (inRange) {
+ reportError("msg.bad.range", "");
+ return false;
+ }
+ localMax = '9';
+ break;
+ case 'D':
+ case 's':
+ case 'S':
+ case 'w':
+ case 'W':
+ if (inRange) {
+ reportError("msg.bad.range", "");
+ return false;
+ }
+ // these escapes get the full 16-bit range
+ target.bmsize = 65535;
+ return true;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ /*
+ * This is a non-ECMA extension - decimal escapes (in this
+ * case, octal!) are supposed to be an error inside class
+ * ranges, but supported here for backwards compatibility.
+ *
+ */
+ n = (c - '0');
+ c = src[index];
+ if ('0' <= c && c <= '7') {
+ index++;
+ n = 8 * n + (c - '0');
+ c = src[index];
+ if ('0' <= c && c <= '7') {
+ index++;
+ i = 8 * n + (c - '0');
+ // a third digit is taken only while the value stays <= 0377
+ if (i <= 0377)
+ n = i;
+ else
+ index--;
+ }
+ }
+ localMax = n;
+ break;
+
+ default:
+ localMax = c;
+ break;
+ }
+ break;
+ default:
+ localMax = src[index++];
+ break;
+ }
+ if (inRange) {
+ // localMax is the end of the range opened in the previous round
+ if (rangeStart > localMax) {
+ reportError("msg.bad.range", "");
+ return false;
+ }
+ inRange = false;
+ }
+ else {
+ // a '-' that is not the last character of the body opens a range
+ if (index < (end - 1)) {
+ if (src[index] == '-') {
+ ++index;
+ inRange = true;
+ rangeStart = (char)localMax;
+ continue;
+ }
+ }
+ }
+ if ((state.flags & JSREG_FOLD) != 0){
+ char cu = upcase((char)localMax);
+ char cd = downcase((char)localMax);
+ localMax = (cu >= cd) ? cu : cd;
+ }
+ if (localMax > max)
+ max = localMax;
+ }
+ target.bmsize = max;
+ return true;
+ }
+
+ /*
+ * item: assertion An item is either an assertion or
+ * quantatom a quantified atom.
+ *
+ * assertion: '^' Assertions match beginning of string
+ * (or line if the class static property
+ * RegExp.multiline is true).
+ * '$' End of string (or line if the class
+ * static property RegExp.multiline is
+ * true).
+ * '\b' Word boundary (between \w and \W).
+ * '\B' Word non-boundary.
+ *
+ * quantatom: atom An unquantified atom.
+ * quantatom '{' n ',' m '}'
+ * Atom must occur between n and m times.
+ * quantatom '{' n ',' '}' Atom must occur at least n times.
+ * quantatom '{' n '}' Atom must occur exactly n times.
+ * quantatom '*' Zero or more times (same as {0,}).
+ * quantatom '+' One or more times (same as {1,}).
+ * quantatom '?' Zero or one time (same as {0,1}).
+ *
+ * any of which can be optionally followed by '?' for ungreedy
+ *
+ * atom: '(' regexp ')' A parenthesized regexp (what matched
+ * can be addressed using a backreference,
+ * see '\' n below).
+ * '.' Matches any char except '\n'.
+ * '[' classlist ']' A character class.
+ * '[' '^' classlist ']' A negated character class.
+ * '\f' Form Feed.
+ * '\n' Newline (Line Feed).
+ * '\r' Carriage Return.
+ * '\t' Horizontal Tab.
+ * '\v' Vertical Tab.
+ * '\d' A digit (same as [0-9]).
+ * '\D' A non-digit.
+ * '\w' A word character, [0-9a-z_A-Z].
+ * '\W' A non-word character.
+ * '\s' A whitespace character, [ \b\f\n\r\t\v].
+ * '\S' A non-whitespace character.
+ * '\' n A backreference to the nth (n decimal
+ * and positive) parenthesized expression.
+ * '\' octal An octal escape sequence (octal must be
+ * two or three digits long, unless it is
+ * 0 for the null character).
+ * '\x' hex A hex escape (hex must be two digits).
+ * '\c' ctrl A control character, ctrl is a letter.
+ * '\' literalatomchar Any character except one of the above
+ * that follow '\' in an atom.
+ * otheratomchar Any character not first among the other
+ * atom right-hand sides.
+ */
+
+    /**
+     * Makes state.result a one-character REOP_FLAT node for c and adds 3
+     * to the program length. The node's flatIndex is -1, i.e. the character
+     * is not tied to a position in the pattern source.
+     */
+    private static void doFlat(CompilerState state, char c)
+    {
+        RENode flat = new RENode(REOP_FLAT);
+        flat.chr = c;
+        flat.length = 1;
+        flat.flatIndex = -1;
+        state.result = flat;
+        state.progLength += 3;
+    }
+
+    /**
+     * Reads the rest of a decimal number from the pattern.
+     *
+     * @param c the first digit, already consumed by the caller
+     * @param state parser state; state.cp is advanced past all digits
+     * @param maxValue value the result is clamped to on overflow
+     * @param overflowMessageId message reported (with the digits read after
+     *        the first one as argument) when the number overflows
+     * @return the parsed value, or maxValue after an overflow
+     */
+    private static int
+    getDecimalValue(char c, CompilerState state, int maxValue,
+                    String overflowMessageId)
+    {
+        char[] pattern = state.cpbegin;
+        int firstPos = state.cp;
+        int result = c - '0';
+        boolean overflowed = false;
+
+        while (state.cp != state.cpend) {
+            char next = pattern[state.cp];
+            if (!isDigit(next)) {
+                break;
+            }
+            // after an overflow the remaining digits are only skipped
+            if (!overflowed) {
+                int digit = next - '0';
+                if (result < (maxValue - digit) / 10) {
+                    result = result * 10 + digit;
+                } else {
+                    overflowed = true;
+                    result = maxValue;
+                }
+            }
+            ++state.cp;
+        }
+
+        if (overflowed) {
+            reportError(overflowMessageId,
+                        String.valueOf(pattern, firstPos,
+                                       state.cp - firstPos));
+        }
+        return result;
+    }
+
+ private static boolean
+ parseTerm(CompilerState state)
+ {
+ char[] src = state.cpbegin;
+ char c = src[state.cp++];
+ int nDigits = 2;
+ int parenBaseCount = state.parenCount;
+ int num, tmp;
+ RENode term;
+ int termStart;
+
+ switch (c) {
+ /* assertions and atoms */
+ case '^':
+ state.result = new RENode(REOP_BOL);
+ state.progLength++;
+ return true;
+ case '$':
+ state.result = new RENode(REOP_EOL);
+ state.progLength++;
+ return true;
+ case '\\':
+ if (state.cp < state.cpend) {
+ c = src[state.cp++];
+ switch (c) {
+ /* assertion escapes */
+ case 'b' :
+ state.result = new RENode(REOP_WBDRY);
+ state.progLength++;
+ return true;
+ case 'B':
+ state.result = new RENode(REOP_WNONBDRY);
+ state.progLength++;
+ return true;
+ /* Decimal escape */
+ case '0':
+/*
+ * Under 'strict' ECMA 3, we interpret \0 as NUL and don't accept octal.
+ * However, (XXX and since Rhino doesn't have a 'strict' mode) we'll just
+ * behave the old way for compatibility reasons.
+ * (see http://bugzilla.mozilla.org/show_bug.cgi?id=141078)
+ *
+ */
+ reportWarning(state.cx, "msg.bad.backref", "");
+ /* octal escape */
+ num = 0;
+ while (state.cp < state.cpend) {
+ c = src[state.cp];
+ if ((c >= '0') && (c <= '7')) {
+ state.cp++;
+ tmp = 8 * num + (c - '0');
+ if (tmp > 0377)
+ break;
+ num = tmp;
+ }
+ else
+ break;
+ }
+ c = (char)(num);
+ doFlat(state, c);
+ break;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ termStart = state.cp - 1;
+ num = getDecimalValue(c, state, 0xFFFF,
+ "msg.overlarge.backref");
+ if (num > state.parenCount)
+ reportWarning(state.cx, "msg.bad.backref", "");
+ /*
+ * n > 9 or > count of parentheses,
+ * then treat as octal instead.
+ */
+ if ((num > 9) && (num > state.parenCount)) {
+ state.cp = termStart;
+ num = 0;
+ while (state.cp < state.cpend) {
+ c = src[state.cp];
+ if ((c >= '0') && (c <= '7')) {
+ state.cp++;
+ tmp = 8 * num + (c - '0');
+ if (tmp > 0377)
+ break;
+ num = tmp;
+ }
+ else
+ break;
+ }
+ c = (char)(num);
+ doFlat(state, c);
+ break;
+ }
+ /* otherwise, it's a back-reference */
+ state.result = new RENode(REOP_BACKREF);
+ state.result.parenIndex = num - 1;
+ state.progLength += 3;
+ break;
+ /* Control escape */
+ case 'f':
+ c = 0xC;
+ doFlat(state, c);
+ break;
+ case 'n':
+ c = 0xA;
+ doFlat(state, c);
+ break;
+ case 'r':
+ c = 0xD;
+ doFlat(state, c);
+ break;
+ case 't':
+ c = 0x9;
+ doFlat(state, c);
+ break;
+ case 'v':
+ c = 0xB;
+ doFlat(state, c);
+ break;
+ /* Control letter */
+ case 'c':
+ if (((state.cp + 1) < state.cpend) &&
+ Character.isLetter(src[state.cp + 1]))
+ c = (char)(src[state.cp++] & 0x1F);
+ else {
+ /* back off to accepting the original '\' as a literal */
+ --state.cp;
+ c = '\\';
+ }
+ doFlat(state, c);
+ break;
+ /* UnicodeEscapeSequence */
+ case 'u':
+ nDigits += 2;
+ // fall thru...
+ /* HexEscapeSequence */
+ case 'x':
+ {
+ int n = 0;
+ int i;
+ for (i = 0; (i < nDigits)
+ && (state.cp < state.cpend); i++) {
+ c = src[state.cp++];
+ n = Kit.xDigitToInt(c, n);
+ if (n < 0) {
+ // Back off to accepting the original
+ // 'u' or 'x' as a literal
+ state.cp -= (i + 2);
+ n = src[state.cp++];
+ break;
+ }
+ }
+ c = (char)(n);
+ }
+ doFlat(state, c);
+ break;
+ /* Character class escapes */
+ case 'd':
+ state.result = new RENode(REOP_DIGIT);
+ state.progLength++;
+ break;
+ case 'D':
+ state.result = new RENode(REOP_NONDIGIT);
+ state.progLength++;
+ break;
+ case 's':
+ state.result = new RENode(REOP_SPACE);
+ state.progLength++;
+ break;
+ case 'S':
+ state.result = new RENode(REOP_NONSPACE);
+ state.progLength++;
+ break;
+ case 'w':
+ state.result = new RENode(REOP_ALNUM);
+ state.progLength++;
+ break;
+ case 'W':
+ state.result = new RENode(REOP_NONALNUM);
+ state.progLength++;
+ break;
+ /* IdentityEscape */
+ default:
+ state.result = new RENode(REOP_FLAT);
+ state.result.chr = c;
+ state.result.length = 1;
+ state.result.flatIndex = state.cp - 1;
+ state.progLength += 3;
+ break;
+ }
+ break;
+ }
+ else {
+ /* a trailing '\' is an error */
+ reportError("msg.trail.backslash", "");
+ return false;
+ }
+ case '(': {
+ RENode result = null;
+ termStart = state.cp;
+ if (state.cp + 1 < state.cpend && src[state.cp] == '?'
+ && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':'))
+ {
+ state.cp += 2;
+ if (c == '=') {
+ result = new RENode(REOP_ASSERT);
+ /* ASSERT, <next>, ... ASSERTTEST */
+ state.progLength += 4;
+ } else if (c == '!') {
+ result = new RENode(REOP_ASSERT_NOT);
+ /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
+ state.progLength += 4;
+ }
+ } else {
+ result = new RENode(REOP_LPAREN);
+ /* LPAREN, <index>, ... RPAREN, <index> */
+ state.progLength += 6;
+ result.parenIndex = state.parenCount++;
+ }
+ ++state.parenNesting;
+ if (!parseDisjunction(state))
+ return false;
+ if (state.cp == state.cpend || src[state.cp] != ')') {
+ reportError("msg.unterm.paren", "in regular expression"/*APPJET*/);
+ return false;
+ }
+ ++state.cp;
+ --state.parenNesting;
+ if (result != null) {
+ result.kid = state.result;
+ state.result = result;
+ }
+ break;
+ }
+ case ')':
+ reportError("msg.re.unmatched.right.paren", "");
+ return false;
+ case '[':
+ state.result = new RENode(REOP_CLASS);
+ termStart = state.cp;
+ state.result.startIndex = termStart;
+ while (true) {
+ if (state.cp == state.cpend) {
+ reportError("msg.unterm.class", "");
+ return false;
+ }
+ if (src[state.cp] == '\\')
+ state.cp++;
+ else {
+ if (src[state.cp] == ']') {
+ state.result.kidlen = state.cp - termStart;
+ break;
+ }
+ }
+ state.cp++;
+ }
+ state.result.index = state.classCount++;
+ /*
+ * Call calculateBitmapSize now as we want any errors it finds
+ * to be reported during the parse phase, not at execution.
+ */
+ if (!calculateBitmapSize(state, state.result, src, termStart, state.cp++))
+ return false;
+ state.progLength += 3; /* CLASS, <index> */
+ break;
+
+ case '.':
+ state.result = new RENode(REOP_DOT);
+ state.progLength++;
+ break;
+ case '*':
+ case '+':
+ case '?':
+ reportError("msg.bad.quant", String.valueOf(src[state.cp - 1]));
+ return false;
+ default:
+ state.result = new RENode(REOP_FLAT);
+ state.result.chr = c;
+ state.result.length = 1;
+ state.result.flatIndex = state.cp - 1;
+ state.progLength += 3;
+ break;
+ }
+
+ term = state.result;
+ if (state.cp == state.cpend) {
+ return true;
+ }
+ boolean hasQ = false;
+ switch (src[state.cp]) {
+ case '+':
+ state.result = new RENode(REOP_QUANT);
+ state.result.min = 1;
+ state.result.max = -1;
+ /* <PLUS>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
+ state.progLength += 8;
+ hasQ = true;
+ break;
+ case '*':
+ state.result = new RENode(REOP_QUANT);
+ state.result.min = 0;
+ state.result.max = -1;
+ /* <STAR>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
+ state.progLength += 8;
+ hasQ = true;
+ break;
+ case '?':
+ state.result = new RENode(REOP_QUANT);
+ state.result.min = 0;
+ state.result.max = 1;
+ /* <OPT>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
+ state.progLength += 8;
+ hasQ = true;
+ break;
+ case '{': /* balance '}' */
+ {
+ int min = 0;
+ int max = -1;
+ int leftCurl = state.cp;
+
+ /* For Perl etc. compatibility, if quntifier does not match
+ * \{\d+(,\d*)?\} exactly back off from it
+ * being a quantifier, and chew it up as a literal
+ * atom next time instead.
+ */
+
+ c = src[++state.cp];
+ if (isDigit(c)) {
+ ++state.cp;
+ min = getDecimalValue(c, state, 0xFFFF,
+ "msg.overlarge.min");
+ c = src[state.cp];
+ if (c == ',') {
+ c = src[++state.cp];
+ if (isDigit(c)) {
+ ++state.cp;
+ max = getDecimalValue(c, state, 0xFFFF,
+ "msg.overlarge.max");
+ c = src[state.cp];
+ if (min > max) {
+ reportError("msg.max.lt.min",
+ String.valueOf(src[state.cp]));
+ return false;
+ }
+ }
+ } else {
+ max = min;
+ }
+ /* balance '{' */
+ if (c == '}') {
+ state.result = new RENode(REOP_QUANT);
+ state.result.min = min;
+ state.result.max = max;
+ // QUANT, <min>, <max>, <parencount>,
+ // <parenindex>, <next> ... <ENDCHILD>
+ state.progLength += 12;
+ hasQ = true;
+ }
+ }
+ if (!hasQ) {
+ state.cp = leftCurl;
+ }
+ break;
+ }
+ }
+ if (!hasQ)
+ return true;
+
+ ++state.cp;
+ state.result.kid = term;
+ state.result.parenIndex = parenBaseCount;
+ state.result.parenCount = state.parenCount - parenBaseCount;
+ if ((state.cp < state.cpend) && (src[state.cp] == '?')) {
+ ++state.cp;
+ state.result.greedy = false;
+ }
+ else
+ state.result.greedy = true;
+ return true;
+ }
+
+ private static void resolveForwardJump(byte[] array, int from, int pc)
+ {
+ if (from > pc) throw Kit.codeBug();
+ addIndex(array, from, pc - from);
+ }
+
+ private static int getOffset(byte[] array, int pc)
+ {
+ return getIndex(array, pc);
+ }
+
+ private static int addIndex(byte[] array, int pc, int index)
+ {
+ if (index < 0) throw Kit.codeBug();
+ if (index > 0xFFFF)
+ throw Context.reportRuntimeError("Too complex regexp");
+ array[pc] = (byte)(index >> 8);
+ array[pc + 1] = (byte)(index);
+ return pc + 2;
+ }
+
+ private static int getIndex(byte[] array, int pc)
+ {
+ return ((array[pc] & 0xFF) << 8) | (array[pc + 1] & 0xFF);
+ }
+
+ // Number of program bytes occupied by a jump offset operand.
+ private static final int OFFSET_LEN = 2;
+ // Number of program bytes occupied by an index operand.
+ private static final int INDEX_LEN = 2;
+
+ /**
+ * Emits the byte code for the node list starting at {@code t} into
+ * {@code re.program}, beginning at offset {@code pc}, recursing into
+ * child nodes where a node has kids.
+ *
+ * @param state compiler state; only its {@code flags} are read here, to
+ * choose the case-folding variants of the FLAT opcodes
+ * @param re compiled regexp whose {@code program} and {@code classList}
+ * are filled in
+ * @param pc program offset at which to start writing
+ * @param t first node of the list to emit; may be {@code null}
+ * @return the program offset just past the last byte emitted
+ */
+ private static int
+ emitREBytecode(CompilerState state, RECompiled re, int pc, RENode t)
+ {
+ RENode nextAlt;
+ int nextAltFixup, nextTermFixup;
+ byte[] program = re.program;
+
+ while (t != null) {
+ // Write the node's opcode first; several cases below rewrite
+ // program[pc - 1] with a more specific opcode.
+ program[pc++] = t.op;
+ switch (t.op) {
+ case REOP_EMPTY:
+ // Nothing is emitted for an empty node: step back over the opcode.
+ --pc;
+ break;
+ case REOP_ALT:
+ // Layout: ALT <next-alt offset> kid JUMP <offset> kid2 JUMP <offset>
+ nextAlt = t.kid2;
+ nextAltFixup = pc; /* address of next alternate */
+ pc += OFFSET_LEN;
+ pc = emitREBytecode(state, re, pc, t.kid);
+ program[pc++] = REOP_JUMP;
+ nextTermFixup = pc; /* address of following term */
+ pc += OFFSET_LEN;
+ resolveForwardJump(program, nextAltFixup, pc);
+ pc = emitREBytecode(state, re, pc, nextAlt);
+
+ program[pc++] = REOP_JUMP;
+ nextAltFixup = pc;
+ pc += OFFSET_LEN;
+
+ // Both JUMPs are patched to land just past the whole alternation.
+ resolveForwardJump(program, nextTermFixup, pc);
+ resolveForwardJump(program, nextAltFixup, pc);
+ break;
+ case REOP_FLAT:
+ /*
+ * Consecutize FLAT's if possible.
+ */
+ if (t.flatIndex != -1) {
+ // Merge following FLAT nodes whose text is adjacent to this one.
+ while ((t.next != null) && (t.next.op == REOP_FLAT)
+ && ((t.flatIndex + t.length)
+ == t.next.flatIndex)) {
+ t.length += t.next.length;
+ t.next = t.next.next;
+ }
+ }
+ if ((t.flatIndex != -1) && (t.length > 1)) {
+ // Multi-character literal: operands are <flatIndex> <length>.
+ if ((state.flags & JSREG_FOLD) != 0)
+ program[pc - 1] = REOP_FLATi;
+ else
+ program[pc - 1] = REOP_FLAT;
+ pc = addIndex(program, pc, t.flatIndex);
+ pc = addIndex(program, pc, t.length);
+ }
+ else {
+ if (t.chr < 256) {
+ // Single character that fits in one operand byte.
+ if ((state.flags & JSREG_FOLD) != 0)
+ program[pc - 1] = REOP_FLAT1i;
+ else
+ program[pc - 1] = REOP_FLAT1;
+ program[pc++] = (byte)(t.chr);
+ }
+ else {
+ // Single character needing a two-byte operand.
+ if ((state.flags & JSREG_FOLD) != 0)
+ program[pc - 1] = REOP_UCFLAT1i;
+ else
+ program[pc - 1] = REOP_UCFLAT1;
+ pc = addIndex(program, pc, t.chr);
+ }
+ }
+ break;
+ case REOP_LPAREN:
+ // Layout: LPAREN <parenIndex> kid RPAREN <parenIndex>
+ pc = addIndex(program, pc, t.parenIndex);
+ pc = emitREBytecode(state, re, pc, t.kid);
+ program[pc++] = REOP_RPAREN;
+ pc = addIndex(program, pc, t.parenIndex);
+ break;
+ case REOP_BACKREF:
+ pc = addIndex(program, pc, t.parenIndex);
+ break;
+ case REOP_ASSERT:
+ // Layout: ASSERT <offset past ASSERTTEST> kid ASSERTTEST
+ nextTermFixup = pc;
+ pc += OFFSET_LEN;
+ pc = emitREBytecode(state, re, pc, t.kid);
+ program[pc++] = REOP_ASSERTTEST;
+ resolveForwardJump(program, nextTermFixup, pc);
+ break;
+ case REOP_ASSERT_NOT:
+ // Layout: ASSERT_NOT <offset past ASSERTNOTTEST> kid ASSERTNOTTEST
+ nextTermFixup = pc;
+ pc += OFFSET_LEN;
+ pc = emitREBytecode(state, re, pc, t.kid);
+ program[pc++] = REOP_ASSERTNOTTEST;
+ resolveForwardJump(program, nextTermFixup, pc);
+ break;
+ case REOP_QUANT:
+ // The common {0,}, {0,1} and {1,} forms get dedicated opcodes with no
+ // <min>/<max> operands; everything else carries explicit operands.
+ if ((t.min == 0) && (t.max == -1))
+ program[pc - 1] = (t.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
+ else
+ if ((t.min == 0) && (t.max == 1))
+ program[pc - 1] = (t.greedy) ? REOP_OPT : REOP_MINIMALOPT;
+ else
+ if ((t.min == 1) && (t.max == -1))
+ program[pc - 1] = (t.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
+ else {
+ if (!t.greedy) program[pc - 1] = REOP_MINIMALQUANT;
+ pc = addIndex(program, pc, t.min);
+ // max can be -1 which addIndex does not accept
+ pc = addIndex(program, pc, t.max + 1);
+ }
+ pc = addIndex(program, pc, t.parenCount);
+ pc = addIndex(program, pc, t.parenIndex);
+ nextTermFixup = pc;
+ pc += OFFSET_LEN;
+ pc = emitREBytecode(state, re, pc, t.kid);
+ program[pc++] = REOP_ENDCHILD;
+ resolveForwardJump(program, nextTermFixup, pc);
+ break;
+ case REOP_CLASS:
+ // Operand is the slot in re.classList; the char set object is created
+ // here from the node's size and source span.
+ pc = addIndex(program, pc, t.index);
+ re.classList[t.index] = new RECharSet(t.bmsize, t.startIndex,
+ t.kidlen);
+ break;
+ default:
+ // All other opcodes take no operands.
+ break;
+ }
+ t = t.next;
+ }
+ return pc;
+ }
+
+ private static void
+ pushProgState(REGlobalData gData, int min, int max,
+ REBackTrackData backTrackLastToSave,
+ int continuation_pc, int continuation_op)
+ {
+ gData.stateStackTop = new REProgState(gData.stateStackTop, min, max,
+ gData.cp, backTrackLastToSave,
+ continuation_pc,
+ continuation_op);
+ }
+
+ private static REProgState
+ popProgState(REGlobalData gData)
+ {
+ REProgState state = gData.stateStackTop;
+ gData.stateStackTop = state.previous;
+ return state;
+ }
+
+ private static void
+ pushBackTrackState(REGlobalData gData, byte op, int target)
+ {
+ gData.backTrackStackTop = new REBackTrackData(gData, op, target);
+ }
+
+ /*
+ * Consecutive literal characters.
+ */
+ private static boolean
+ flatNMatcher(REGlobalData gData, int matchChars,
+ int length, char[] chars, int end)
+ {
+ if ((gData.cp + length) > end)
+ return false;
+ for (int i = 0; i < length; i++) {
+ if (gData.regexp.source[matchChars + i] != chars[gData.cp + i]) {
+ return false;
+ }
+ }
+ gData.cp += length;
+ return true;
+ }
+
+ private static boolean
+ flatNIMatcher(REGlobalData gData, int matchChars,
+ int length, char[] chars, int end)
+ {
+ if ((gData.cp + length) > end)
+ return false;
+ for (int i = 0; i < length; i++) {
+ if (upcase(gData.regexp.source[matchChars + i])
+ != upcase(chars[gData.cp + i]))
+ {
+ return false;
+ }
+ }
+ gData.cp += length;
+ return true;
+ }
+
+ /*
+ 1. Evaluate DecimalEscape to obtain an EscapeValue E.
+ 2. If E is not a character then go to step 6.
+ 3. Let ch be E's character.
+ 4. Let A be a one-element RECharSet containing the character ch.
+ 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
+ 6. E must be an integer. Let n be that integer.
+ 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
+ 8. Return an internal Matcher closure that takes two arguments, a State x
+ and a Continuation c, and performs the following:
+ 1. Let cap be x's captures internal array.
+ 2. Let s be cap[n].
+ 3. If s is undefined, then call c(x) and return its result.
+ 4. Let e be x's endIndex.
+ 5. Let len be s's length.
+ 6. Let f be e+len.
+ 7. If f>InputLength, return failure.
+ 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
+ such that Canonicalize(s[i]) is not the same character as
+ Canonicalize(Input [e+i]), then return failure.
+ 9. Let y be the State (f, cap).
+ 10. Call c(y) and return its result.
+ */
+ private static boolean
+ backrefMatcher(REGlobalData gData, int parenIndex,
+ char[] chars, int end)
+ {
+ int len;
+ int i;
+ int parenContent = gData.parens_index(parenIndex);
+ if (parenContent == -1)
+ return true;
+
+ len = gData.parens_length(parenIndex);
+ if ((gData.cp + len) > end)
+ return false;
+
+ if ((gData.regexp.flags & JSREG_FOLD) != 0) {
+ for (i = 0; i < len; i++) {
+ if (upcase(chars[parenContent + i]) != upcase(chars[gData.cp + i]))
+ return false;
+ }
+ }
+ else {
+ for (i = 0; i < len; i++) {
+ if (chars[parenContent + i] != chars[gData.cp + i])
+ return false;
+ }
+ }
+ gData.cp += len;
+ return true;
+ }
+
+
+ /* Add a single character to the RECharSet */
+ private static void
+ addCharacterToCharSet(RECharSet cs, char c)
+ {
+ int byteIndex = (c / 8);
+ if (c > cs.length)
+ throw new RuntimeException();
+ cs.bits[byteIndex] |= 1 << (c & 0x7);
+ }
+
+
+ /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
+ private static void
+ addCharacterRangeToCharSet(RECharSet cs, char c1, char c2)
+ {
+ int i;
+
+ int byteIndex1 = (c1 / 8);
+ int byteIndex2 = (c2 / 8);
+
+ if ((c2 > cs.length) || (c1 > c2))
+ throw new RuntimeException();
+
+ c1 &= 0x7;
+ c2 &= 0x7;
+
+ if (byteIndex1 == byteIndex2) {
+ cs.bits[byteIndex1] |= ((0xFF) >> (7 - (c2 - c1))) << c1;
+ }
+ else {
+ cs.bits[byteIndex1] |= 0xFF << c1;
+ for (i = byteIndex1 + 1; i < byteIndex2; i++)
+ cs.bits[i] = (byte)0xFF;
+ cs.bits[byteIndex2] |= (0xFF) >> (7 - c2);
+ }
+ }
+
+ /*
+  * Compiles the source of the class into the RECharSet's bitmap, unless
+  * that has already been done. The check and the conversion both run
+  * while holding the charSet's monitor.
+  */
+ private static void
+ processCharSet(REGlobalData gData, RECharSet charSet)
+ {
+     synchronized (charSet) {
+         if (charSet.converted) {
+             return;
+         }
+         processCharSetImpl(gData, charSet);
+         charSet.converted = true;
+     }
+ }
+
+
+ /*
+ * Builds charSet.bits from the class text found in the regexp source at
+ * [charSet.startIndex, charSet.startIndex + charSet.strlength).
+ * A leading '^' clears charSet.sense (negated class). Escapes, ranges
+ * ("a-z") and, under JSREG_FOLD, both case variants are added to the bitmap.
+ */
+ private static void
+ processCharSetImpl(REGlobalData gData, RECharSet charSet)
+ {
+ int src = charSet.startIndex;
+ int end = src + charSet.strlength;
+
+ char rangeStart = 0, thisCh;
+ int byteLength;
+ char c;
+ int n;
+ int nDigits;
+ int i;
+ boolean inRange = false;
+
+ charSet.sense = true;
+ // One bit per character value 0..charSet.length inclusive.
+ byteLength = (charSet.length / 8) + 1;
+ charSet.bits = new byte[byteLength];
+
+ // Empty class text: leave the bitmap all zero.
+ if (src == end)
+ return;
+
+ if (gData.regexp.source[src] == '^') {
+ charSet.sense = false;
+ ++src;
+ }
+
+ while (src != end) {
+ // Number of hex digits expected: 2 for \x, raised to 4 for \u below.
+ nDigits = 2;
+ switch (gData.regexp.source[src]) {
+ case '\\':
+ ++src;
+ c = gData.regexp.source[src++];
+ switch (c) {
+ case 'b':
+ // Inside a class \b is the backspace character.
+ thisCh = 0x8;
+ break;
+ case 'f':
+ thisCh = 0xC;
+ break;
+ case 'n':
+ thisCh = 0xA;
+ break;
+ case 'r':
+ thisCh = 0xD;
+ break;
+ case 't':
+ thisCh = 0x9;
+ break;
+ case 'v':
+ thisCh = 0xB;
+ break;
+ case 'c':
+ // NOTE(review): the guard inspects source[src + 1] while the character
+ // consumed below is source[src]; this looks like an off-by-one. Confirm
+ // against the parse-time class-size computation before changing it.
+ if (((src + 1) < end) && isWord(gData.regexp.source[src + 1]))
+ thisCh = (char)(gData.regexp.source[src++] & 0x1F);
+ else {
+ // Not a control escape: treat the backslash as a literal and
+ // re-read the 'c' on the next iteration.
+ --src;
+ thisCh = '\\';
+ }
+ break;
+ case 'u':
+ nDigits += 2;
+ // fall thru
+ case 'x':
+ n = 0;
+ for (i = 0; (i < nDigits) && (src < end); i++) {
+ c = gData.regexp.source[src++];
+ int digit = toASCIIHexDigit(c);
+ if (digit < 0) {
+ /* back off to accepting the original '\'
+ * as a literal
+ */
+ src -= (i + 1);
+ n = '\\';
+ break;
+ }
+ n = (n << 4) | digit;
+ }
+ thisCh = (char)(n);
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ /*
+ * This is a non-ECMA extension - decimal escapes (in this
+ * case, octal!) are supposed to be an error inside class
+ * ranges, but supported here for backwards compatibility.
+ *
+ */
+ n = (c - '0');
+ c = gData.regexp.source[src];
+ if ('0' <= c && c <= '7') {
+ src++;
+ n = 8 * n + (c - '0');
+ c = gData.regexp.source[src];
+ if ('0' <= c && c <= '7') {
+ src++;
+ i = 8 * n + (c - '0');
+ // Accept the third digit only while the value stays within 0377.
+ if (i <= 0377)
+ n = i;
+ else
+ src--;
+ }
+ }
+ thisCh = (char)(n);
+ break;
+
+ case 'd':
+ addCharacterRangeToCharSet(charSet, '0', '9');
+ continue; /* don't need range processing */
+ case 'D':
+ addCharacterRangeToCharSet(charSet, (char)0, (char)('0' - 1));
+ addCharacterRangeToCharSet(charSet, (char)('9' + 1),
+ (char)(charSet.length));
+ continue;
+ case 's':
+ for (i = charSet.length; i >= 0; i--)
+ if (isREWhiteSpace(i))
+ addCharacterToCharSet(charSet, (char)(i));
+ continue;
+ case 'S':
+ for (i = charSet.length; i >= 0; i--)
+ if (!isREWhiteSpace(i))
+ addCharacterToCharSet(charSet, (char)(i));
+ continue;
+ case 'w':
+ for (i = charSet.length; i >= 0; i--)
+ if (isWord((char)i))
+ addCharacterToCharSet(charSet, (char)(i));
+ continue;
+ case 'W':
+ for (i = charSet.length; i >= 0; i--)
+ if (!isWord((char)i))
+ addCharacterToCharSet(charSet, (char)(i));
+ continue;
+ default:
+ // Any other escaped character stands for itself.
+ thisCh = c;
+ break;
+
+ }
+ break;
+
+ default:
+ thisCh = gData.regexp.source[src++];
+ break;
+
+ }
+ if (inRange) {
+ // thisCh is the upper end of a pending "rangeStart-thisCh" range.
+ if ((gData.regexp.flags & JSREG_FOLD) != 0) {
+ addCharacterRangeToCharSet(charSet,
+ upcase(rangeStart),
+ upcase(thisCh));
+ addCharacterRangeToCharSet(charSet,
+ downcase(rangeStart),
+ downcase(thisCh));
+ } else {
+ addCharacterRangeToCharSet(charSet, rangeStart, thisCh);
+ }
+ inRange = false;
+ }
+ else {
+ if ((gData.regexp.flags & JSREG_FOLD) != 0) {
+ addCharacterToCharSet(charSet, upcase(thisCh));
+ addCharacterToCharSet(charSet, downcase(thisCh));
+ } else {
+ addCharacterToCharSet(charSet, thisCh);
+ }
+ // A '-' that is not the last character of the class starts a range.
+ if (src < (end - 1)) {
+ if (gData.regexp.source[src] == '-') {
+ ++src;
+ inRange = true;
+ rangeStart = thisCh;
+ }
+ }
+ }
+ }
+ }
+
+
+ /*
+ * Initialize the character set if it this is the first call.
+ * Test the bit - if the ^ flag was specified, non-inclusion is a success
+ */
+ private static boolean
+ classMatcher(REGlobalData gData, RECharSet charSet, char ch)
+ {
+ if (!charSet.converted) {
+ processCharSet(gData, charSet);
+ }
+
+ int byteIndex = ch / 8;
+ if (charSet.sense) {
+ if ((charSet.length == 0) ||
+ ( (ch > charSet.length)
+ || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0) ))
+ return false;
+ } else {
+ if (! ((charSet.length == 0) ||
+ ( (ch > charSet.length)
+ || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0) )))
+ return false;
+ }
+ return true;
+ }
+
+ /*
+ * Interprets gData.regexp.program against chars, starting at input position
+ * gData.cp, with 'end' as the exclusive upper bound of the input.
+ * Returns true when REOP_END is reached, and false when a step fails while
+ * the backtrack stack is empty. Simple opcodes set 'result' and break out of
+ * the switch to the shared failure/backtrack handling at the bottom of the
+ * loop; control-flow opcodes load the next 'op' themselves and 'continue'.
+ */
+ private static boolean
+ executeREBytecode(REGlobalData gData, char[] chars, int end)
+ {
+ int pc = 0;
+ byte program[] = gData.regexp.program;
+ int currentContinuation_op;
+ int currentContinuation_pc;
+ boolean result = false;
+
+ currentContinuation_pc = 0;
+ currentContinuation_op = REOP_END;
+if (debug) {
+System.out.println("Input = \"" + new String(chars) + "\", start at " + gData.cp);
+}
+ int op = program[pc++];
+ for (;;) {
+if (debug) {
+System.out.println("Testing at " + gData.cp + ", op = " + op);
+}
+ switch (op) {
+ case REOP_EMPTY:
+ result = true;
+ break;
+ case REOP_BOL:
+ // Succeeds at input start, or after a line terminator in multiline mode.
+ if (gData.cp != 0) {
+ if (gData.multiline ||
+ ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
+ if (!isLineTerm(chars[gData.cp - 1])) {
+ result = false;
+ break;
+ }
+ }
+ else {
+ result = false;
+ break;
+ }
+ }
+ result = true;
+ break;
+ case REOP_EOL:
+ // Succeeds at input end, or before a line terminator in multiline mode.
+ if (gData.cp != end) {
+ if (gData.multiline ||
+ ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
+ if (!isLineTerm(chars[gData.cp])) {
+ result = false;
+ break;
+ }
+ }
+ else {
+ result = false;
+ break;
+ }
+ }
+ result = true;
+ break;
+ case REOP_WBDRY:
+ result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1]))
+ ^ !((gData.cp < end) && isWord(chars[gData.cp])));
+ break;
+ case REOP_WNONBDRY:
+ result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1]))
+ ^ ((gData.cp < end) && isWord(chars[gData.cp])));
+ break;
+ case REOP_DOT:
+ result = (gData.cp != end && !isLineTerm(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_DIGIT:
+ result = (gData.cp != end && isDigit(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_NONDIGIT:
+ result = (gData.cp != end && !isDigit(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_SPACE:
+ result = (gData.cp != end && isREWhiteSpace(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_NONSPACE:
+ result = (gData.cp != end && !isREWhiteSpace(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_ALNUM:
+ result = (gData.cp != end && isWord(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_NONALNUM:
+ result = (gData.cp != end && !isWord(chars[gData.cp]));
+ if (result) {
+ gData.cp++;
+ }
+ break;
+ case REOP_FLAT:
+ {
+ // Operands: <offset into regexp source> <length>.
+ int offset = getIndex(program, pc);
+ pc += INDEX_LEN;
+ int length = getIndex(program, pc);
+ pc += INDEX_LEN;
+ result = flatNMatcher(gData, offset, length, chars, end);
+ }
+ break;
+ case REOP_FLATi:
+ {
+ int offset = getIndex(program, pc);
+ pc += INDEX_LEN;
+ int length = getIndex(program, pc);
+ pc += INDEX_LEN;
+ result = flatNIMatcher(gData, offset, length, chars, end);
+ }
+ break;
+ case REOP_FLAT1:
+ {
+ // Single-byte operand holds the character to match.
+ char matchCh = (char)(program[pc++] & 0xFF);
+ result = (gData.cp != end && chars[gData.cp] == matchCh);
+ if (result) {
+ gData.cp++;
+ }
+ }
+ break;
+ case REOP_FLAT1i:
+ {
+ char matchCh = (char)(program[pc++] & 0xFF);
+ result = (gData.cp != end
+ && upcase(chars[gData.cp]) == upcase(matchCh));
+ if (result) {
+ gData.cp++;
+ }
+ }
+ break;
+ case REOP_UCFLAT1:
+ {
+ // Two-byte operand holds the character to match.
+ char matchCh = (char)getIndex(program, pc);
+ pc += INDEX_LEN;
+ result = (gData.cp != end && chars[gData.cp] == matchCh);
+ if (result) {
+ gData.cp++;
+ }
+ }
+ break;
+ case REOP_UCFLAT1i:
+ {
+ char matchCh = (char)getIndex(program, pc);
+ pc += INDEX_LEN;
+ result = (gData.cp != end
+ && upcase(chars[gData.cp]) == upcase(matchCh));
+ if (result) {
+ gData.cp++;
+ }
+ }
+ break;
+ case REOP_ALT:
+ {
+ // Save the continuation, register the second alternative as a
+ // backtrack point, then run the first alternative.
+ int nextpc;
+ byte nextop;
+ pushProgState(gData, 0, 0, null,
+ currentContinuation_pc,
+ currentContinuation_op);
+ nextpc = pc + getOffset(program, pc);
+ nextop = program[nextpc++];
+ pushBackTrackState(gData, nextop, nextpc);
+ pc += INDEX_LEN;
+ op = program[pc++];
+ }
+ continue;
+
+ case REOP_JUMP:
+ {
+ // End of an alternative: restore the continuation saved by
+ // REOP_ALT and jump by the stored offset.
+ int offset;
+ REProgState state = popProgState(gData);
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ offset = getOffset(program, pc);
+ pc += offset;
+ op = program[pc++];
+ }
+ continue;
+
+
+ case REOP_LPAREN:
+ {
+ // Record the capture's start; its length is filled in at RPAREN.
+ int parenIndex = getIndex(program, pc);
+ pc += INDEX_LEN;
+ gData.set_parens(parenIndex, gData.cp, 0);
+ op = program[pc++];
+ }
+ continue;
+ case REOP_RPAREN:
+ {
+ int cap_index;
+ int parenIndex = getIndex(program, pc);
+ pc += INDEX_LEN;
+ cap_index = gData.parens_index(parenIndex);
+ gData.set_parens(parenIndex, cap_index,
+ gData.cp - cap_index);
+ if (parenIndex > gData.lastParen)
+ gData.lastParen = parenIndex;
+ op = program[pc++];
+ }
+ continue;
+ case REOP_BACKREF:
+ {
+ int parenIndex = getIndex(program, pc);
+ pc += INDEX_LEN;
+ result = backrefMatcher(gData, parenIndex, chars, end);
+ }
+ break;
+
+ case REOP_CLASS:
+ {
+ // Operand is the slot of the character set in regexp.classList.
+ int index = getIndex(program, pc);
+ pc += INDEX_LEN;
+ if (gData.cp != end) {
+ if (classMatcher(gData, gData.regexp.classList[index],
+ chars[gData.cp]))
+ {
+ gData.cp++;
+ result = true;
+ break;
+ }
+ }
+ result = false;
+ }
+ break;
+
+ case REOP_ASSERT:
+ case REOP_ASSERT_NOT:
+ {
+ // Save the state (including the current backtrack top) and push a
+ // backtrack entry that resumes at the matching *TEST opcode if the
+ // asserted sub-expression fails.
+ byte testOp;
+ pushProgState(gData, 0, 0, gData.backTrackStackTop,
+ currentContinuation_pc,
+ currentContinuation_op);
+ if (op == REOP_ASSERT) {
+ testOp = REOP_ASSERTTEST;
+ } else {
+ testOp = REOP_ASSERTNOTTEST;
+ }
+ pushBackTrackState(gData, testOp,
+ pc + getOffset(program, pc));
+ pc += INDEX_LEN;
+ op = program[pc++];
+ }
+ continue;
+
+ case REOP_ASSERTTEST:
+ case REOP_ASSERTNOTTEST:
+ {
+ // Restore input position, backtrack stack and continuation from the
+ // state saved at the assertion's start; the negated form then
+ // inverts 'result'.
+ REProgState state = popProgState(gData);
+ gData.cp = state.index;
+ gData.backTrackStackTop = state.backTrack;
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ if (result) {
+ if (op == REOP_ASSERTTEST) {
+ result = true;
+ } else {
+ result = false;
+ }
+ } else {
+ if (op == REOP_ASSERTTEST) {
+ // Do nothing
+ } else {
+ result = true;
+ }
+ }
+ }
+ break;
+
+ case REOP_STAR:
+ case REOP_PLUS:
+ case REOP_OPT:
+ case REOP_QUANT:
+ case REOP_MINIMALSTAR:
+ case REOP_MINIMALPLUS:
+ case REOP_MINIMALOPT:
+ case REOP_MINIMALQUANT:
+ {
+ // Decode the repeat bounds (max == -1 means unbounded) and whether
+ // the quantifier is greedy.
+ int min, max;
+ boolean greedy = false;
+ switch (op) {
+ case REOP_STAR:
+ greedy = true;
+ // fallthrough
+ case REOP_MINIMALSTAR:
+ min = 0;
+ max = -1;
+ break;
+ case REOP_PLUS:
+ greedy = true;
+ // fallthrough
+ case REOP_MINIMALPLUS:
+ min = 1;
+ max = -1;
+ break;
+ case REOP_OPT:
+ greedy = true;
+ // fallthrough
+ case REOP_MINIMALOPT:
+ min = 0;
+ max = 1;
+ break;
+ case REOP_QUANT:
+ greedy = true;
+ // fallthrough
+ case REOP_MINIMALQUANT:
+ min = getOffset(program, pc);
+ pc += INDEX_LEN;
+ // See comments in emitREBytecode for " - 1" reason
+ max = getOffset(program, pc) - 1;
+ pc += INDEX_LEN;
+ break;
+ default:
+ throw Kit.codeBug();
+ }
+ pushProgState(gData, min, max, null,
+ currentContinuation_pc,
+ currentContinuation_op);
+ if (greedy) {
+ currentContinuation_op = REOP_REPEAT;
+ currentContinuation_pc = pc;
+ pushBackTrackState(gData, REOP_REPEAT, pc);
+ /* Step over <parencount>, <parenindex> & <next> */
+ pc += 3 * INDEX_LEN;
+ op = program[pc++];
+ } else {
+ if (min != 0) {
+ currentContinuation_op = REOP_MINIMALREPEAT;
+ currentContinuation_pc = pc;
+ /* <parencount> <parenindex> & <next> */
+ pc += 3 * INDEX_LEN;
+ op = program[pc++];
+ } else {
+ // Non-greedy with min 0: first try the code after the
+ // quantifier, keeping the child as a backtrack option.
+ pushBackTrackState(gData, REOP_MINIMALREPEAT, pc);
+ popProgState(gData);
+ pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
+ pc = pc + getOffset(program, pc);
+ op = program[pc++];
+ }
+ }
+ }
+ continue;
+
+ case REOP_ENDCHILD:
+ // Use the current continuation.
+ pc = currentContinuation_pc;
+ op = currentContinuation_op;
+ continue;
+
+ case REOP_REPEAT:
+ {
+ REProgState state = popProgState(gData);
+ if (!result) {
+ //
+ // There's been a failure, see if we have enough
+ // children.
+ //
+ if (state.min == 0)
+ result = true;
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ pc += 2 * INDEX_LEN; /* <parencount> & <parenindex> */
+ pc = pc + getOffset(program, pc);
+ break;
+ }
+ else {
+ if (state.min == 0 && gData.cp == state.index) {
+ // matched an empty string, that'll get us nowhere
+ result = false;
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ pc += 2 * INDEX_LEN;
+ pc = pc + getOffset(program, pc);
+ break;
+ }
+ int new_min = state.min, new_max = state.max;
+ if (new_min != 0) new_min--;
+ if (new_max != -1) new_max--;
+ if (new_max == 0) {
+ // Maximum repeat count reached: continue after the quantifier.
+ result = true;
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ pc += 2 * INDEX_LEN;
+ pc = pc + getOffset(program, pc);
+ break;
+ }
+ pushProgState(gData, new_min, new_max, null,
+ state.continuation_pc,
+ state.continuation_op);
+ currentContinuation_op = REOP_REPEAT;
+ currentContinuation_pc = pc;
+ pushBackTrackState(gData, REOP_REPEAT, pc);
+ int parenCount = getIndex(program, pc);
+ pc += INDEX_LEN;
+ int parenIndex = getIndex(program, pc);
+ pc += 2 * INDEX_LEN;
+ op = program[pc++];
+ // Reset the captures inside the repeated child before the next
+ // iteration.
+ for (int k = 0; k < parenCount; k++) {
+ gData.set_parens(parenIndex + k, -1, 0);
+ }
+ }
+ }
+ continue;
+
+ case REOP_MINIMALREPEAT:
+ {
+ REProgState state = popProgState(gData);
+ if (!result) {
+ //
+ // Non-greedy failure - try to consume another child.
+ //
+ if (state.max == -1 || state.max > 0) {
+ pushProgState(gData, state.min, state.max, null,
+ state.continuation_pc,
+ state.continuation_op);
+ currentContinuation_op = REOP_MINIMALREPEAT;
+ currentContinuation_pc = pc;
+ int parenCount = getIndex(program, pc);
+ pc += INDEX_LEN;
+ int parenIndex = getIndex(program, pc);
+ pc += 2 * INDEX_LEN;
+ for (int k = 0; k < parenCount; k++) {
+ gData.set_parens(parenIndex + k, -1, 0);
+ }
+ op = program[pc++];
+ continue;
+ } else {
+ // Don't need to adjust pc since we're going to pop.
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ break;
+ }
+ } else {
+ if (state.min == 0 && gData.cp == state.index) {
+ // Matched an empty string, that'll get us nowhere.
+ result = false;
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ break;
+ }
+ int new_min = state.min, new_max = state.max;
+ if (new_min != 0) new_min--;
+ if (new_max != -1) new_max--;
+ pushProgState(gData, new_min, new_max, null,
+ state.continuation_pc,
+ state.continuation_op);
+ if (new_min != 0) {
+ // Minimum not yet reached: the child must run again.
+ currentContinuation_op = REOP_MINIMALREPEAT;
+ currentContinuation_pc = pc;
+ int parenCount = getIndex(program, pc);
+ pc += INDEX_LEN;
+ int parenIndex = getIndex(program, pc);
+ pc += 2 * INDEX_LEN;
+ for (int k = 0; k < parenCount; k++) {
+ gData.set_parens(parenIndex + k, -1, 0);
+ }
+ op = program[pc++];
+ } else {
+ // Minimum satisfied: try the code after the quantifier first,
+ // keeping another iteration as a backtrack option.
+ currentContinuation_pc = state.continuation_pc;
+ currentContinuation_op = state.continuation_op;
+ pushBackTrackState(gData, REOP_MINIMALREPEAT, pc);
+ popProgState(gData);
+ pc += 2 * INDEX_LEN;
+ pc = pc + getOffset(program, pc);
+ op = program[pc++];
+ }
+ continue;
+ }
+ }
+
+ case REOP_END:
+ return true;
+
+ default:
+ throw Kit.codeBug();
+
+ }
+ /*
+ * If the match failed and there's a backtrack option, take it.
+ * Otherwise this is a complete and utter failure.
+ */
+ if (!result) {
+ REBackTrackData backTrackData = gData.backTrackStackTop;
+ if (backTrackData != null) {
+ gData.backTrackStackTop = backTrackData.previous;
+
+ gData.lastParen = backTrackData.lastParen;
+
+ // XXX: If backTrackData will no longer be used, then
+ // there is no need to clone backTrackData.parens
+ if (backTrackData.parens != null) {
+ gData.parens = backTrackData.parens.clone();
+ }
+
+ gData.cp = backTrackData.cp;
+
+ gData.stateStackTop = backTrackData.stateStackTop;
+
+ currentContinuation_op
+ = gData.stateStackTop.continuation_op;
+ currentContinuation_pc
+ = gData.stateStackTop.continuation_pc;
+ pc = backTrackData.continuation_pc;
+ op = backTrackData.continuation_op;
+ continue;
+ }
+ else
+ return false;
+ }
+
+ // The step succeeded: fetch the next opcode.
+ op = program[pc++];
+ }
+
+ }
+
+ private static boolean
+ matchRegExp(REGlobalData gData, RECompiled re,
+ char[] chars, int start, int end, boolean multiline)
+ {
+ if (re.parenCount != 0) {
+ gData.parens = new long[re.parenCount];
+ } else {
+ gData.parens = null;
+ }
+
+ gData.backTrackStackTop = null;
+
+ gData.stateStackTop = null;
+
+ gData.multiline = multiline;
+ gData.regexp = re;
+ gData.lastParen = 0;
+
+ int anchorCh = gData.regexp.anchorCh;
+ //
+ // have to include the position beyond the last character
+ // in order to detect end-of-input/line condition
+ //
+ for (int i = start; i <= end; ++i) {
+ //
+ // If the first node is a literal match, step the index into
+ // the string until that match is made, or fail if it can't be
+ // found at all.
+ //
+ if (anchorCh >= 0) {
+ for (;;) {
+ if (i == end) {
+ return false;
+ }
+ char matchCh = chars[i];
+ if (matchCh == anchorCh ||
+ ((gData.regexp.flags & JSREG_FOLD) != 0
+ && upcase(matchCh) == upcase((char)anchorCh)))
+ {
+ break;
+ }
+ ++i;
+ }
+ }
+ gData.cp = i;
+ for (int j = 0; j < re.parenCount; j++) {
+ gData.set_parens(j, -1, 0);
+ }
+ boolean result = executeREBytecode(gData, chars, end);
+
+ gData.backTrackStackTop = null;
+ gData.stateStackTop = null;
+ if (result) {
+ gData.skipped = i - start;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /*
+  * Runs this regexp against 'str' starting at indexp[0] and records the
+  * outcome in 'res' (parens, lastParen, lastMatch, leftContext,
+  * rightContext).
+  *
+  * indexp is assumed to be an array of length 1; on a successful match
+  * indexp[0] is updated to the position just past the match.
+  *
+  * Returns, when there is no match, Undefined.instance for matchType
+  * PREFIX and null otherwise. On a match it returns Boolean.TRUE for
+  * matchType TEST, and otherwise a new Array object holding the matched
+  * string, the paren matches and the "index" and "input" properties.
+  */
+ Object executeRegExp(Context cx, Scriptable scopeObj, RegExpImpl res,
+                      String str, int indexp[], int matchType)
+ {
+     REGlobalData gData = new REGlobalData();
+
+     int start = indexp[0];
+     char[] charArray = str.toCharArray();
+     int end = charArray.length;
+     if (start > end)
+         start = end;
+     //
+     // Call the recursive matcher to do the real work.
+     //
+     boolean matches = matchRegExp(gData, re, charArray, start, end,
+                                   res.multiline);
+     if (!matches) {
+         if (matchType != PREFIX) return null;
+         return Undefined.instance;
+     }
+     // ep is the position just past the match, index is where it starts.
+     int ep = gData.cp;
+     indexp[0] = ep;
+     int index = start + gData.skipped;
+     int matchlen = ep - index;
+     Object result;
+     Scriptable obj;
+
+     if (matchType == TEST) {
+         /*
+          * Testing for a match and updating cx.regExpImpl: don't allocate
+          * an array object, do return true.
+          */
+         result = Boolean.TRUE;
+         obj = null;
+     }
+     else {
+         /*
+          * The array returned on match has element 0 bound to the matched
+          * string, elements 1 through re.parenCount bound to the paren
+          * matches, an index property telling the length of the left context,
+          * and an input property referring to the input string.
+          */
+         Scriptable scope = getTopLevelScope(scopeObj);
+         result = ScriptRuntime.newObject(cx, scope, "Array", null);
+         obj = (Scriptable) result;
+
+         String matchstr = new String(charArray, index, matchlen);
+         obj.put(0, obj, matchstr);
+     }
+
+     if (re.parenCount == 0) {
+         res.parens = null;
+         res.lastParen = SubString.emptySubString;
+     } else {
+         // parsub ends up as the last capture that took part in the match,
+         // or stays null when none did.
+         SubString parsub = null;
+         int num;
+         res.parens = new SubString[re.parenCount];
+         for (num = 0; num < re.parenCount; num++) {
+             int cap_index = gData.parens_index(num);
+             String parstr;
+             if (cap_index != -1) {
+                 int cap_length = gData.parens_length(num);
+                 parsub = new SubString(charArray, cap_index, cap_length);
+                 res.parens[num] = parsub;
+                 if (matchType == TEST) continue;
+                 parstr = parsub.toString();
+                 obj.put(num+1, obj, parstr);
+             }
+             else {
+                 if (matchType != TEST)
+                     obj.put(num+1, obj, Undefined.instance);
+             }
+         }
+         res.lastParen = parsub;
+     }
+
+     if (! (matchType == TEST)) {
+         /*
+          * Define the index and input properties last for better for/in loop
+          * order (so they come after the elements).
+          */
+         // Integer.valueOf replaces the deprecated boxing constructor.
+         obj.put("index", obj, Integer.valueOf(index));
+         obj.put("input", obj, str);
+     }
+
+     if (res.lastMatch == null) {
+         res.lastMatch = new SubString();
+         res.leftContext = new SubString();
+         res.rightContext = new SubString();
+     }
+     res.lastMatch.charArray = charArray;
+     res.lastMatch.index = index;
+     res.lastMatch.length = matchlen;
+
+     res.leftContext.charArray = charArray;
+     if (cx.getLanguageVersion() == Context.VERSION_1_2) {
+         /*
+          * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
+          * in scalar contexts, and unintentionally for the string.match "list"
+          * pseudo-context. On "hi there bye", the following would result:
+          *
+          * Language while(/ /g){print("$`");} s/ /$`/g
+          * perl4.036 "hi", "there" "hihitherehi therebye"
+          * perl5 "hi", "hi there" "hihitherehi therebye"
+          * js1.2 "hi", "there" "hihitheretherebye"
+          *
+          * Insofar as JS1.2 always defined $` as "left context from the last
+          * match" for global regexps, it was more consistent than perl4.
+          */
+         res.leftContext.index = start;
+         res.leftContext.length = gData.skipped;
+     } else {
+         /*
+          * For JS1.3 and ECMAv2, emulate Perl5 exactly:
+          *
+          * js1.3 "hi", "hi there" "hihitherehi therebye"
+          */
+         res.leftContext.index = 0;
+         res.leftContext.length = start + gData.skipped;
+     }
+
+     res.rightContext.charArray = charArray;
+     res.rightContext.index = ep;
+     res.rightContext.length = end - ep;
+
+     return result;
+ }
+
+ /** Returns the flag bits of the compiled regexp. */
+ int getFlags() {
+     return this.re.flags;
+ }
+
+ /**
+  * Reports the message identified by {@code messageId}, formatted with
+  * {@code arg}, as a warning. Nothing is reported unless the context has
+  * {@code FEATURE_STRICT_MODE} enabled.
+  */
+ private static void reportWarning(Context cx, String messageId, String arg)
+ {
+     if (!cx.hasFeature(Context.FEATURE_STRICT_MODE)) {
+         return;
+     }
+     Context.reportWarning(ScriptRuntime.getMessage1(messageId, arg));
+ }
+
+ private static void reportError(String messageId, String arg)
+ {
+ String msg = ScriptRuntime.getMessage1(messageId, arg);
+ throw ScriptRuntime.constructError("SyntaxError", msg);
+ }
+
+// #string_id_map#
+
+ // Ids of the instance properties handled by this class; MAX_INSTANCE_ID
+ // is the highest id in use.
+ private static final int
+ Id_lastIndex = 1,
+ Id_source = 2,
+ Id_global = 3,
+ Id_ignoreCase = 4,
+ Id_multiline = 5,
+
+ MAX_INSTANCE_ID = 5;
+
+ /** Returns the highest instance property id defined by this class. */
+ protected int getMaxInstanceId() {
+     return MAX_INSTANCE_ID;
+ }
+
+ /**
+ * Maps an instance property name to its id and attributes. Names not
+ * handled here are delegated to the superclass. {@code lastIndex} is
+ * writable; the other four properties are read-only. All five are
+ * permanent and non-enumerable.
+ */
+ protected int findInstanceIdInfo(String s)
+ {
+ int id;
+// #generated# Last update: 2007-05-09 08:16:24 EDT
+ // Generated lookup: pick a candidate by length and first character,
+ // then confirm it with a full comparison.
+ L0: { id = 0; String X = null; int c;
+ int s_length = s.length();
+ if (s_length==6) {
+ c=s.charAt(0);
+ if (c=='g') { X="global";id=Id_global; }
+ else if (c=='s') { X="source";id=Id_source; }
+ }
+ else if (s_length==9) {
+ c=s.charAt(0);
+ if (c=='l') { X="lastIndex";id=Id_lastIndex; }
+ else if (c=='m') { X="multiline";id=Id_multiline; }
+ }
+ else if (s_length==10) { X="ignoreCase";id=Id_ignoreCase; }
+ // The reference check (X!=s) skips equals() when s is the same object.
+ if (X!=null && X!=s && !X.equals(s)) id = 0;
+ break L0;
+ }
+// #/generated#
+// #/string_id_map#
+
+ if (id == 0) return super.findInstanceIdInfo(s);
+
+ int attr;
+ switch (id) {
+ case Id_lastIndex:
+ attr = PERMANENT | DONTENUM;
+ break;
+ case Id_source:
+ case Id_global:
+ case Id_ignoreCase:
+ case Id_multiline:
+ attr = PERMANENT | READONLY | DONTENUM;
+ break;
+ default:
+ throw new IllegalStateException();
+ }
+ return instanceIdInfo(attr, id);
+ }
+
+ protected String getInstanceIdName(int id)
+ {
+ switch (id) {
+ case Id_lastIndex: return "lastIndex";
+ case Id_source: return "source";
+ case Id_global: return "global";
+ case Id_ignoreCase: return "ignoreCase";
+ case Id_multiline: return "multiline";
+ }
+ return super.getInstanceIdName(id);
+ }
+
+ protected Object getInstanceIdValue(int id)
+ {
+ switch (id) {
+ case Id_lastIndex:
+ return ScriptRuntime.wrapNumber(lastIndex);
+ case Id_source:
+ return new String(re.source);
+ case Id_global:
+ return ScriptRuntime.wrapBoolean((re.flags & JSREG_GLOB) != 0);
+ case Id_ignoreCase:
+ return ScriptRuntime.wrapBoolean((re.flags & JSREG_FOLD) != 0);
+ case Id_multiline:
+ return ScriptRuntime.wrapBoolean((re.flags & JSREG_MULTILINE) != 0);
+ }
+ return super.getInstanceIdValue(id);
+ }
+
+ /**
+  * Assigns an instance property. Only lastIndex is stored here, after
+  * conversion with ScriptRuntime.toNumber; every other id is forwarded
+  * to the superclass.
+  *
+  * @param id    the instance property id
+  * @param value the value being assigned
+  */
+ protected void setInstanceIdValue(int id, Object value)
+ {
+     if (id != Id_lastIndex) {
+         super.setInstanceIdValue(id, value);
+         return;
+     }
+     lastIndex = ScriptRuntime.toNumber(value);
+ }
+
+ protected void initPrototypeId(int id)
+ {
+ String s;
+ int arity;
+ switch (id) {
+ case Id_compile: arity=1; s="compile"; break;
+ case Id_toString: arity=0; s="toString"; break;
+ case Id_toSource: arity=0; s="toSource"; break;
+ case Id_exec: arity=1; s="exec"; break;
+ case Id_test: arity=1; s="test"; break;
+ case Id_prefix: arity=1; s="prefix"; break;
+ default: throw new IllegalArgumentException(String.valueOf(id));
+ }
+ initPrototypeMethod(REGEXP_TAG, id, s, arity);
+ }
+
+ /**
+  * Dispatches a call to one of the prototype methods registered by this
+  * class. Functions that do not carry REGEXP_TAG are passed to the
+  * superclass. toString and toSource both return the receiver's
+  * toString(); test reduces the execSub result to Boolean.TRUE or
+  * Boolean.FALSE.
+  *
+  * @throws IllegalArgumentException if the method id is not handled here
+  */
+ public Object execIdCall(IdFunctionObject f, Context cx, Scriptable scope,
+                          Scriptable thisObj, Object[] args)
+ {
+     if (f.hasTag(REGEXP_TAG)) {
+         int methodId = f.methodId();
+         switch (methodId) {
+           case Id_compile:
+             return realThis(thisObj, f).compile(cx, scope, args);
+
+           case Id_toString:
+           case Id_toSource:
+             return realThis(thisObj, f).toString();
+
+           case Id_exec:
+             return realThis(thisObj, f).execSub(cx, scope, args, MATCH);
+
+           case Id_test: {
+             Object outcome = realThis(thisObj, f).execSub(cx, scope, args, TEST);
+             if (Boolean.TRUE.equals(outcome)) {
+                 return Boolean.TRUE;
+             }
+             return Boolean.FALSE;
+           }
+
+           case Id_prefix:
+             return realThis(thisObj, f).execSub(cx, scope, args, PREFIX);
+
+           default:
+             throw new IllegalArgumentException(String.valueOf(methodId));
+         }
+     }
+     return super.execIdCall(f, cx, scope, thisObj, args);
+ }
+
+ /**
+  * Casts the receiver of a prototype method call to NativeRegExp.
+  *
+  * @param thisObj the receiver of the call
+  * @param f       the function being invoked, used to build the error
+  * @return thisObj as a NativeRegExp
+  * @throws RuntimeException the error produced by incompatibleCallError(f)
+  *         when thisObj is not a NativeRegExp
+  */
+ private static NativeRegExp realThis(Scriptable thisObj, IdFunctionObject f)
+ {
+     if (thisObj instanceof NativeRegExp) {
+         return (NativeRegExp) thisObj;
+     }
+     throw incompatibleCallError(f);
+ }
+
+// #string_id_map#
+ /**
+ * Maps a prototype method name to its prototype id.
+ *
+ * The body between the #generated# markers is marked as generated code:
+ * it chooses a candidate from the length of s (and one discriminating
+ * character where two names share a length) and then confirms it with a
+ * full string comparison.
+ *
+ * @param s the method name to look up
+ * @return the matching Id_* constant, or 0 if s is not a known name
+ */
+ protected int findPrototypeId(String s)
+ {
+ int id;
+// #generated# Last update: 2007-05-09 08:16:24 EDT
+ L0: { id = 0; String X = null; int c;
+ L: switch (s.length()) {
+ case 4: c=s.charAt(0);
+ if (c=='e') { X="exec";id=Id_exec; }
+ else if (c=='t') { X="test";id=Id_test; }
+ break L;
+ case 6: X="prefix";id=Id_prefix; break L;
+ case 7: X="compile";id=Id_compile; break L;
+ case 8: c=s.charAt(3);
+ if (c=='o') { X="toSource";id=Id_toSource; }
+ else if (c=='t') { X="toString";id=Id_toString; }
+ break L;
+ }
+ if (X!=null && X!=s && !X.equals(s)) id = 0;
+ break L0;
+ }
+// #/generated#
+ return id;
+ }
+
+ private static final int
+ Id_compile = 1,
+ Id_toString = 2,
+ Id_toSource = 3,
+ Id_exec = 4,
+ Id_test = 5,
+ Id_prefix = 6,
+
+ MAX_PROTOTYPE_ID = 6;
+
+// #/string_id_map#
+
+ private RECompiled re;
+ double lastIndex; /* index after last match, for //g iterator */
+
+} // class NativeRegExp
+
+/*
+ * Compiled form of a regular expression: the source characters, the flags,
+ * the bytecode program and the table of character classes. Instances are
+ * Serializable; all fields are package-private and accessed directly.
+ */
+class RECompiled implements Serializable
+{
+ static final long serialVersionUID = -6144956577595844213L;
+
+ char []source; /* locked source string, sans // */
+ int parenCount; /* number of parenthesized submatches */
+ int flags; /* flags */
+ byte[] program; /* regular expression bytecode */
+ int classCount; /* count [...] bitmaps */
+ RECharSet[] classList; /* list of [...] bitmaps */
+ int anchorCh = -1; /* if >= 0, then re starts with this literal char */
+}
+
+/*
+ * A regular-expression node identified by an op bytecode. Nodes are linked
+ * through next (concatenation order) and kid / kid2 (operands). The
+ * remaining fields are grouped by the kind of node they describe (number or
+ * paren index, range, character class, literal); presumably only the group
+ * matching op is meaningful for a given node -- TODO confirm against the
+ * code that builds these nodes.
+ */
+class RENode {
+
+ /* Creates a node for the given op; all other fields keep their defaults. */
+ RENode(byte op)
+ {
+ this.op = op;
+ }
+
+ byte op; /* r.e. op bytecode */
+ RENode next; /* next in concatenation order */
+ RENode kid; /* first operand */
+
+ RENode kid2; /* second operand */
+ int num; /* could be a number */
+ int parenIndex; /* or a parenthesis index */
+
+ /* or a range */
+ int min;
+ int max;
+ int parenCount;
+ boolean greedy;
+
+ /* or a character class */
+ int startIndex;
+ int kidlen; /* length of string at kid, in chars */
+ int bmsize; /* bitmap size, based on max char code */
+ int index; /* index into class list */
+
+ /* or a literal sequence */
+ char chr; /* of one character */
+ int length; /* or many (via the index) */
+ int flatIndex; /* which is -1 if not sourced */
+
+}
+
+/*
+ * Mutable bookkeeping for one regexp compilation: the source characters with
+ * the current and end positions, the flags, running counters, and the
+ * resulting node.
+ */
+class CompilerState {
+
+    Context cx;
+    char[] cpbegin;
+    int cpend;
+    int cp;
+    int flags;
+    int parenCount;
+    int parenNesting;
+    int classCount;   /* number of [] encountered */
+    int progLength;   /* estimated bytecode length */
+    RENode result;
+
+    /*
+     * Starts a compilation over the first `length` characters of `source`;
+     * the position and every counter set here begin at zero.
+     */
+    CompilerState(Context cx, char[] source, int length, int flags)
+    {
+        this.cx = cx;
+        this.flags = flags;
+
+        this.cpbegin = source;
+        this.cpend = length;
+        this.cp = 0;
+
+        this.progLength = 0;
+        this.classCount = 0;
+        this.parenCount = 0;
+    }
+}
+
+/*
+ * One entry of a linked stack of program states: the quantifier bounds in
+ * effect, the progress in the text, the continuation (op and pc), and the
+ * backtrack data used by ASSERT_ to recover state.
+ */
+class REProgState
+{
+    REProgState previous;       // previous state in stack
+
+    int min;                    /* current quantifier min */
+    int max;                    /* current quantifier max */
+    int index;                  /* progress in text */
+    int continuation_op;
+    int continuation_pc;
+    REBackTrackData backTrack;  // used by ASSERT_ to recover state
+
+    /* Stores every argument in the field of the same name. */
+    REProgState(REProgState previous, int min, int max, int index,
+                REBackTrackData backTrack,
+                int continuation_pc, int continuation_op)
+    {
+        this.previous = previous;
+        this.backTrack = backTrack;
+
+        this.min = min;
+        this.max = max;
+        this.index = index;
+
+        this.continuation_pc = continuation_pc;
+        this.continuation_op = continuation_op;
+    }
+}
+
+/*
+ * Snapshot of the matcher state taken from REGlobalData, linked to the
+ * previous top of the backtrack stack.
+ */
+class REBackTrackData {
+
+    REBackTrackData previous;
+
+    int continuation_op;        /* where to backtrack to */
+    int continuation_pc;
+    int lastParen;
+    long[] parens;              /* parenthesis captures */
+    int cp;                     /* char buffer index */
+    REProgState stateStackTop;  /* state of op that backtracked */
+
+    /*
+     * Records the continuation (op, pc) together with a copy of the current
+     * state of gData. The parens array is cloned so that later changes to
+     * gData.parens do not alter this snapshot; it stays null when gData has
+     * no parens array.
+     */
+    REBackTrackData(REGlobalData gData, int op, int pc)
+    {
+        continuation_op = op;
+        continuation_pc = pc;
+
+        previous = gData.backTrackStackTop;
+        stateStackTop = gData.stateStackTop;
+        cp = gData.cp;
+        lastParen = gData.lastParen;
+        parens = (gData.parens == null) ? null : gData.parens.clone();
+    }
+}
+
+class REGlobalData {
+ boolean multiline;
+ RECompiled regexp; /* the RE in execution */
+ int lastParen; /* highest paren set so far */
+ int skipped; /* chars skipped anchoring this r.e. */
+
+ int cp; /* char buffer index */
+ long[] parens; /* parens captures */
+
+ REProgState stateStackTop; /* stack of state of current ancestors */
+
+ REBackTrackData backTrackStackTop; /* last matched-so-far position */
+
+
+ /**
+ * Get start of parenthesis capture contents, -1 for empty.
+ */
+ int parens_index(int i)
+ {
+ return (int)(parens[i]);
+ }
+
+ /**
+ * Get length of parenthesis capture contents.
+ */
+ int parens_length(int i)
+ {
+ return (int)(parens[i] >>> 32);
+ }
+
+ void set_parens(int i, int index, int length)
+ {
+ parens[i] = (index & 0xffffffffL) | ((long)length << 32);
+ }
+
+}
+
+/*
+ * This class holds a bitmap representation of a character class from a
+ * regexp. There's a list of these referenced by the classList field of
+ * RECompiled, declared earlier in this file. The initial state has
+ * startIndex set to the offset in the original regexp source of the
+ * beginning of the class contents. The first use of the class converts the
+ * source representation into a bitmap.
+ *
+ * The converted, sense and bits fields are transient, so they are not
+ * written when an instance is serialized.
+ */
+final class RECharSet implements Serializable
+{
+ static final long serialVersionUID = 7931787979395898394L;
+
+ /* Stores the three arguments; the transient fields keep their defaults. */
+ RECharSet(int length, int startIndex, int strlength)
+ {
+ this.length = length;
+ this.startIndex = startIndex;
+ this.strlength = strlength;
+ }
+
+ int length;
+ int startIndex;
+ int strlength;
+
+ volatile transient boolean converted;
+ volatile transient boolean sense;
+ volatile transient byte[] bits;
+}
+
+
diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExpCtor.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExpCtor.java
new file mode 100644
index 0000000..808d62d
--- /dev/null
+++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExpCtor.java
@@ -0,0 +1,289 @@
+/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Rhino code, released
+ * May 6, 1998.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1997-1999
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Norris Boyd
+ * Igor Bukanov
+ * Brendan Eich
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License Version 2 or later (the "GPL"), in which
+ * case the provisions of the GPL are applicable instead of those above. If
+ * you wish to allow use of your version of this file only under the terms of
+ * the GPL and not to allow others to use your version of this file under the
+ * MPL, indicate your decision by deleting the provisions above and replacing
+ * them with the notice and other provisions required by the GPL. If you do
+ * not delete the provisions above, a recipient may use your version of this
+ * file under either the MPL or the GPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package org.mozilla.javascript.regexp;
+
+import org.mozilla.javascript.*;
+
+/**
+ * This class implements the RegExp constructor native object.
+ *
+ * Revision History:
+ * Implementation in C by Brendan Eich
+ * Initial port to Java by Norris Boyd from jsregexp.c version 1.36
+ * Merged up to version 1.38, which included Unicode support.
+ * Merged bug fixes in version 1.39.
+ * Merged JSFUN13_BRANCH changes up to 1.32.2.11
+ *
+ * @author Brendan Eich
+ * @author Norris Boyd
+ */
+class NativeRegExpCtor extends BaseFunction
+{
+ static final long serialVersionUID = -5733330028285400526L;
+
+ /** Creates the constructor function object; performs no initialization. */
+ NativeRegExpCtor()
+ {
+ }
+
+ /** Returns the name of this function object, "RegExp". */
+ public String getFunctionName()
+ {
+ return "RegExp";
+ }
+
+ public Object call(Context cx, Scriptable scope, Scriptable thisObj,
+ Object[] args)
+ {
+ if (args.length > 0 && args[0] instanceof NativeRegExp &&
+ (args.length == 1 || args[1] == Undefined.instance))
+ {
+ return args[0];
+ }
+ return construct(cx, scope, args);
+ }
+
+ /**
+  * Creates a new NativeRegExp, compiles it from args, and then sets its
+  * prototype and parent from scope.
+  *
+  * @return the newly created regexp object
+  */
+ public Scriptable construct(Context cx, Scriptable scope, Object[] args)
+ {
+     NativeRegExp created = new NativeRegExp();
+     // Compile first: a failing compile propagates before the object is
+     // attached to the scope.
+     created.compile(cx, scope, args);
+     ScriptRuntime.setObjectProtoAndParent(created, scope);
+     return created;
+ }
+
+ /**
+  * Returns the regexp proxy of the current Context, cast to RegExpImpl.
+  */
+ private static RegExpImpl getImpl()
+ {
+     return (RegExpImpl) ScriptRuntime.getRegExpProxy(
+         Context.getCurrentContext());
+ }
+
+// #string_id_map#
+
+ private static final int
+ Id_multiline = 1,
+ Id_STAR = 2, // #string=$*#
+
+ Id_input = 3,
+ Id_UNDERSCORE = 4, // #string=$_#
+
+ Id_lastMatch = 5,
+ Id_AMPERSAND = 6, // #string=$&#
+
+ Id_lastParen = 7,
+ Id_PLUS = 8, // #string=$+#
+
+ Id_leftContext = 9,
+ Id_BACK_QUOTE = 10, // #string=$`#
+
+ Id_rightContext = 11,
+ Id_QUOTE = 12, // #string=$'#
+
+ DOLLAR_ID_BASE = 12;
+
+ private static final int
+ Id_DOLLAR_1 = DOLLAR_ID_BASE + 1, // #string=$1#
+ Id_DOLLAR_2 = DOLLAR_ID_BASE + 2, // #string=$2#
+ Id_DOLLAR_3 = DOLLAR_ID_BASE + 3, // #string=$3#
+ Id_DOLLAR_4 = DOLLAR_ID_BASE + 4, // #string=$4#
+ Id_DOLLAR_5 = DOLLAR_ID_BASE + 5, // #string=$5#
+ Id_DOLLAR_6 = DOLLAR_ID_BASE + 6, // #string=$6#
+ Id_DOLLAR_7 = DOLLAR_ID_BASE + 7, // #string=$7#
+ Id_DOLLAR_8 = DOLLAR_ID_BASE + 8, // #string=$8#
+ Id_DOLLAR_9 = DOLLAR_ID_BASE + 9, // #string=$9#
+
+ MAX_INSTANCE_ID = DOLLAR_ID_BASE + 9;
+
+ /**
+ * Returns the highest instance id in use: the ids of this class are
+ * numbered after those of the superclass, so the result is the
+ * superclass maximum plus MAX_INSTANCE_ID.
+ */
+ protected int getMaxInstanceId()
+ {
+ return super.getMaxInstanceId() + MAX_INSTANCE_ID;
+ }
+
+ /**
+ * Maps a property name of the RegExp constructor (multiline, input,
+ * lastMatch, lastParen, leftContext, rightContext, their "$x" aliases
+ * and $1..$9) to its id and attributes.
+ *
+ * The lookup between the #generated# markers is marked as generated
+ * code. Two-character names are matched directly on both characters;
+ * longer names are selected by length and then confirmed with a full
+ * string comparison. Unrecognized names are delegated to the superclass.
+ *
+ * @param s the property name to look up
+ * @return instanceIdInfo for the id shifted by super.getMaxInstanceId(),
+ * or the superclass result for unknown names
+ */
+ protected int findInstanceIdInfo(String s) {
+ int id;
+// #generated# Last update: 2001-05-24 16:09:31 GMT+02:00
+ L0: { id = 0; String X = null; int c;
+ L: switch (s.length()) {
+ case 2: switch (s.charAt(1)) {
+ case '&': if (s.charAt(0)=='$') {id=Id_AMPERSAND; break L0;} break L;
+ case '\'': if (s.charAt(0)=='$') {id=Id_QUOTE; break L0;} break L;
+ case '*': if (s.charAt(0)=='$') {id=Id_STAR; break L0;} break L;
+ case '+': if (s.charAt(0)=='$') {id=Id_PLUS; break L0;} break L;
+ case '1': if (s.charAt(0)=='$') {id=Id_DOLLAR_1; break L0;} break L;
+ case '2': if (s.charAt(0)=='$') {id=Id_DOLLAR_2; break L0;} break L;
+ case '3': if (s.charAt(0)=='$') {id=Id_DOLLAR_3; break L0;} break L;
+ case '4': if (s.charAt(0)=='$') {id=Id_DOLLAR_4; break L0;} break L;
+ case '5': if (s.charAt(0)=='$') {id=Id_DOLLAR_5; break L0;} break L;
+ case '6': if (s.charAt(0)=='$') {id=Id_DOLLAR_6; break L0;} break L;
+ case '7': if (s.charAt(0)=='$') {id=Id_DOLLAR_7; break L0;} break L;
+ case '8': if (s.charAt(0)=='$') {id=Id_DOLLAR_8; break L0;} break L;
+ case '9': if (s.charAt(0)=='$') {id=Id_DOLLAR_9; break L0;} break L;
+ case '_': if (s.charAt(0)=='$') {id=Id_UNDERSCORE; break L0;} break L;
+ case '`': if (s.charAt(0)=='$') {id=Id_BACK_QUOTE; break L0;} break L;
+ } break L;
+ case 5: X="input";id=Id_input; break L;
+ case 9: c=s.charAt(4);
+ if (c=='M') { X="lastMatch";id=Id_lastMatch; }
+ else if (c=='P') { X="lastParen";id=Id_lastParen; }
+ else if (c=='i') { X="multiline";id=Id_multiline; }
+ break L;
+ case 11: X="leftContext";id=Id_leftContext; break L;
+ case 12: X="rightContext";id=Id_rightContext; break L;
+ }
+ if (X!=null && X!=s && !X.equals(s)) id = 0;
+ }
+// #/generated#
+
+ if (id == 0) return super.findInstanceIdInfo(s);
+
+ int attr;
+ switch (id) {
+ // multiline and input (and their $* / $_ aliases) are not READONLY.
+ case Id_multiline:
+ case Id_STAR:
+ case Id_input:
+ case Id_UNDERSCORE:
+ attr = PERMANENT;
+ break;
+ default:
+ attr = PERMANENT | READONLY;
+ break;
+ }
+
+ // Local ids are numbered after the superclass ids.
+ return instanceIdInfo(attr, super.getMaxInstanceId() + id);
+ }
+
+// #/string_id_map#
+
+ /**
+  * Returns the property name for an instance id. Ids belonging to this
+  * class are those which, after subtracting the superclass maximum, fall
+  * in 1..MAX_INSTANCE_ID; all others are resolved by the superclass.
+  *
+  * @param id the instance property id
+  * @return the property name
+  */
+ protected String getInstanceIdName(int id)
+ {
+     int shifted = id - super.getMaxInstanceId();
+     if (shifted < 1 || shifted > MAX_INSTANCE_ID) {
+         return super.getInstanceIdName(id);
+     }
+
+     switch (shifted) {
+       case Id_multiline:    return "multiline";
+       case Id_STAR:         return "$*";
+
+       case Id_input:        return "input";
+       case Id_UNDERSCORE:   return "$_";
+
+       case Id_lastMatch:    return "lastMatch";
+       case Id_AMPERSAND:    return "$&";
+
+       case Id_lastParen:    return "lastParen";
+       case Id_PLUS:         return "$+";
+
+       case Id_leftContext:  return "leftContext";
+       case Id_BACK_QUOTE:   return "$`";
+
+       case Id_rightContext: return "rightContext";
+       case Id_QUOTE:        return "$'";
+
+       default:
+         break;
+     }
+
+     // Must be one of $1..$9, convert to 0..8
+     int zeroBased = shifted - DOLLAR_ID_BASE - 1;
+     char digit = (char)('1' + zeroBased);
+     return String.valueOf(new char[] { '$', digit });
+ }
+
+ /**
+  * Returns the value of one of the constructor's properties, read from
+  * the RegExpImpl of the current context. multiline (and $*) yield a
+  * wrapped boolean; every other property yields a String, with "" used
+  * when the underlying value is null. Ids outside this class's range are
+  * handled by the superclass.
+  *
+  * @param id the instance property id
+  * @return the property value
+  */
+ protected Object getInstanceIdValue(int id)
+ {
+     int shifted = id - super.getMaxInstanceId();
+     if (shifted < 1 || shifted > MAX_INSTANCE_ID) {
+         return super.getInstanceIdValue(id);
+     }
+
+     RegExpImpl impl = getImpl();
+     Object found;
+     switch (shifted) {
+       case Id_multiline:
+       case Id_STAR:
+         return ScriptRuntime.wrapBoolean(impl.multiline);
+
+       case Id_input:
+       case Id_UNDERSCORE:
+         found = impl.input;
+         break;
+
+       case Id_lastMatch:
+       case Id_AMPERSAND:
+         found = impl.lastMatch;
+         break;
+
+       case Id_lastParen:
+       case Id_PLUS:
+         found = impl.lastParen;
+         break;
+
+       case Id_leftContext:
+       case Id_BACK_QUOTE:
+         found = impl.leftContext;
+         break;
+
+       case Id_rightContext:
+       case Id_QUOTE:
+         found = impl.rightContext;
+         break;
+
+       default:
+         // Must be one of $1..$9, convert to 0..8
+         found = impl.getParenSubString(shifted - DOLLAR_ID_BASE - 1);
+         break;
+     }
+
+     if (found == null) {
+         return "";
+     }
+     return found.toString();
+ }
+
+ /**
+  * Assigns one of the writable constructor properties on the RegExpImpl
+  * of the current context: multiline / $* (converted with toBoolean) and
+  * input / $_ (converted with toString). Every other id is forwarded to
+  * the superclass.
+  *
+  * @param id    the instance property id
+  * @param value the value being assigned
+  */
+ protected void setInstanceIdValue(int id, Object value)
+ {
+     int shifted = id - super.getMaxInstanceId();
+     if (shifted == Id_multiline || shifted == Id_STAR) {
+         getImpl().multiline = ScriptRuntime.toBoolean(value);
+     } else if (shifted == Id_input || shifted == Id_UNDERSCORE) {
+         getImpl().input = ScriptRuntime.toString(value);
+     } else {
+         super.setInstanceIdValue(id, value);
+     }
+ }
+
+}
diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java
new file mode 100644
index 0000000..4b0a303
--- /dev/null
+++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java
@@ -0,0 +1,541 @@
+/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Rhino code, released
+ * May 6, 1998.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1997-1999
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License Version 2 or later (the "GPL"), in which
+ * case the provisions of the GPL are applicable instead of those above. If
+ * you wish to allow use of your version of this file only under the terms of
+ * the GPL and not to allow others to use your version of this file under the
+ * MPL, indicate your decision by deleting the provisions above and replacing
+ * them with the notice and other provisions required by the GPL. If you do
+ * not delete the provisions above, a recipient may use your version of this
+ * file under either the MPL or the GPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package org.mozilla.javascript.regexp;
+
+import org.mozilla.javascript.*;
+
+/**
+ *
+ */
+public class RegExpImpl implements RegExpProxy {
+
+ /** Returns true if obj is an instance of NativeRegExp. */
+ public boolean isRegExp(Scriptable obj) {
+ return obj instanceof NativeRegExp;
+ }
+
+ /**
+ * Compiles source with the given flags by delegating to
+ * NativeRegExp.compileRE, passing false as its last argument.
+ */
+ public Object compileRegExp(Context cx, String source, String flags)
+ {
+ return NativeRegExp.compileRE(cx, source, flags, false);
+ }
+
+ /**
+ * Wraps a compiled regexp in a new NativeRegExp created for scope.
+ * The cx argument is not used here.
+ */
+ public Scriptable wrapRegExp(Context cx, Scriptable scope,
+ Object compiled)
+ {
+ return new NativeRegExp(scope, compiled);
+ }
+
+ /**
+  * Performs one of the regexp-driven string operations selected by
+  * actionType (RA_MATCH, RA_SEARCH or RA_REPLACE) on thisObj.
+  *
+  * RA_MATCH returns the array collected during a global match if there
+  * is one, otherwise the result of matchOrReplace. RA_SEARCH returns the
+  * result of matchOrReplace directly. RA_REPLACE uses args[1] either as
+  * a replacement function or, converted to a string, as a replacement
+  * pattern, and returns the resulting string (the unchanged subject
+  * string when nothing matched).
+  *
+  * Any other actionType ends in Kit.codeBug().
+  */
+ public Object action(Context cx, Scriptable scope,
+                      Scriptable thisObj, Object[] args,
+                      int actionType)
+ {
+     GlobData data = new GlobData();
+     data.mode = actionType;
+
+     if (actionType == RA_MATCH) {
+         data.optarg = 1;
+         Object matchResult = matchOrReplace(cx, scope, thisObj, args,
+                                             this, data, false);
+         if (data.arrayobj != null) {
+             return data.arrayobj;
+         }
+         return matchResult;
+     }
+
+     if (actionType == RA_SEARCH) {
+         data.optarg = 1;
+         return matchOrReplace(cx, scope, thisObj, args,
+                               this, data, false);
+     }
+
+     if (actionType != RA_REPLACE) {
+         throw Kit.codeBug();
+     }
+
+     // The replacement is either a function or a (possibly $-bearing) string.
+     Object replacement = (args.length >= 2) ? args[1] : Undefined.instance;
+     Function lambda = null;
+     String repstr = null;
+     if (replacement instanceof Function) {
+         lambda = (Function) replacement;
+     } else {
+         repstr = ScriptRuntime.toString(replacement);
+     }
+
+     data.optarg = 2;
+     data.lambda = lambda;
+     data.repstr = repstr;
+     data.dollar = (repstr != null) ? repstr.indexOf('$') : -1;
+     data.charBuf = null;
+     data.leftIndex = 0;
+     Object outcome = matchOrReplace(cx, scope, thisObj, args,
+                                     this, data, true);
+     SubString tail = this.rightContext;
+
+     if (data.charBuf == null) {
+         // No replacement has been built yet: only a successful
+         // non-global match still needs its single substitution.
+         boolean singleMatch = !data.global
+                               && outcome != null
+                               && outcome.equals(Boolean.TRUE);
+         if (!singleMatch) {
+             /* Didn't match even once. */
+             return data.str;
+         }
+         SubString head = this.leftContext;
+         replace_glob(data, cx, scope, this, head.index, head.length);
+     }
+     data.charBuf.append(tail.charArray, tail.index, tail.length);
+     return data.charBuf.toString();
+ }
+
+ /**
+  * Analog of C match_or_replace.
+  *
+  * Converts thisObj to the subject string (stored in data.str), obtains
+  * the regexp from args[0] (an empty pattern when there are no
+  * arguments, the object itself when it already is a NativeRegExp,
+  * otherwise a regexp compiled from its string form, with flags taken
+  * from args[data.optarg] when present), and then runs it according to
+  * data.mode:
+  *
+  * RA_SEARCH: returns an Integer, the length of the left context of the
+  * match, or -1 when there is no match.
+  * Global regexp: resets lastIndex and repeatedly executes the regexp,
+  * calling match_glob or replace_glob for every match; returns the
+  * result of the last execution.
+  * Otherwise: returns the result of a single execution (TEST mode for
+  * RA_REPLACE, MATCH mode for the rest).
+  *
+  * Note that args[0] is overwritten with its string form when a flags
+  * argument is present.
+  *
+  * @param forceFlat passed through to NativeRegExp.compileRE
+  */
+ private static Object matchOrReplace(Context cx, Scriptable scope,
+                                      Scriptable thisObj, Object[] args,
+                                      RegExpImpl reImpl,
+                                      GlobData data, boolean forceFlat)
+ {
+     NativeRegExp re;
+
+     String str = ScriptRuntime.toString(thisObj);
+     data.str = str;
+     Scriptable topScope = ScriptableObject.getTopLevelScope(scope);
+
+     if (args.length == 0) {
+         Object compiled = NativeRegExp.compileRE(cx, "", "", false);
+         re = new NativeRegExp(topScope, compiled);
+     } else if (args[0] instanceof NativeRegExp) {
+         re = (NativeRegExp) args[0];
+     } else {
+         String src = ScriptRuntime.toString(args[0]);
+         String opt;
+         if (data.optarg < args.length) {
+             args[0] = src;
+             opt = ScriptRuntime.toString(args[data.optarg]);
+         } else {
+             opt = null;
+         }
+         Object compiled = NativeRegExp.compileRE(cx, src, opt, forceFlat);
+         re = new NativeRegExp(topScope, compiled);
+     }
+     data.regexp = re;
+
+     data.global = (re.getFlags() & NativeRegExp.JSREG_GLOB) != 0;
+     int[] indexp = { 0 };
+     Object result = null;
+     if (data.mode == RA_SEARCH) {
+         result = re.executeRegExp(cx, scope, reImpl,
+                                   str, indexp, NativeRegExp.TEST);
+         // Integer.valueOf avoids allocating a new box for each search.
+         if (result != null && result.equals(Boolean.TRUE))
+             result = Integer.valueOf(reImpl.leftContext.length);
+         else
+             result = Integer.valueOf(-1);
+     } else if (data.global) {
+         re.lastIndex = 0;
+         for (int count = 0; indexp[0] <= str.length(); count++) {
+             result = re.executeRegExp(cx, scope, reImpl,
+                                       str, indexp, NativeRegExp.TEST);
+             if (result == null || !result.equals(Boolean.TRUE))
+                 break;
+             if (data.mode == RA_MATCH) {
+                 match_glob(data, cx, scope, count, reImpl);
+             } else {
+                 if (data.mode != RA_REPLACE) Kit.codeBug();
+                 SubString lastMatch = reImpl.lastMatch;
+                 int leftIndex = data.leftIndex;
+                 int leftlen = lastMatch.index - leftIndex;
+                 data.leftIndex = lastMatch.index + lastMatch.length;
+                 replace_glob(data, cx, scope, reImpl, leftIndex, leftlen);
+             }
+             // An empty match would be found again at the same position:
+             // step one character forward, or stop at the end of input.
+             if (reImpl.lastMatch.length == 0) {
+                 if (indexp[0] == str.length())
+                     break;
+                 indexp[0]++;
+             }
+         }
+     } else {
+         result = re.executeRegExp(cx, scope, reImpl, str, indexp,
+                                   ((data.mode == RA_REPLACE)
+                                    ? NativeRegExp.TEST
+                                    : NativeRegExp.MATCH));
+     }
+
+     return result;
+ }
+
+
+
+ /**
+  * Finds the next separator match in target for a split operation,
+  * starting at ip[0].
+  *
+  * On return matched[0] tells whether the regexp matched, matchlen[0]
+  * holds the length of the separator (1 on a mismatch) and parensp[0]
+  * holds the string form of every capture. ip[0] is restored to the
+  * value it had on entry. The separator parameter is not used here.
+  *
+  * @return the index at which the separator starts; target.length() when
+  *         there is no match; for an empty match at the end of the
+  *         string, -1 (or the end index under VERSION_1_2)
+  */
+ public int find_split(Context cx, Scriptable scope, String target,
+                       String separator, Scriptable reObj,
+                       int[] ip, int[] matchlen,
+                       boolean[] matched, String[][] parensp)
+ {
+     int i = ip[0];
+     int length = target.length();
+     int result;
+
+     int version = cx.getLanguageVersion();
+     NativeRegExp re = (NativeRegExp) reObj;
+     again:
+     while (true) {  // imitating C label
+         /* JS1.2 deviated from Perl by never matching at end of string. */
+         int ipsave = ip[0]; // reuse ip to save object creation
+         ip[0] = i;
+         Object ret = re.executeRegExp(cx, scope, this, target, ip,
+                                       NativeRegExp.TEST);
+         // Compare by value, as the other executeRegExp callers in this
+         // class do, rather than by reference identity.
+         if (!Boolean.TRUE.equals(ret)) {
+             // Mismatch: ensure our caller advances i past end of string.
+             ip[0] = ipsave;
+             matchlen[0] = 1;
+             matched[0] = false;
+             return length;
+         }
+         i = ip[0];
+         ip[0] = ipsave;
+         matched[0] = true;
+
+         SubString sep = this.lastMatch;
+         matchlen[0] = sep.length;
+         if (matchlen[0] == 0) {
+             /*
+              * Empty string match: never split on an empty
+              * match at the start of a find_split cycle.  Same
+              * rule as for an empty global match in
+              * match_or_replace.
+              */
+             if (i == ip[0]) {
+                 /*
+                  * "Bump-along" to avoid sticking at an empty
+                  * match, but don't bump past end of string --
+                  * our caller must do that by adding
+                  * sep->length to our return value.
+                  */
+                 if (i == length) {
+                     if (version == Context.VERSION_1_2) {
+                         matchlen[0] = 1;
+                         result = i;
+                     }
+                     else
+                         result = -1;
+                     break;
+                 }
+                 i++;
+                 continue again; // imitating C goto
+             }
+         }
+         // PR_ASSERT((size_t)i >= sep->length);
+         result = i - matchlen[0];
+         break;
+     }
+     int size = (parens == null) ? 0 : parens.length;
+     parensp[0] = new String[size];
+     for (int num = 0; num < size; num++) {
+         SubString parsub = getParenSubString(num);
+         parensp[0][num] = parsub.toString();
+     }
+     return result;
+ }
+
+ /**
+  * Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h.
+  * Assumes zero-based; i.e., for $3, i==2
+  *
+  * @return the capture at index i, or SubString.emptySubString when there
+  *         are no captures, i is past the last capture, or that capture
+  *         is null
+  */
+ SubString getParenSubString(int i)
+ {
+     if (parens == null || i >= parens.length) {
+         return SubString.emptySubString;
+     }
+     SubString captured = parens[i];
+     return (captured != null) ? captured : SubString.emptySubString;
+ }
+
+ /*
+  * Analog of match_glob() in jsstr.c
+  *
+  * Stores the text of the last match at index count of the result array,
+  * creating that array in the top-level scope on first use.
+  */
+ private static void match_glob(GlobData mdata, Context cx,
+                                Scriptable scope, int count,
+                                RegExpImpl reImpl)
+ {
+     Scriptable results = mdata.arrayobj;
+     if (results == null) {
+         Scriptable top = ScriptableObject.getTopLevelScope(scope);
+         results = ScriptRuntime.newObject(cx, top, "Array", null);
+         mdata.arrayobj = results;
+     }
+     results.put(count, results, reImpl.lastMatch.toString());
+ }
+
+ /*
+  * Analog of replace_glob() in jsstr.c
+  *
+  * Appends to rdata.charBuf (creating it if necessary) the leftlen
+  * characters of the left context starting at leftIndex, followed by the
+  * replacement for the last match. The replacement is either the string
+  * form of the result of calling rdata.lambda, or rdata.repstr with its
+  * '$' escapes expanded by do_replace.
+  */
+ private static void replace_glob(GlobData rdata, Context cx,
+                                  Scriptable scope, RegExpImpl reImpl,
+                                  int leftIndex, int leftlen)
+ {
+     int replen;
+     String lambdaStr;
+     if (rdata.lambda != null) {
+         // invoke lambda function with args lastMatch, $1, $2, ... $n,
+         // leftContext.length, whole string.
+         SubString[] parens = reImpl.parens;
+         int parenCount = (parens == null) ? 0 : parens.length;
+         Object[] args = new Object[parenCount + 3];
+         args[0] = reImpl.lastMatch.toString();
+         for (int i=0; i < parenCount; i++) {
+             SubString sub = parens[i];
+             if (sub != null) {
+                 args[i+1] = sub.toString();
+             } else {
+                 args[i+1] = Undefined.instance;
+             }
+         }
+         // Integer.valueOf avoids allocating a new box per replacement.
+         args[parenCount+1] = Integer.valueOf(reImpl.leftContext.length);
+         args[parenCount+2] = rdata.str;
+         // This is a hack to prevent expose of reImpl data to
+         // JS function which can run new regexps modifing
+         // regexp that are used later by the engine.
+         // TODO: redesign is necessary
+         if (reImpl != ScriptRuntime.getRegExpProxy(cx)) Kit.codeBug();
+         RegExpImpl re2 = new RegExpImpl();
+         re2.multiline = reImpl.multiline;
+         re2.input = reImpl.input;
+         ScriptRuntime.setRegExpProxy(cx, re2);
+         try {
+             Scriptable parent = ScriptableObject.getTopLevelScope(scope);
+             Object result = rdata.lambda.call(cx, parent, parent, args);
+             lambdaStr = ScriptRuntime.toString(result);
+         } finally {
+             // Always reinstall the original proxy, even if the lambda throws.
+             ScriptRuntime.setRegExpProxy(cx, reImpl);
+         }
+         replen = lambdaStr.length();
+     } else {
+         lambdaStr = null;
+         replen = rdata.repstr.length();
+         if (rdata.dollar >= 0) {
+             // Pre-compute the expanded length so the buffer can be sized
+             // before do_replace appends to it.
+             int[] skip = new int[1];
+             int dp = rdata.dollar;
+             do {
+                 SubString sub = interpretDollar(cx, reImpl, rdata.repstr,
+                                                 dp, skip);
+                 if (sub != null) {
+                     replen += sub.length - skip[0];
+                     dp += skip[0];
+                 } else {
+                     ++dp;
+                 }
+                 dp = rdata.repstr.indexOf('$', dp);
+             } while (dp >= 0);
+         }
+     }
+
+     int growth = leftlen + replen + reImpl.rightContext.length;
+     StringBuffer charBuf = rdata.charBuf;
+     if (charBuf == null) {
+         charBuf = new StringBuffer(growth);
+         rdata.charBuf = charBuf;
+     } else {
+         charBuf.ensureCapacity(rdata.charBuf.length() + growth);
+     }
+
+     charBuf.append(reImpl.leftContext.charArray, leftIndex, leftlen);
+     if (rdata.lambda != null) {
+         charBuf.append(lambdaStr);
+     } else {
+         do_replace(rdata, cx, reImpl);
+     }
+ }
+
+ /**
+ * Interprets the '$' escape found at position dp of the replacement
+ * string da.
+ *
+ * When the escape is recognized, the SubString to substitute is returned
+ * and skip[0] is set to the number of characters of da that the escape
+ * occupies. When null is returned the '$' is not an escape; the callers
+ * in this class then advance by a single character and ignore skip[0].
+ *
+ * Behavior depends on the language version: for versions up to 1.4
+ * (other than VERSION_DEFAULT) a '$' preceded by a backslash is left
+ * alone and "$n" may consist of any number of digits; otherwise at most
+ * two digits are read and the number must not exceed the number of
+ * captures. Under VERSION_1_2 the "$`" escape also modifies
+ * res.leftContext in place.
+ *
+ * @param res  regexp state supplying the match-related substrings
+ * @param da   the replacement string
+ * @param dp   index of a '$' character in da
+ * @param skip one-element output array, see above
+ */
+ private static SubString interpretDollar(Context cx, RegExpImpl res,
+ String da, int dp, int[] skip)
+ {
+ char dc;
+ int num, tmp;
+
+ if (da.charAt(dp) != '$') Kit.codeBug();
+
+ /* Allow a real backslash (literal "\\") to escape "$1" etc. */
+ int version = cx.getLanguageVersion();
+ if (version != Context.VERSION_DEFAULT
+ && version <= Context.VERSION_1_4)
+ {
+ if (dp > 0 && da.charAt(dp - 1) == '\\')
+ return null;
+ }
+ int daL = da.length();
+ // A '$' at the very end of the string has nothing to interpret.
+ if (dp + 1 >= daL)
+ return null;
+ /* Interpret all Perl match-induced dollar variables. */
+ dc = da.charAt(dp + 1);
+ if (NativeRegExp.isDigit(dc)) {
+ int cp;
+ if (version != Context.VERSION_DEFAULT
+ && version <= Context.VERSION_1_4)
+ {
+ if (dc == '0')
+ return null;
+ /* Check for overflow to avoid gobbling arbitrary decimal digits. */
+ num = 0;
+ cp = dp;
+ while (++cp < daL && NativeRegExp.isDigit(dc = da.charAt(cp)))
+ {
+ tmp = 10 * num + (dc - '0');
+ if (tmp < num)
+ break;
+ num = tmp;
+ }
+ }
+ else { /* ECMA 3, 1-9 or 01-99 */
+ int parenCount = (res.parens == null) ? 0 : res.parens.length;
+ num = dc - '0';
+ if (num > parenCount)
+ return null;
+ cp = dp + 2;
+ if ((dp + 2) < daL) {
+ dc = da.charAt(dp + 2);
+ if (NativeRegExp.isDigit(dc)) {
+ // Take the second digit only if the two-digit number
+ // still refers to an existing capture.
+ tmp = 10 * num + (dc - '0');
+ if (tmp <= parenCount) {
+ cp++;
+ num = tmp;
+ }
+ }
+ }
+ if (num == 0) return null; /* $0 or $00 is not valid */
+ }
+ /* Adjust num from 1 $n-origin to 0 array-index-origin. */
+ num--;
+ skip[0] = cp - dp;
+ return res.getParenSubString(num);
+ }
+
+ // Every remaining escape is exactly two characters long.
+ skip[0] = 2;
+ switch (dc) {
+ case '$':
+ return new SubString("$");
+ case '&':
+ return res.lastMatch;
+ case '+':
+ return res.lastParen;
+ case '`':
+ if (version == Context.VERSION_1_2) {
+ /*
+ * JS1.2 imitated the Perl4 bug where left context at each step
+ * in an iterative use of a global regexp started from last match,
+ * not from the start of the target string. But Perl4 does start
+ * $` at the beginning of the target string when it is used in a
+ * substitution, so we emulate that special case here.
+ */
+ res.leftContext.index = 0;
+ res.leftContext.length = res.lastMatch.index;
+ }
+ return res.leftContext;
+ case '\'':
+ return res.rightContext;
+ }
+ return null;
+ }
+
+ /**
+  * Analog of do_replace in jsstr.c
+  *
+  * Appends rdata.repstr to rdata.charBuf, expanding every '$' escape that
+  * interpretDollar recognizes and copying everything else literally.
+  * rdata.dollar must be -1 or the index of the first '$' in repstr.
+  */
+ private static void do_replace(GlobData rdata, Context cx,
+                                RegExpImpl regExpImpl)
+ {
+     StringBuffer charBuf = rdata.charBuf;
+     int cp = 0;
+     String da = rdata.repstr;
+     int dp = rdata.dollar;
+     if (dp != -1) {
+         int[] skip = new int[1];
+         do {
+             // Copy the literal text before this '$' straight from the
+             // string, without creating a temporary substring.
+             charBuf.append(da, cp, dp);
+             cp = dp;
+             SubString sub = interpretDollar(cx, regExpImpl, da,
+                                             dp, skip);
+             if (sub != null) {
+                 int len = sub.length;
+                 if (len > 0) {
+                     charBuf.append(sub.charArray, sub.index, len);
+                 }
+                 cp += skip[0];
+                 dp += skip[0];
+             } else {
+                 // Not an escape: leave cp on the '$' so that it is
+                 // copied literally with the next chunk.
+                 ++dp;
+             }
+             dp = da.indexOf('$', dp);
+         } while (dp >= 0);
+     }
+     int daL = da.length();
+     if (daL > cp) {
+         charBuf.append(da, cp, daL);
+     }
+ }
+
+ String input; /* input string to match (perl $_, GC root) */
+ boolean multiline; /* whether input contains newlines (perl $*) */
+ SubString[] parens; /* Vector of SubString; last set of parens
+ matched (perl $1, $2) */
+ SubString lastMatch; /* last string matched (perl $&) */
+ SubString lastParen; /* last paren matched (perl $+) */
+ SubString leftContext; /* input to left of last match (perl $`) */
+ SubString rightContext; /* input to right of last match (perl $') */
+}
+
+
+/*
+ * Parameter and result holder passed between RegExpImpl.action,
+ * matchOrReplace and the match_glob / replace_glob / do_replace helpers
+ * for a single match, search or replace operation.
+ */
+final class GlobData
+{
+ int mode; /* input: return index, match object, or void */
+ int optarg; /* input: index of optional flags argument */
+ boolean global; /* output: whether regexp was global */
+ String str; /* output: 'this' parameter object as string */
+ NativeRegExp regexp;/* output: regexp parameter object private data */
+
+ // match-specific data
+
+ Scriptable arrayobj;
+
+ // replace-specific data
+
+ Function lambda; /* replacement function object or null */
+ String repstr; /* replacement string */
+ int dollar = -1; /* -1 or index of first $ in repstr */
+ StringBuffer charBuf; /* result characters, null initially */
+ int leftIndex; /* leftContext index, always 0 for JS1.2 */
+}
diff --git a/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java
new file mode 100644
index 0000000..00905ca
--- /dev/null
+++ b/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/SubString.java
@@ -0,0 +1,75 @@
+/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Rhino code, released
+ * May 6, 1998.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1997-1999
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License Version 2 or later (the "GPL"), in which
+ * case the provisions of the GPL are applicable instead of those above. If
+ * you wish to allow use of your version of this file only under the terms of
+ * the GPL and not to allow others to use your version of this file under the
+ * MPL, indicate your decision by deleting the provisions above and replacing
+ * them with the notice and other provisions required by the GPL. If you do
+ * not delete the provisions above, a recipient may use your version of this
+ * file under either the MPL or the GPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package org.mozilla.javascript.regexp;
+
+/**
+ * A run of characters described by a backing char array, a start offset
+ * and a length. All three fields are package-visible and mutable.
+ */
+class SubString {
+
+    /** Creates an empty substring: charArray stays null, index and length 0. */
+    public SubString()
+    {
+    }
+
+    /**
+     * Creates a substring that covers every character of str.
+     *
+     * @param str the text to wrap; its characters are copied into a new array
+     */
+    public SubString(String str)
+    {
+        index = 0;
+        charArray = str.toCharArray();
+        length = str.length();
+    }
+
+    /**
+     * Creates a substring backed by a private copy of
+     * source[start] .. source[start + len - 1].
+     *
+     * @param source array to copy from
+     * @param start  offset in source of the first character to copy
+     * @param len    number of characters to copy
+     * @throws IndexOutOfBoundsException if len is positive and the requested
+     *         range does not lie within source
+     */
+    public SubString(char[] source, int start, int len)
+    {
+        index = 0;
+        length = len;
+        charArray = new char[len];
+        // One bulk copy replaces the former char-by-char loop. It is skipped
+        // for an empty range so that source and start are still never
+        // inspected when len is 0, exactly as the loop behaved.
+        if (len > 0) {
+            System.arraycopy(source, start, charArray, 0, len);
+        }
+    }
+
+    /**
+     * Returns the covered characters as a String.
+     *
+     * @return "" when there is no backing array, otherwise the length
+     *         characters of charArray that start at index
+     */
+    public String toString() {
+        return charArray == null
+            ? ""
+            : new String(charArray, index, length);
+    }
+
+    /** Shared empty instance (null charArray); its fields are not final. */
+    static final SubString emptySubString = new SubString();
+
+    char[] charArray;  /* backing characters; null in the empty case */
+    int index;         /* offset of the first covered character */
+    int length;        /* number of covered characters */
+}
+