aboutsummaryrefslogtreecommitdiffstats
path: root/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java')
-rw-r--r--trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java541
1 files changed, 541 insertions, 0 deletions
diff --git a/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java b/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java
new file mode 100644
index 0000000..4b0a303
--- /dev/null
+++ b/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java
@@ -0,0 +1,541 @@
+/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Rhino code, released
+ * May 6, 1998.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1997-1999
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License Version 2 or later (the "GPL"), in which
+ * case the provisions of the GPL are applicable instead of those above. If
+ * you wish to allow use of your version of this file only under the terms of
+ * the GPL and not to allow others to use your version of this file under the
+ * MPL, indicate your decision by deleting the provisions above and replacing
+ * them with the notice and other provisions required by the GPL. If you do
+ * not delete the provisions above, a recipient may use your version of this
+ * file under either the MPL or the GPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package org.mozilla.javascript.regexp;
+
+import org.mozilla.javascript.*;
+
+/**
+ *
+ */
+public class RegExpImpl implements RegExpProxy {
+
+ public boolean isRegExp(Scriptable obj) {
+ return obj instanceof NativeRegExp;
+ }
+
+ public Object compileRegExp(Context cx, String source, String flags)
+ {
+ return NativeRegExp.compileRE(cx, source, flags, false);
+ }
+
+ public Scriptable wrapRegExp(Context cx, Scriptable scope,
+ Object compiled)
+ {
+ return new NativeRegExp(scope, compiled);
+ }
+
+ public Object action(Context cx, Scriptable scope,
+ Scriptable thisObj, Object[] args,
+ int actionType)
+ {
+ GlobData data = new GlobData();
+ data.mode = actionType;
+
+ switch (actionType) {
+ case RA_MATCH:
+ {
+ Object rval;
+ data.optarg = 1;
+ rval = matchOrReplace(cx, scope, thisObj, args,
+ this, data, false);
+ return data.arrayobj == null ? rval : data.arrayobj;
+ }
+
+ case RA_SEARCH:
+ data.optarg = 1;
+ return matchOrReplace(cx, scope, thisObj, args,
+ this, data, false);
+
+ case RA_REPLACE:
+ {
+ Object arg1 = args.length < 2 ? Undefined.instance : args[1];
+ String repstr = null;
+ Function lambda = null;
+ if (arg1 instanceof Function) {
+ lambda = (Function) arg1;
+ } else {
+ repstr = ScriptRuntime.toString(arg1);
+ }
+
+ data.optarg = 2;
+ data.lambda = lambda;
+ data.repstr = repstr;
+ data.dollar = repstr == null ? -1 : repstr.indexOf('$');
+ data.charBuf = null;
+ data.leftIndex = 0;
+ Object val = matchOrReplace(cx, scope, thisObj, args,
+ this, data, true);
+ SubString rc = this.rightContext;
+
+ if (data.charBuf == null) {
+ if (data.global || val == null
+ || !val.equals(Boolean.TRUE))
+ {
+ /* Didn't match even once. */
+ return data.str;
+ }
+ SubString lc = this.leftContext;
+ replace_glob(data, cx, scope, this, lc.index, lc.length);
+ }
+ data.charBuf.append(rc.charArray, rc.index, rc.length);
+ return data.charBuf.toString();
+ }
+
+ default:
+ throw Kit.codeBug();
+ }
+ }
+
+ /**
+ * Analog of C match_or_replace.
+ */
+ private static Object matchOrReplace(Context cx, Scriptable scope,
+ Scriptable thisObj, Object[] args,
+ RegExpImpl reImpl,
+ GlobData data, boolean forceFlat)
+ {
+ NativeRegExp re;
+
+ String str = ScriptRuntime.toString(thisObj);
+ data.str = str;
+ Scriptable topScope = ScriptableObject.getTopLevelScope(scope);
+
+ if (args.length == 0) {
+ Object compiled = NativeRegExp.compileRE(cx, "", "", false);
+ re = new NativeRegExp(topScope, compiled);
+ } else if (args[0] instanceof NativeRegExp) {
+ re = (NativeRegExp) args[0];
+ } else {
+ String src = ScriptRuntime.toString(args[0]);
+ String opt;
+ if (data.optarg < args.length) {
+ args[0] = src;
+ opt = ScriptRuntime.toString(args[data.optarg]);
+ } else {
+ opt = null;
+ }
+ Object compiled = NativeRegExp.compileRE(cx, src, opt, forceFlat);
+ re = new NativeRegExp(topScope, compiled);
+ }
+ data.regexp = re;
+
+ data.global = (re.getFlags() & NativeRegExp.JSREG_GLOB) != 0;
+ int[] indexp = { 0 };
+ Object result = null;
+ if (data.mode == RA_SEARCH) {
+ result = re.executeRegExp(cx, scope, reImpl,
+ str, indexp, NativeRegExp.TEST);
+ if (result != null && result.equals(Boolean.TRUE))
+ result = new Integer(reImpl.leftContext.length);
+ else
+ result = new Integer(-1);
+ } else if (data.global) {
+ re.lastIndex = 0;
+ for (int count = 0; indexp[0] <= str.length(); count++) {
+ result = re.executeRegExp(cx, scope, reImpl,
+ str, indexp, NativeRegExp.TEST);
+ if (result == null || !result.equals(Boolean.TRUE))
+ break;
+ if (data.mode == RA_MATCH) {
+ match_glob(data, cx, scope, count, reImpl);
+ } else {
+ if (data.mode != RA_REPLACE) Kit.codeBug();
+ SubString lastMatch = reImpl.lastMatch;
+ int leftIndex = data.leftIndex;
+ int leftlen = lastMatch.index - leftIndex;
+ data.leftIndex = lastMatch.index + lastMatch.length;
+ replace_glob(data, cx, scope, reImpl, leftIndex, leftlen);
+ }
+ if (reImpl.lastMatch.length == 0) {
+ if (indexp[0] == str.length())
+ break;
+ indexp[0]++;
+ }
+ }
+ } else {
+ result = re.executeRegExp(cx, scope, reImpl, str, indexp,
+ ((data.mode == RA_REPLACE)
+ ? NativeRegExp.TEST
+ : NativeRegExp.MATCH));
+ }
+
+ return result;
+ }
+
+
+
+ public int find_split(Context cx, Scriptable scope, String target,
+ String separator, Scriptable reObj,
+ int[] ip, int[] matchlen,
+ boolean[] matched, String[][] parensp)
+ {
+ int i = ip[0];
+ int length = target.length();
+ int result;
+
+ int version = cx.getLanguageVersion();
+ NativeRegExp re = (NativeRegExp) reObj;
+ again:
+ while (true) { // imitating C label
+ /* JS1.2 deviated from Perl by never matching at end of string. */
+ int ipsave = ip[0]; // reuse ip to save object creation
+ ip[0] = i;
+ Object ret = re.executeRegExp(cx, scope, this, target, ip,
+ NativeRegExp.TEST);
+ if (ret != Boolean.TRUE) {
+ // Mismatch: ensure our caller advances i past end of string.
+ ip[0] = ipsave;
+ matchlen[0] = 1;
+ matched[0] = false;
+ return length;
+ }
+ i = ip[0];
+ ip[0] = ipsave;
+ matched[0] = true;
+
+ SubString sep = this.lastMatch;
+ matchlen[0] = sep.length;
+ if (matchlen[0] == 0) {
+ /*
+ * Empty string match: never split on an empty
+ * match at the start of a find_split cycle. Same
+ * rule as for an empty global match in
+ * match_or_replace.
+ */
+ if (i == ip[0]) {
+ /*
+ * "Bump-along" to avoid sticking at an empty
+ * match, but don't bump past end of string --
+ * our caller must do that by adding
+ * sep->length to our return value.
+ */
+ if (i == length) {
+ if (version == Context.VERSION_1_2) {
+ matchlen[0] = 1;
+ result = i;
+ }
+ else
+ result = -1;
+ break;
+ }
+ i++;
+ continue again; // imitating C goto
+ }
+ }
+ // PR_ASSERT((size_t)i >= sep->length);
+ result = i - matchlen[0];
+ break;
+ }
+ int size = (parens == null) ? 0 : parens.length;
+ parensp[0] = new String[size];
+ for (int num = 0; num < size; num++) {
+ SubString parsub = getParenSubString(num);
+ parensp[0][num] = parsub.toString();
+ }
+ return result;
+ }
+
+ /**
+ * Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h.
+ * Assumes zero-based; i.e., for $3, i==2
+ */
+ SubString getParenSubString(int i)
+ {
+ if (parens != null && i < parens.length) {
+ SubString parsub = parens[i];
+ if (parsub != null) {
+ return parsub;
+ }
+ }
+ return SubString.emptySubString;
+ }
+
+ /*
+ * Analog of match_glob() in jsstr.c
+ */
+ private static void match_glob(GlobData mdata, Context cx,
+ Scriptable scope, int count,
+ RegExpImpl reImpl)
+ {
+ if (mdata.arrayobj == null) {
+ Scriptable s = ScriptableObject.getTopLevelScope(scope);
+ mdata.arrayobj = ScriptRuntime.newObject(cx, s, "Array", null);
+ }
+ SubString matchsub = reImpl.lastMatch;
+ String matchstr = matchsub.toString();
+ mdata.arrayobj.put(count, mdata.arrayobj, matchstr);
+ }
+
+ /*
+ * Analog of replace_glob() in jsstr.c
+ */
+ private static void replace_glob(GlobData rdata, Context cx,
+ Scriptable scope, RegExpImpl reImpl,
+ int leftIndex, int leftlen)
+ {
+ int replen;
+ String lambdaStr;
+ if (rdata.lambda != null) {
+ // invoke lambda function with args lastMatch, $1, $2, ... $n,
+ // leftContext.length, whole string.
+ SubString[] parens = reImpl.parens;
+ int parenCount = (parens == null) ? 0 : parens.length;
+ Object[] args = new Object[parenCount + 3];
+ args[0] = reImpl.lastMatch.toString();
+ for (int i=0; i < parenCount; i++) {
+ SubString sub = parens[i];
+ if (sub != null) {
+ args[i+1] = sub.toString();
+ } else {
+ args[i+1] = Undefined.instance;
+ }
+ }
+ args[parenCount+1] = new Integer(reImpl.leftContext.length);
+ args[parenCount+2] = rdata.str;
+ // This is a hack to prevent expose of reImpl data to
+ // JS function which can run new regexps modifing
+ // regexp that are used later by the engine.
+ // TODO: redesign is necessary
+ if (reImpl != ScriptRuntime.getRegExpProxy(cx)) Kit.codeBug();
+ RegExpImpl re2 = new RegExpImpl();
+ re2.multiline = reImpl.multiline;
+ re2.input = reImpl.input;
+ ScriptRuntime.setRegExpProxy(cx, re2);
+ try {
+ Scriptable parent = ScriptableObject.getTopLevelScope(scope);
+ Object result = rdata.lambda.call(cx, parent, parent, args);
+ lambdaStr = ScriptRuntime.toString(result);
+ } finally {
+ ScriptRuntime.setRegExpProxy(cx, reImpl);
+ }
+ replen = lambdaStr.length();
+ } else {
+ lambdaStr = null;
+ replen = rdata.repstr.length();
+ if (rdata.dollar >= 0) {
+ int[] skip = new int[1];
+ int dp = rdata.dollar;
+ do {
+ SubString sub = interpretDollar(cx, reImpl, rdata.repstr,
+ dp, skip);
+ if (sub != null) {
+ replen += sub.length - skip[0];
+ dp += skip[0];
+ } else {
+ ++dp;
+ }
+ dp = rdata.repstr.indexOf('$', dp);
+ } while (dp >= 0);
+ }
+ }
+
+ int growth = leftlen + replen + reImpl.rightContext.length;
+ StringBuffer charBuf = rdata.charBuf;
+ if (charBuf == null) {
+ charBuf = new StringBuffer(growth);
+ rdata.charBuf = charBuf;
+ } else {
+ charBuf.ensureCapacity(rdata.charBuf.length() + growth);
+ }
+
+ charBuf.append(reImpl.leftContext.charArray, leftIndex, leftlen);
+ if (rdata.lambda != null) {
+ charBuf.append(lambdaStr);
+ } else {
+ do_replace(rdata, cx, reImpl);
+ }
+ }
+
+ private static SubString interpretDollar(Context cx, RegExpImpl res,
+ String da, int dp, int[] skip)
+ {
+ char dc;
+ int num, tmp;
+
+ if (da.charAt(dp) != '$') Kit.codeBug();
+
+ /* Allow a real backslash (literal "\\") to escape "$1" etc. */
+ int version = cx.getLanguageVersion();
+ if (version != Context.VERSION_DEFAULT
+ && version <= Context.VERSION_1_4)
+ {
+ if (dp > 0 && da.charAt(dp - 1) == '\\')
+ return null;
+ }
+ int daL = da.length();
+ if (dp + 1 >= daL)
+ return null;
+ /* Interpret all Perl match-induced dollar variables. */
+ dc = da.charAt(dp + 1);
+ if (NativeRegExp.isDigit(dc)) {
+ int cp;
+ if (version != Context.VERSION_DEFAULT
+ && version <= Context.VERSION_1_4)
+ {
+ if (dc == '0')
+ return null;
+ /* Check for overflow to avoid gobbling arbitrary decimal digits. */
+ num = 0;
+ cp = dp;
+ while (++cp < daL && NativeRegExp.isDigit(dc = da.charAt(cp)))
+ {
+ tmp = 10 * num + (dc - '0');
+ if (tmp < num)
+ break;
+ num = tmp;
+ }
+ }
+ else { /* ECMA 3, 1-9 or 01-99 */
+ int parenCount = (res.parens == null) ? 0 : res.parens.length;
+ num = dc - '0';
+ if (num > parenCount)
+ return null;
+ cp = dp + 2;
+ if ((dp + 2) < daL) {
+ dc = da.charAt(dp + 2);
+ if (NativeRegExp.isDigit(dc)) {
+ tmp = 10 * num + (dc - '0');
+ if (tmp <= parenCount) {
+ cp++;
+ num = tmp;
+ }
+ }
+ }
+ if (num == 0) return null; /* $0 or $00 is not valid */
+ }
+ /* Adjust num from 1 $n-origin to 0 array-index-origin. */
+ num--;
+ skip[0] = cp - dp;
+ return res.getParenSubString(num);
+ }
+
+ skip[0] = 2;
+ switch (dc) {
+ case '$':
+ return new SubString("$");
+ case '&':
+ return res.lastMatch;
+ case '+':
+ return res.lastParen;
+ case '`':
+ if (version == Context.VERSION_1_2) {
+ /*
+ * JS1.2 imitated the Perl4 bug where left context at each step
+ * in an iterative use of a global regexp started from last match,
+ * not from the start of the target string. But Perl4 does start
+ * $` at the beginning of the target string when it is used in a
+ * substitution, so we emulate that special case here.
+ */
+ res.leftContext.index = 0;
+ res.leftContext.length = res.lastMatch.index;
+ }
+ return res.leftContext;
+ case '\'':
+ return res.rightContext;
+ }
+ return null;
+ }
+
+ /**
+ * Analog of do_replace in jsstr.c
+ */
+ private static void do_replace(GlobData rdata, Context cx,
+ RegExpImpl regExpImpl)
+ {
+ StringBuffer charBuf = rdata.charBuf;
+ int cp = 0;
+ String da = rdata.repstr;
+ int dp = rdata.dollar;
+ if (dp != -1) {
+ int[] skip = new int[1];
+ do {
+ int len = dp - cp;
+ charBuf.append(da.substring(cp, dp));
+ cp = dp;
+ SubString sub = interpretDollar(cx, regExpImpl, da,
+ dp, skip);
+ if (sub != null) {
+ len = sub.length;
+ if (len > 0) {
+ charBuf.append(sub.charArray, sub.index, len);
+ }
+ cp += skip[0];
+ dp += skip[0];
+ } else {
+ ++dp;
+ }
+ dp = da.indexOf('$', dp);
+ } while (dp >= 0);
+ }
+ int daL = da.length();
+ if (daL > cp) {
+ charBuf.append(da.substring(cp, daL));
+ }
+ }
+
+ String input; /* input string to match (perl $_, GC root) */
+ boolean multiline; /* whether input contains newlines (perl $*) */
+ SubString[] parens; /* Vector of SubString; last set of parens
+ matched (perl $1, $2) */
+ SubString lastMatch; /* last string matched (perl $&) */
+ SubString lastParen; /* last paren matched (perl $+) */
+ SubString leftContext; /* input to left of last match (perl $`) */
+ SubString rightContext; /* input to right of last match (perl $') */
+}
+
+
+final class GlobData
+{
+ int mode; /* input: return index, match object, or void */
+ int optarg; /* input: index of optional flags argument */
+ boolean global; /* output: whether regexp was global */
+ String str; /* output: 'this' parameter object as string */
+ NativeRegExp regexp;/* output: regexp parameter object private data */
+
+ // match-specific data
+
+ Scriptable arrayobj;
+
+ // replace-specific data
+
+ Function lambda; /* replacement function object or null */
+ String repstr; /* replacement string */
+ int dollar = -1; /* -1 or index of first $ in repstr */
+ StringBuffer charBuf; /* result characters, null initially */
+ int leftIndex; /* leftContext index, always 0 for JS1.2 */
+}