From 4a9748e10084ca99fadec4f0fb1a3c22af7f3794 Mon Sep 17 00:00:00 2001 From: "alexanders@b2ef00c0-3703-41da-baef-cfe82387ac0c" Date: Wed, 3 Feb 2010 00:46:50 +0000 Subject: initial code push --HG-- extra : convert_revision : svn%3Ab2ef00c0-3703-41da-baef-cfe82387ac0c/trunk%403 --- .../org/mozilla/javascript/regexp/RegExpImpl.java | 541 +++++++++++++++++++++ 1 file changed, 541 insertions(+) create mode 100644 trunk/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java (limited to 'trunk/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java') diff --git a/trunk/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java b/trunk/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java new file mode 100644 index 0000000..4b0a303 --- /dev/null +++ b/trunk/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/RegExpImpl.java @@ -0,0 +1,541 @@ +/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Rhino code, released + * May 6, 1998. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1997-1999 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License Version 2 or later (the "GPL"), in which + * case the provisions of the GPL are applicable instead of those above. If + * you wish to allow use of your version of this file only under the terms of + * the GPL and not to allow others to use your version of this file under the + * MPL, indicate your decision by deleting the provisions above and replacing + * them with the notice and other provisions required by the GPL. If you do + * not delete the provisions above, a recipient may use your version of this + * file under either the MPL or the GPL. + * + * ***** END LICENSE BLOCK ***** */ + +package org.mozilla.javascript.regexp; + +import org.mozilla.javascript.*; + +/** + * + */ +public class RegExpImpl implements RegExpProxy { + + public boolean isRegExp(Scriptable obj) { + return obj instanceof NativeRegExp; + } + + public Object compileRegExp(Context cx, String source, String flags) + { + return NativeRegExp.compileRE(cx, source, flags, false); + } + + public Scriptable wrapRegExp(Context cx, Scriptable scope, + Object compiled) + { + return new NativeRegExp(scope, compiled); + } + + public Object action(Context cx, Scriptable scope, + Scriptable thisObj, Object[] args, + int actionType) + { + GlobData data = new GlobData(); + data.mode = actionType; + + switch (actionType) { + case RA_MATCH: + { + Object rval; + data.optarg = 1; + rval = matchOrReplace(cx, scope, thisObj, args, + this, data, false); + return data.arrayobj == null ? rval : data.arrayobj; + } + + case RA_SEARCH: + data.optarg = 1; + return matchOrReplace(cx, scope, thisObj, args, + this, data, false); + + case RA_REPLACE: + { + Object arg1 = args.length < 2 ? Undefined.instance : args[1]; + String repstr = null; + Function lambda = null; + if (arg1 instanceof Function) { + lambda = (Function) arg1; + } else { + repstr = ScriptRuntime.toString(arg1); + } + + data.optarg = 2; + data.lambda = lambda; + data.repstr = repstr; + data.dollar = repstr == null ? -1 : repstr.indexOf('$'); + data.charBuf = null; + data.leftIndex = 0; + Object val = matchOrReplace(cx, scope, thisObj, args, + this, data, true); + SubString rc = this.rightContext; + + if (data.charBuf == null) { + if (data.global || val == null + || !val.equals(Boolean.TRUE)) + { + /* Didn't match even once. */ + return data.str; + } + SubString lc = this.leftContext; + replace_glob(data, cx, scope, this, lc.index, lc.length); + } + data.charBuf.append(rc.charArray, rc.index, rc.length); + return data.charBuf.toString(); + } + + default: + throw Kit.codeBug(); + } + } + + /** + * Analog of C match_or_replace. + */ + private static Object matchOrReplace(Context cx, Scriptable scope, + Scriptable thisObj, Object[] args, + RegExpImpl reImpl, + GlobData data, boolean forceFlat) + { + NativeRegExp re; + + String str = ScriptRuntime.toString(thisObj); + data.str = str; + Scriptable topScope = ScriptableObject.getTopLevelScope(scope); + + if (args.length == 0) { + Object compiled = NativeRegExp.compileRE(cx, "", "", false); + re = new NativeRegExp(topScope, compiled); + } else if (args[0] instanceof NativeRegExp) { + re = (NativeRegExp) args[0]; + } else { + String src = ScriptRuntime.toString(args[0]); + String opt; + if (data.optarg < args.length) { + args[0] = src; + opt = ScriptRuntime.toString(args[data.optarg]); + } else { + opt = null; + } + Object compiled = NativeRegExp.compileRE(cx, src, opt, forceFlat); + re = new NativeRegExp(topScope, compiled); + } + data.regexp = re; + + data.global = (re.getFlags() & NativeRegExp.JSREG_GLOB) != 0; + int[] indexp = { 0 }; + Object result = null; + if (data.mode == RA_SEARCH) { + result = re.executeRegExp(cx, scope, reImpl, + str, indexp, NativeRegExp.TEST); + if (result != null && result.equals(Boolean.TRUE)) + result = new Integer(reImpl.leftContext.length); + else + result = new Integer(-1); + } else if (data.global) { + re.lastIndex = 0; + for (int count = 0; indexp[0] <= str.length(); count++) { + result = re.executeRegExp(cx, scope, reImpl, + str, indexp, NativeRegExp.TEST); + if (result == null || !result.equals(Boolean.TRUE)) + break; + if (data.mode == RA_MATCH) { + match_glob(data, cx, scope, count, reImpl); + } else { + if (data.mode != RA_REPLACE) Kit.codeBug(); + SubString lastMatch = reImpl.lastMatch; + int leftIndex = data.leftIndex; + int leftlen = lastMatch.index - leftIndex; + data.leftIndex = lastMatch.index + lastMatch.length; + replace_glob(data, cx, scope, reImpl, leftIndex, leftlen); + } + if (reImpl.lastMatch.length == 0) { + if (indexp[0] == str.length()) + break; + indexp[0]++; + } + } + } else { + result = re.executeRegExp(cx, scope, reImpl, str, indexp, + ((data.mode == RA_REPLACE) + ? NativeRegExp.TEST + : NativeRegExp.MATCH)); + } + + return result; + } + + + + public int find_split(Context cx, Scriptable scope, String target, + String separator, Scriptable reObj, + int[] ip, int[] matchlen, + boolean[] matched, String[][] parensp) + { + int i = ip[0]; + int length = target.length(); + int result; + + int version = cx.getLanguageVersion(); + NativeRegExp re = (NativeRegExp) reObj; + again: + while (true) { // imitating C label + /* JS1.2 deviated from Perl by never matching at end of string. */ + int ipsave = ip[0]; // reuse ip to save object creation + ip[0] = i; + Object ret = re.executeRegExp(cx, scope, this, target, ip, + NativeRegExp.TEST); + if (ret != Boolean.TRUE) { + // Mismatch: ensure our caller advances i past end of string. + ip[0] = ipsave; + matchlen[0] = 1; + matched[0] = false; + return length; + } + i = ip[0]; + ip[0] = ipsave; + matched[0] = true; + + SubString sep = this.lastMatch; + matchlen[0] = sep.length; + if (matchlen[0] == 0) { + /* + * Empty string match: never split on an empty + * match at the start of a find_split cycle. Same + * rule as for an empty global match in + * match_or_replace. + */ + if (i == ip[0]) { + /* + * "Bump-along" to avoid sticking at an empty + * match, but don't bump past end of string -- + * our caller must do that by adding + * sep->length to our return value. + */ + if (i == length) { + if (version == Context.VERSION_1_2) { + matchlen[0] = 1; + result = i; + } + else + result = -1; + break; + } + i++; + continue again; // imitating C goto + } + } + // PR_ASSERT((size_t)i >= sep->length); + result = i - matchlen[0]; + break; + } + int size = (parens == null) ? 0 : parens.length; + parensp[0] = new String[size]; + for (int num = 0; num < size; num++) { + SubString parsub = getParenSubString(num); + parensp[0][num] = parsub.toString(); + } + return result; + } + + /** + * Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h. + * Assumes zero-based; i.e., for $3, i==2 + */ + SubString getParenSubString(int i) + { + if (parens != null && i < parens.length) { + SubString parsub = parens[i]; + if (parsub != null) { + return parsub; + } + } + return SubString.emptySubString; + } + + /* + * Analog of match_glob() in jsstr.c + */ + private static void match_glob(GlobData mdata, Context cx, + Scriptable scope, int count, + RegExpImpl reImpl) + { + if (mdata.arrayobj == null) { + Scriptable s = ScriptableObject.getTopLevelScope(scope); + mdata.arrayobj = ScriptRuntime.newObject(cx, s, "Array", null); + } + SubString matchsub = reImpl.lastMatch; + String matchstr = matchsub.toString(); + mdata.arrayobj.put(count, mdata.arrayobj, matchstr); + } + + /* + * Analog of replace_glob() in jsstr.c + */ + private static void replace_glob(GlobData rdata, Context cx, + Scriptable scope, RegExpImpl reImpl, + int leftIndex, int leftlen) + { + int replen; + String lambdaStr; + if (rdata.lambda != null) { + // invoke lambda function with args lastMatch, $1, $2, ... $n, + // leftContext.length, whole string. + SubString[] parens = reImpl.parens; + int parenCount = (parens == null) ? 0 : parens.length; + Object[] args = new Object[parenCount + 3]; + args[0] = reImpl.lastMatch.toString(); + for (int i=0; i < parenCount; i++) { + SubString sub = parens[i]; + if (sub != null) { + args[i+1] = sub.toString(); + } else { + args[i+1] = Undefined.instance; + } + } + args[parenCount+1] = new Integer(reImpl.leftContext.length); + args[parenCount+2] = rdata.str; + // This is a hack to prevent expose of reImpl data to + // JS function which can run new regexps modifing + // regexp that are used later by the engine. + // TODO: redesign is necessary + if (reImpl != ScriptRuntime.getRegExpProxy(cx)) Kit.codeBug(); + RegExpImpl re2 = new RegExpImpl(); + re2.multiline = reImpl.multiline; + re2.input = reImpl.input; + ScriptRuntime.setRegExpProxy(cx, re2); + try { + Scriptable parent = ScriptableObject.getTopLevelScope(scope); + Object result = rdata.lambda.call(cx, parent, parent, args); + lambdaStr = ScriptRuntime.toString(result); + } finally { + ScriptRuntime.setRegExpProxy(cx, reImpl); + } + replen = lambdaStr.length(); + } else { + lambdaStr = null; + replen = rdata.repstr.length(); + if (rdata.dollar >= 0) { + int[] skip = new int[1]; + int dp = rdata.dollar; + do { + SubString sub = interpretDollar(cx, reImpl, rdata.repstr, + dp, skip); + if (sub != null) { + replen += sub.length - skip[0]; + dp += skip[0]; + } else { + ++dp; + } + dp = rdata.repstr.indexOf('$', dp); + } while (dp >= 0); + } + } + + int growth = leftlen + replen + reImpl.rightContext.length; + StringBuffer charBuf = rdata.charBuf; + if (charBuf == null) { + charBuf = new StringBuffer(growth); + rdata.charBuf = charBuf; + } else { + charBuf.ensureCapacity(rdata.charBuf.length() + growth); + } + + charBuf.append(reImpl.leftContext.charArray, leftIndex, leftlen); + if (rdata.lambda != null) { + charBuf.append(lambdaStr); + } else { + do_replace(rdata, cx, reImpl); + } + } + + private static SubString interpretDollar(Context cx, RegExpImpl res, + String da, int dp, int[] skip) + { + char dc; + int num, tmp; + + if (da.charAt(dp) != '$') Kit.codeBug(); + + /* Allow a real backslash (literal "\\") to escape "$1" etc. */ + int version = cx.getLanguageVersion(); + if (version != Context.VERSION_DEFAULT + && version <= Context.VERSION_1_4) + { + if (dp > 0 && da.charAt(dp - 1) == '\\') + return null; + } + int daL = da.length(); + if (dp + 1 >= daL) + return null; + /* Interpret all Perl match-induced dollar variables. */ + dc = da.charAt(dp + 1); + if (NativeRegExp.isDigit(dc)) { + int cp; + if (version != Context.VERSION_DEFAULT + && version <= Context.VERSION_1_4) + { + if (dc == '0') + return null; + /* Check for overflow to avoid gobbling arbitrary decimal digits. */ + num = 0; + cp = dp; + while (++cp < daL && NativeRegExp.isDigit(dc = da.charAt(cp))) + { + tmp = 10 * num + (dc - '0'); + if (tmp < num) + break; + num = tmp; + } + } + else { /* ECMA 3, 1-9 or 01-99 */ + int parenCount = (res.parens == null) ? 0 : res.parens.length; + num = dc - '0'; + if (num > parenCount) + return null; + cp = dp + 2; + if ((dp + 2) < daL) { + dc = da.charAt(dp + 2); + if (NativeRegExp.isDigit(dc)) { + tmp = 10 * num + (dc - '0'); + if (tmp <= parenCount) { + cp++; + num = tmp; + } + } + } + if (num == 0) return null; /* $0 or $00 is not valid */ + } + /* Adjust num from 1 $n-origin to 0 array-index-origin. */ + num--; + skip[0] = cp - dp; + return res.getParenSubString(num); + } + + skip[0] = 2; + switch (dc) { + case '$': + return new SubString("$"); + case '&': + return res.lastMatch; + case '+': + return res.lastParen; + case '`': + if (version == Context.VERSION_1_2) { + /* + * JS1.2 imitated the Perl4 bug where left context at each step + * in an iterative use of a global regexp started from last match, + * not from the start of the target string. But Perl4 does start + * $` at the beginning of the target string when it is used in a + * substitution, so we emulate that special case here. + */ + res.leftContext.index = 0; + res.leftContext.length = res.lastMatch.index; + } + return res.leftContext; + case '\'': + return res.rightContext; + } + return null; + } + + /** + * Analog of do_replace in jsstr.c + */ + private static void do_replace(GlobData rdata, Context cx, + RegExpImpl regExpImpl) + { + StringBuffer charBuf = rdata.charBuf; + int cp = 0; + String da = rdata.repstr; + int dp = rdata.dollar; + if (dp != -1) { + int[] skip = new int[1]; + do { + int len = dp - cp; + charBuf.append(da.substring(cp, dp)); + cp = dp; + SubString sub = interpretDollar(cx, regExpImpl, da, + dp, skip); + if (sub != null) { + len = sub.length; + if (len > 0) { + charBuf.append(sub.charArray, sub.index, len); + } + cp += skip[0]; + dp += skip[0]; + } else { + ++dp; + } + dp = da.indexOf('$', dp); + } while (dp >= 0); + } + int daL = da.length(); + if (daL > cp) { + charBuf.append(da.substring(cp, daL)); + } + } + + String input; /* input string to match (perl $_, GC root) */ + boolean multiline; /* whether input contains newlines (perl $*) */ + SubString[] parens; /* Vector of SubString; last set of parens + matched (perl $1, $2) */ + SubString lastMatch; /* last string matched (perl $&) */ + SubString lastParen; /* last paren matched (perl $+) */ + SubString leftContext; /* input to left of last match (perl $`) */ + SubString rightContext; /* input to right of last match (perl $') */ +} + + +final class GlobData +{ + int mode; /* input: return index, match object, or void */ + int optarg; /* input: index of optional flags argument */ + boolean global; /* output: whether regexp was global */ + String str; /* output: 'this' parameter object as string */ + NativeRegExp regexp;/* output: regexp parameter object private data */ + + // match-specific data + + Scriptable arrayobj; + + // replace-specific data + + Function lambda; /* replacement function object or null */ + String repstr; /* replacement string */ + int dollar = -1; /* -1 or index of first $ in repstr */ + StringBuffer charBuf; /* result characters, null initially */ + int leftIndex; /* leftContext index, always 0 for JS1.2 */ +} -- cgit v1.2.3