/*
 *  gnu/regexp/REMatch.java
 *  Copyright (C) 1998-2001 Wes Biggs
 *
 *  This library is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published
 *  by the Free Software Foundation; either version 2.1 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package gnu.regexp;

import java.io.Serializable;

/**
 * An instance of this class represents a match
 * completed by a gnu.regexp matching function. It can be used
 * to obtain relevant information about the location of a match
 * or submatch.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 */
public final class REMatch implements Serializable, Cloneable {
    /** Text of the whole match; only assigned by {@link #finish(CharIndexed)}. */
    private String matchedText;

    // These variables are package scope for fast access within the engine
    int eflags; // execution flags this match was made using

    // Offset in source text where match was tried.  This is zero-based;
    // the actual position in the source text is given by (offset + anchor).
    int offset;

    // Anchor position refers to the index into the source input
    // at which the matching operation began.
    // This is also useful for the ANCHORINDEX option.
    int anchor;

    // Package scope; used by RE.
    int index;    // used while matching to mark current match position in input
    int[] start;  // start positions (relative to offset) for each (sub)exp.
    int[] end;    // end positions for the same
    REMatch next; // other possibility (to avoid having to use arrays)

    /**
     * Returns a copy of this match with its own <code>start</code> and
     * <code>end</code> arrays.  The copy's <code>next</code> link is cleared,
     * so the chain of alternate matches is not carried over.
     */
    public Object clone() {
        try {
            REMatch copy = (REMatch) super.clone();
            copy.next = null;
            copy.start = (int[]) start.clone();
            copy.end = (int[]) end.clone();
            return copy;
        } catch (CloneNotSupportedException e) {
            // Cannot happen: this class implements Cloneable.  Keep the
            // exception text anyway so a failure would be diagnosable.
            throw new Error(e.toString());
        }
    }

    /**
     * Makes this match take over the position state of another match.
     * The <code>start</code> and <code>end</code> arrays are shared by
     * reference with <code>other</code>, not copied.
     *
     * @param other the match whose state is adopted
     */
    void assignFrom(REMatch other) {
        start = other.start;
        end = other.end;
        index = other.index;
        // need to deep clone?
        next = other.next;
    }

    /**
     * Creates an empty match.
     *
     * @param subs   number of subexpressions; the position arrays get one
     *               extra slot, with slot 0 holding the whole match
     * @param anchor index into the source input at which matching began
     * @param eflags execution flags this match is made with
     */
    REMatch(int subs, int anchor, int eflags) {
        start = new int[subs + 1];
        end = new int[subs + 1];
        this.anchor = anchor;
        this.eflags = eflags;
        clear(anchor);
    }

    /**
     * Finalizes this match: records the matched text, invalidates any
     * half-open subexpression and drops the chain of alternates.
     *
     * @param text the input the match was made against; characters
     *             <code>0</code> up to <code>end[0]</code> are copied, so it
     *             is presumably already positioned at the match start
     *             (TODO confirm against the caller in RE)
     */
    void finish(CharIndexed text) {
        start[0] = 0;
        StringBuffer sb = new StringBuffer();
        int i;
        for (i = 0; i < end[0]; i++) {
            sb.append(text.charAt(i));
        }
        matchedText = sb.toString();
        for (i = 0; i < start.length; i++) {
            // If any subexpressions didn't terminate, they don't count
            // TODO check if this code ever gets hit
            if ((start[i] == -1) ^ (end[i] == -1)) {
                start[i] = -1;
                end[i] = -1;
            }
        }
        next = null; // cut off alternates
    }

    /** Clears the current match and moves the offset to the new index. */
    void clear(int index) {
        offset = index;
        this.index = 0;
        for (int i = 0; i < start.length; i++) {
            start[i] = end[i] = -1;
        }
        next = null; // cut off alternates
    }

    /**
     * Tells whether <code>sub</code> is a usable index into the position
     * arrays.  Negative values are rejected as well as too-large ones so
     * that the public accessors never throw on a bad index.
     */
    private boolean isValidSub(int sub) {
        return (sub >= 0) && (sub < start.length);
    }

    /**
     * Returns the string matching the pattern.  This makes it convenient
     * to write code like the following:
     * <pre>
     * REMatch myMatch = myExpression.getMatch(myString);
     * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
     * </pre>
     */
    public String toString() {
        return matchedText;
    }

    /**
     * Returns the index within the input text where the match in its entirety
     * began.
     */
    public int getStartIndex() {
        return offset + start[0];
    }

    /**
     * Returns the index within the input string where the match in
     * its entirety ends.  The return value is the next position after
     * the end of the string; therefore, a match created by the
     * following call:
     * <pre>
     * REMatch myMatch = myExpression.getMatch(myString);
     * </pre>
     * can be viewed (given that myMatch is not null) by creating
     * <pre>
     * String theMatch = myString.substring(myMatch.getStartIndex(),
     *                                      myMatch.getEndIndex());
     * </pre>
     * But you can save yourself that work, since the <code>toString()</code>
     * method (above) does exactly that for you.
     */
    public int getEndIndex() {
        return offset + end[0];
    }

    /**
     * Returns the string matching the given subexpression.  The subexpressions
     * are indexed starting with one, not zero.  That is, the subexpression
     * identified by the first set of parentheses in a regular expression
     * could be retrieved from an REMatch by calling match.toString(1).
     *
     * @param sub Index of the subexpression.
     * @return the matched text, or the empty string if the index is out of
     *         range (including negative) or the subexpression did not match
     */
    public String toString(int sub) {
        if (!isValidSub(sub) || (start[sub] == -1)) {
            return "";
        }
        return matchedText.substring(start[sub], end[sub]);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     * @deprecated Use getStartIndex(int) instead.
     */
    public int getSubStartIndex(int sub) {
        return getStartIndex(sub);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     * @since gnu.regexp 1.1.0
     */
    public int getStartIndex(int sub) {
        if (!isValidSub(sub)) {
            return -1;
        }
        int x = start[sub];
        return (x == -1) ? x : offset + x;
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     * @deprecated Use getEndIndex(int) instead
     */
    public int getSubEndIndex(int sub) {
        return getEndIndex(sub);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     */
    public int getEndIndex(int sub) {
        if (!isValidSub(sub)) {
            return -1;
        }
        int x = end[sub];
        return (x == -1) ? x : offset + x;
    }

    /**
     * Substitute the results of this match to create a new string.
     * This is patterned after PERL, so the tokens to watch out for are
     * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
     * the full substring matched; <code>$<i>n</i></code> matches
     * subexpression number <i>n</i>.  A token naming a subexpression that
     * does not exist or did not match is replaced by nothing.
     *
     * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
     */
    public String substituteInto(String input) {
        // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
        StringBuffer output = new StringBuffer();
        int pos;
        // Stop one short of the end: a token needs a character after the '$'.
        for (pos = 0; pos < input.length() - 1; pos++) {
            if ((input.charAt(pos) == '$')
                    && Character.isDigit(input.charAt(pos + 1))) {
                int val = Character.digit(input.charAt(++pos), 10);
                // toString(int) yields "" for unknown subexpressions, so no
                // separate range check is needed here.
                output.append(toString(val));
            } else {
                output.append(input.charAt(pos));
            }
        }
        // Emit the last character unless it was consumed as a token digit.
        if (pos < input.length()) {
            output.append(input.charAt(pos));
        }
        return output.toString();
    }
}