/* * gnu/regexp/REMatch.java * Copyright (C) 1998-2001 Wes Biggs * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package gnu.regexp; import java.io.Serializable; /** * An instance of this class represents a match * completed by a gnu.regexp matching function. It can be used * to obtain relevant information about the location of a match * or submatch. * * @author Wes Biggs */ public final class REMatch implements Serializable, Cloneable { private String matchedText; // These variables are package scope for fast access within the engine int eflags; // execution flags this match was made using // Offset in source text where match was tried. This is zero-based; // the actual position in the source text is given by (offset + anchor). int offset; // Anchor position refers to the index into the source input // at which the matching operation began. // This is also useful for the ANCHORINDEX option. int anchor; // Package scope; used by RE. int index; // used while matching to mark current match position in input int[] start; // start positions (relative to offset) for each (sub)exp. 
int[] end; // end positions for the same REMatch next; // other possibility (to avoid having to use arrays) public Object clone() { try { REMatch copy = (REMatch) super.clone(); copy.next = null; copy.start = (int[]) start.clone(); copy.end = (int[]) end.clone(); return copy; } catch (CloneNotSupportedException e) { throw new Error(); // doesn't happen } } void assignFrom(REMatch other) { start = other.start; end = other.end; index = other.index; // need to deep clone? next = other.next; } REMatch(int subs, int anchor, int eflags) { start = new int[subs+1]; end = new int[subs+1]; this.anchor = anchor; this.eflags = eflags; clear(anchor); } void finish(CharIndexed text) { start[0] = 0; StringBuffer sb = new StringBuffer(); int i; for (i = 0; i < end[0]; i++) sb.append(text.charAt(i)); matchedText = sb.toString(); for (i = 0; i < start.length; i++) { // If any subexpressions didn't terminate, they don't count // TODO check if this code ever gets hit if ((start[i] == -1) ^ (end[i] == -1)) { start[i] = -1; end[i] = -1; } } next = null; // cut off alternates } /** Clears the current match and moves the offset to the new index. */ void clear(int index) { offset = index; this.index = 0; for (int i = 0; i < start.length; i++) { start[i] = end[i] = -1; } next = null; // cut off alternates } /** * Returns the string matching the pattern. This makes it convenient * to write code like the following: *
 * <pre>
 * REMatch myMatch = myExpression.getMatch(myString);
 * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
 * </pre>
 */
public String toString() {
    // The text was captured when the match was finished; hand it back as is.
    return this.matchedText;
}
/**
 * Returns the position in the input text at which the complete match
 * starts.
 */
public int getStartIndex() {
    // start[] holds positions relative to offset; shift to get the real one.
    final int relativeStart = start[0];
    return relativeStart + offset;
}
/**
 * Returns the index within the input string where the match in
 * its entirety ends.  The return value is the next position after
 * the end of the matched text; therefore, a match created by the
 * following call:
 * <pre>
 * REMatch myMatch = myExpression.getMatch(myString);
 * </pre>
 * can be viewed (given that myMatch is not null) by creating
 * <pre>
 * String theMatch = myString.substring(myMatch.getStartIndex(),
 *                                      myMatch.getEndIndex());
 * </pre>
 * But you can save yourself that work, since the <code>toString()</code>
 * method does exactly that for you:
 * <pre>
 * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
 * </pre>
 */
public int getEndIndex() {
    // end[] holds positions relative to offset; shift to get the real one.
    final int relativeEnd = end[0];
    return relativeEnd + offset;
}
/**
 * Returns the string matching the given subexpression.  The subexpressions
 * are indexed starting with one, not zero.  That is, the subexpression
 * identified by the first set of parentheses in a regular expression
 * could be retrieved from an REMatch by calling match.toString(1).
 *
 * @param sub Index of the subexpression.
 * @return the text of the subexpression, or the empty string if the index
 *         is negative, is not smaller than the number of recorded
 *         (sub)expressions, or has a start position of -1.
 */
public String toString(int sub) {
    // A negative index used to escape as ArrayIndexOutOfBoundsException;
    // treat it like any other subexpression that does not exist.
    if (sub < 0 || sub >= start.length || start[sub] == -1) {
        return "";
    }
    return matchedText.substring(start[sub], end[sub]);
}
/**
 * Returns the index within the input string used to generate this match
 * where subexpression number <i>sub</i> begins, or <code>-1</code> if
 * the subexpression does not exist.  The initial position is zero.
 * A negative <i>sub</i> is treated as a subexpression that does not exist.
 *
 * @param sub Subexpression index
 * @return the start position shifted by the match offset, or -1
 * @deprecated Use getStartIndex(int) instead.
 */
public int getSubStartIndex(int sub) {
    // Reject indexes on both sides of the array so the documented -1 is
    // returned instead of an ArrayIndexOutOfBoundsException.
    if (sub < 0 || sub >= start.length) {
        return -1;
    }
    final int relative = start[sub];
    return (relative == -1) ? -1 : offset + relative;
}
/**
 * Returns the index within the input string used to generate this match
 * where subexpression number <i>sub</i> begins, or <code>-1</code> if
 * the subexpression does not exist.  The initial position is zero.
 * A negative <i>sub</i> is treated as a subexpression that does not exist.
 *
 * @param sub Subexpression index
 * @return the start position shifted by the match offset, or -1
 * @since gnu.regexp 1.1.0
 */
public int getStartIndex(int sub) {
    // Reject indexes on both sides of the array so the documented -1 is
    // returned instead of an ArrayIndexOutOfBoundsException.
    if (sub < 0 || sub >= start.length) {
        return -1;
    }
    final int relative = start[sub];
    return (relative == -1) ? -1 : offset + relative;
}
/**
 * Returns the index within the input string used to generate this match
 * where subexpression number <i>sub</i> ends, or <code>-1</code> if
 * the subexpression does not exist.  The initial position is zero.
 * A negative <i>sub</i> is treated as a subexpression that does not exist.
 *
 * @param sub Subexpression index
 * @return the end position shifted by the match offset, or -1
 * @deprecated Use getEndIndex(int) instead
 */
public int getSubEndIndex(int sub) {
    // Reject indexes on both sides of the array so the documented -1 is
    // returned instead of an ArrayIndexOutOfBoundsException.
    if (sub < 0 || sub >= start.length) {
        return -1;
    }
    final int relative = end[sub];
    return (relative == -1) ? -1 : offset + relative;
}
/**
 * Returns the index within the input string used to generate this match
 * where subexpression number <i>sub</i> ends, or <code>-1</code> if
 * the subexpression does not exist.  The initial position is zero.
 * A negative <i>sub</i> is treated as a subexpression that does not exist.
 *
 * @param sub Subexpression index
 * @return the end position shifted by the match offset, or -1
 */
public int getEndIndex(int sub) {
    // Reject indexes on both sides of the array so the documented -1 is
    // returned instead of an ArrayIndexOutOfBoundsException.
    if (sub < 0 || sub >= start.length) {
        return -1;
    }
    final int relative = end[sub];
    return (relative == -1) ? -1 : offset + relative;
}
/**
 * Builds a new string from a template by substituting the results of
 * this match.  Modelled on PERL: the recognised tokens are
 * <code>$0</code> through <code>$9</code>, where <code>$0</code> stands
 * for the full substring matched and <code>$<i>n</i></code> for
 * subexpression number <i>n</i>.  A token whose number is not smaller
 * than the number of recorded (sub)expressions contributes nothing; a
 * <code>$</code> that is not followed by a digit is copied literally.
 *
 * @param input A string consisting of literals and <code>$<i>n</i></code>
 *              tokens.
 * @return the template with every token replaced
 */
public String substituteInto(String input) {
    StringBuffer result = new StringBuffer();
    final int length = input.length();
    int i = 0;
    while (i < length) {
        final char current = input.charAt(i);
        // A token needs a '$' plus one following digit, so the last
        // character of the template can never start one.
        final boolean isToken = (current == '$')
            && (i + 1 < length)
            && Character.isDigit(input.charAt(i + 1));
        if (!isToken) {
            result.append(current);
            i++;
            continue;
        }
        final int group = Character.digit(input.charAt(i + 1), 10);
        if (group < start.length) {
            result.append(toString(group));
        }
        i += 2; // skip the '$' and its digit
    }
    return result.toString();
}
}