Frames | No Frames |
/* gnu/regexp/REMatch.java
   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package gnu.regexp;
import java.io.Serializable;

/**
 * An instance of this class represents a match
 * completed by a gnu.regexp matching function. It can be used
 * to obtain relevant information about the location of a match
 * or submatch.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 */
public final class REMatch implements Serializable, Cloneable {
    /** Text of the whole match; assigned by {@link #finish(CharIndexed)}. */
    private String matchedText;

    // These variables are package scope for fast access within the engine
    int eflags; // execution flags this match was made using

    // Offset in source text where match was tried.  This is zero-based;
    // the actual position in the source text is given by (offset + anchor).
    int offset;

    // Anchor position refers to the index into the source input
    // at which the matching operation began.
    // This is also useful for the ANCHORINDEX option.
    int anchor;

    // Package scope; used by RE.
    int index;     // used while matching to mark current match position in input
    int[] start;   // start positions (relative to offset) for each (sub)exp.
    int[] end;     // end positions for the same
    REMatch next;  // other possibility (to avoid having to use arrays)
    boolean empty; // empty string matched.  This flag is used only within
                   // RETokenRepeated.

    /**
     * Returns a copy of this match that has its own <code>start</code> and
     * <code>end</code> arrays and whose <code>next</code> link is
     * <code>null</code>.  All other fields are copied as they are.
     */
    public Object clone() {
        try {
            REMatch copy = (REMatch) super.clone();
            copy.next = null;

            copy.start = (int[]) start.clone();
            copy.end = (int[]) end.clone();

            return copy;
        } catch (CloneNotSupportedException e) {
            // Doesn't happen, since this class implements Cloneable.  Keep
            // the cause anyway so that a failure would be diagnosable.
            throw new Error(e);
        }
    }

    /**
     * Copies <code>start</code>, <code>end</code>, <code>index</code> and
     * <code>next</code> from another match.  The arrays and the
     * <code>next</code> link are shared with <code>other</code>, not copied.
     *
     * @param other the match whose state is taken over
     */
    void assignFrom(REMatch other) {
        start = other.start;
        end = other.end;
        index = other.index;
        // need to deep clone?
        next = other.next;
    }

    /**
     * Creates a cleared match with room for the whole expression plus
     * <code>subs</code> subexpressions.
     *
     * @param subs   number of subexpressions
     * @param anchor value stored in the <code>anchor</code> field and used as
     *               the initial offset
     * @param eflags value stored in the <code>eflags</code> field
     */
    REMatch(int subs, int anchor, int eflags) {
        start = new int[subs + 1];
        end = new int[subs + 1];
        this.anchor = anchor;
        this.eflags = eflags;
        clear(anchor);
    }

    /**
     * Completes this match: sets <code>start[0]</code> to zero, stores the
     * first <code>end[0]</code> characters of <code>text</code> as the matched
     * text, discards subexpressions for which only one of the two bounds was
     * recorded, and cuts off the chain of alternates.
     *
     * @param text the input from which the matched characters are read
     */
    void finish(CharIndexed text) {
        start[0] = 0;
        StringBuffer sb = new StringBuffer();
        int i;
        for (i = 0; i < end[0]; i++) {
            sb.append(text.charAt(i));
        }
        matchedText = sb.toString();
        for (i = 0; i < start.length; i++) {
            // If any subexpressions didn't terminate, they don't count
            // TODO check if this code ever gets hit
            if ((start[i] == -1) ^ (end[i] == -1)) {
                start[i] = -1;
                end[i] = -1;
            }
        }
        next = null; // cut off alternates
    }

    /** Clears the current match and moves the offset to the new index. */
    void clear(int index) {
        offset = index;
        this.index = 0;
        for (int i = 0; i < start.length; i++) {
            start[i] = end[i] = -1;
        }
        next = null; // cut off alternates
    }

    /**
     * Returns the string matching the pattern.  This makes it convenient
     * to write code like the following:
     * <P>
     * <code>
     * REMatch myMatch = myExpression.getMatch(myString);<br>
     * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
     * </code>
     */
    public String toString() {
        return matchedText;
    }

    /**
     * Returns the index within the input text where the match in its entirety
     * began.
     */
    public int getStartIndex() {
        return offset + start[0];
    }

    /**
     * Returns the index within the input string where the match in
     * its entirety ends.  The return value is the next position after
     * the end of the string; therefore, a match created by the
     * following call:
     *
     * <P>
     * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
     * <P>
     * can be viewed (given that myMatch is not null) by creating
     * <P>
     * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
     * myMatch.getEndIndex());</code>
     * <P>
     * But you can save yourself that work, since the <code>toString()</code>
     * method (above) does exactly that for you.
     */
    public int getEndIndex() {
        return offset + end[0];
    }

    /**
     * Returns the string matching the given subexpression.  The subexpressions
     * are indexed starting with one, not zero.  That is, the subexpression
     * identified by the first set of parentheses in a regular expression
     * could be retrieved from an REMatch by calling match.toString(1).
     *
     * @param sub Index of the subexpression.
     * @return the text of the subexpression, or <code>null</code> if no start
     *         position is recorded for it
     * @throws IndexOutOfBoundsException if <code>sub</code> is negative or
     *         not less than the number of recorded (sub)expressions
     */
    public String toString(int sub) {
        if ((sub >= start.length) || sub < 0) {
            throw new IndexOutOfBoundsException("No group " + sub);
        }
        if (start[sub] == -1) {
            return null;
        }
        return matchedText.substring(start[sub], end[sub]);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     * @deprecated Use getStartIndex(int) instead.
     */
    public int getSubStartIndex(int sub) {
        return getStartIndex(sub);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     * @since gnu.regexp 1.1.0
     */
    public int getStartIndex(int sub) {
        return absoluteIndex(start, sub);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     * @deprecated Use getEndIndex(int) instead
     */
    public int getSubEndIndex(int sub) {
        return getEndIndex(sub);
    }

    /**
     * Returns the index within the input string used to generate this match
     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
     * the subexpression does not exist.  The initial position is zero.
     *
     * @param sub Subexpression index
     */
    public int getEndIndex(int sub) {
        return absoluteIndex(end, sub);
    }

    /**
     * Converts the recorded position of subexpression <code>sub</code> into
     * an index that includes <code>offset</code>.
     *
     * @param positions either the <code>start</code> or the <code>end</code>
     *                  array
     * @param sub       subexpression index
     * @return <code>offset</code> plus the recorded position, or
     *         <code>-1</code> if <code>sub</code> is out of range (negative
     *         included) or no position is recorded for it
     */
    private int absoluteIndex(int[] positions, int sub) {
        // The upper bound is taken from start, as in the original accessors.
        if (sub < 0 || sub >= start.length) {
            return -1;
        }
        int x = positions[sub];
        return (x == -1) ? x : offset + x;
    }

    /**
     * Substitute the results of this match to create a new string.
     * This is patterned after PERL, so the tokens to watch out for are
     * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
     * the full substring matched; <code>$<i>n</i></code> matches
     * subexpression number <i>n</i>.
     * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
     * if such subexpressions exist.
     * <P>
     * A token that refers to a subexpression for which no text is recorded
     * contributes nothing to the result, and so does a single-digit token
     * whose number is not a valid subexpression index.
     *
     * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
     * @return the input with every <code>$<i>n</i></code> token replaced
     */
    public String substituteInto(String input) {
        // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
        StringBuffer output = new StringBuffer();
        int pos;
        // Stop one short of the end: a '$' needs a following character.
        for (pos = 0; pos < input.length() - 1; pos++) {
            if ((input.charAt(pos) == '$')
                && Character.isDigit(input.charAt(pos + 1))) {
                int val = Character.digit(input.charAt(++pos), 10);
                int pos1 = pos + 1;
                // Take further digits only while the number still names an
                // existing subexpression; the rest stays literal text.
                while (pos1 < input.length()
                       && Character.isDigit(input.charAt(pos1))) {
                    int val1 = val * 10 + Character.digit(input.charAt(pos1), 10);
                    if (val1 >= start.length) {
                        break;
                    }
                    pos1++;
                    val = val1;
                }
                pos = pos1 - 1;

                if (val < start.length) {
                    // toString(int) yields null for a subexpression without
                    // recorded text; appending that null would insert the
                    // four characters "null" into the result.
                    String group = toString(val);
                    if (group != null) {
                        output.append(group);
                    }
                }
            } else {
                output.append(input.charAt(pos));
            }
        }
        // Copy the last character unless a token already consumed it.
        if (pos < input.length()) {
            output.append(input.charAt(pos));
        }
        return output.toString();
    }

    /**
     * A singly linked list of matches chained through their
     * <code>next</code> fields.
     */
    static class REMatchList {
        REMatch head;
        REMatch tail;

        REMatchList() {
            head = tail = null;
        }

        /* Not used now. But we may need this some day?
        void addHead(REMatch newone) {
            if (head == null) {
                head = newone;
                tail = newone;
                while (tail.next != null) {
                    tail = tail.next;
                }
            }
            else {
                REMatch tmp = newone;
                while (tmp.next != null) tmp = tmp.next;
                tmp.next = head;
                head = newone;
            }
        }
        */

        /**
         * Appends <code>newone</code>, together with whatever is already
         * chained behind it, and moves <code>tail</code> to the last element
         * of the resulting chain.
         *
         * @param newone first match of the chain to append; passing
         *               <code>null</code> while the list is empty fails with
         *               a <code>NullPointerException</code>
         */
        void addTail(REMatch newone) {
            if (head == null) {
                head = newone;
                tail = newone;
            } else {
                tail.next = newone;
            }
            while (tail.next != null) {
                tail = tail.next;
            }
        }
    }

}