Source for gnu.regexp.REMatch

   1: /* gnu/regexp/REMatch.java
   2:    Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package gnu.regexp;
  40: import java.io.Serializable;
  41: 
  42: /**
  43:  * An instance of this class represents a match
  44:  * completed by a gnu.regexp matching function. It can be used
  45:  * to obtain relevant information about the location of a match
  46:  * or submatch.
  47:  *
  48:  * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
  49:  */
  50: public final class REMatch implements Serializable, Cloneable {
  51:     private String matchedText;
  52: 
  53:     // These variables are package scope for fast access within the engine
  54:     int eflags; // execution flags this match was made using
  55: 
  56:     // Offset in source text where match was tried.  This is zero-based;
  57:     // the actual position in the source text is given by (offset + anchor).
  58:     int offset;
  59: 
  60:     // Anchor position refers to the index into the source input
  61:     // at which the matching operation began.
  62:     // This is also useful for the ANCHORINDEX option.
  63:     int anchor;
  64: 
  65:     // Package scope; used by RE.
  66:     int index; // used while matching to mark current match position in input
  67:     int[] start; // start positions (relative to offset) for each (sub)exp.
  68:     int[] end;   // end positions for the same
  69:     REMatch next; // other possibility (to avoid having to use arrays)
  70:     boolean empty; // empty string matched. This flag is used only within
  71:            // RETokenRepeated.
  72: 
  73:     public Object clone() {
  74:     try {
  75:         REMatch copy = (REMatch) super.clone();
  76:         copy.next = null;
  77: 
  78:         copy.start = (int[]) start.clone();
  79:         copy.end = (int[]) end.clone();
  80: 
  81:         return copy;
  82:     } catch (CloneNotSupportedException e) {
  83:         throw new Error(); // doesn't happen
  84:     }
  85:     }
  86: 
  87:     void assignFrom(REMatch other) {
  88:     start = other.start;
  89:     end = other.end;
  90:     index = other.index;
  91:     // need to deep clone?
  92:     next = other.next;
  93:     }
  94: 
  95:     REMatch(int subs, int anchor, int eflags) {
  96:     start = new int[subs+1];
  97:     end = new int[subs+1];
  98:     this.anchor = anchor;
  99:     this.eflags = eflags;
 100:     clear(anchor);
 101:     }
 102: 
 103:     void finish(CharIndexed text) {
 104:     start[0] = 0;
 105:     StringBuffer sb = new StringBuffer();
 106:     int i;
 107:     for (i = 0; i < end[0]; i++)
 108:         sb.append(text.charAt(i));
 109:     matchedText = sb.toString();
 110:     for (i = 0; i < start.length; i++) {
 111:         // If any subexpressions didn't terminate, they don't count
 112:         // TODO check if this code ever gets hit
 113:         if ((start[i] == -1) ^ (end[i] == -1)) {
 114:         start[i] = -1;
 115:         end[i] = -1;
 116:         }
 117:     }
 118:     next = null; // cut off alternates
 119:     }
 120:     
 121:     /** Clears the current match and moves the offset to the new index. */
 122:     void clear(int index) {
 123:     offset = index;
 124:     this.index = 0;
 125:     for (int i = 0; i < start.length; i++) {
 126:         start[i] = end[i] = -1;
 127:     }
 128:     next = null; // cut off alternates
 129:     }
 130:     
 131:     /**
 132:      * Returns the string matching the pattern.  This makes it convenient
 133:      * to write code like the following:
 134:      * <P>
 135:      * <code> 
 136:      * REMatch myMatch = myExpression.getMatch(myString);<br>
 137:      * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
 138:      * </code>
 139:      */
 140:     public String toString() {
 141:     return matchedText;
 142:     }
 143:     
 144:     /**
 145:      * Returns the index within the input text where the match in its entirety
 146:      * began.
 147:      */
 148:     public int getStartIndex() {
 149:     return offset + start[0];
 150:     }
 151:     
 152:     /**
 153:      * Returns the index within the input string where the match in
 154:      * its entirety ends.  The return value is the next position after
 155:      * the end of the string; therefore, a match created by the
 156:      * following call:
 157:      *
 158:      * <P>
 159:      * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
 160:      * <P>
 161:      * can be viewed (given that myMatch is not null) by creating
 162:      * <P>
 163:      * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
 164:      * myMatch.getEndIndex());</code>
 165:      * <P>
 166:      * But you can save yourself that work, since the <code>toString()</code>
 167:      * method (above) does exactly that for you.  
 168:      */
 169:     public int getEndIndex() {
 170:     return offset + end[0];
 171:     }
 172:   
 173:     /**
 174:      * Returns the string matching the given subexpression.  The subexpressions
 175:      * are indexed starting with one, not zero.  That is, the subexpression
 176:      * identified by the first set of parentheses in a regular expression
 177:      * could be retrieved from an REMatch by calling match.toString(1).
 178:      *
 179:      * @param sub Index of the subexpression.
 180:      */
 181:     public String toString(int sub) {
 182:     if ((sub >= start.length) || sub < 0)
 183:         throw new IndexOutOfBoundsException("No group " + sub);
 184:     if (start[sub] == -1) return null;
 185:     return (matchedText.substring(start[sub],end[sub]));
 186:     }
 187:     
 188:     /** 
 189:      * Returns the index within the input string used to generate this match
 190:      * where subexpression number <i>sub</i> begins, or <code>-1</code> if
 191:      * the subexpression does not exist.  The initial position is zero.
 192:      *
 193:      * @param sub Subexpression index
 194:      * @deprecated Use getStartIndex(int) instead.
 195:      */
 196:     public int getSubStartIndex(int sub) {
 197:     if (sub >= start.length) return -1;
 198:     int x = start[sub];
 199:     return (x == -1) ? x : offset + x;
 200:     }
 201:     
 202:     /** 
 203:      * Returns the index within the input string used to generate this match
 204:      * where subexpression number <i>sub</i> begins, or <code>-1</code> if
 205:      * the subexpression does not exist.  The initial position is zero.
 206:      *
 207:      * @param sub Subexpression index
 208:      * @since gnu.regexp 1.1.0
 209:      */
 210:     public int getStartIndex(int sub) {
 211:     if (sub >= start.length) return -1;
 212:     int x = start[sub];
 213:     return (x == -1) ? x : offset + x;
 214:     }
 215:   
 216:     /** 
 217:      * Returns the index within the input string used to generate this match
 218:      * where subexpression number <i>sub</i> ends, or <code>-1</code> if
 219:      * the subexpression does not exist.  The initial position is zero.
 220:      *
 221:      * @param sub Subexpression index
 222:      * @deprecated Use getEndIndex(int) instead
 223:      */
 224:     public int getSubEndIndex(int sub) {
 225:     if (sub >= start.length) return -1;
 226:     int x = end[sub];
 227:     return (x == -1) ? x : offset + x;
 228:     }
 229:     
 230:     /** 
 231:      * Returns the index within the input string used to generate this match
 232:      * where subexpression number <i>sub</i> ends, or <code>-1</code> if
 233:      * the subexpression does not exist.  The initial position is zero.
 234:      *
 235:      * @param sub Subexpression index
 236:      */
 237:     public int getEndIndex(int sub) {
 238:     if (sub >= start.length) return -1;
 239:     int x = end[sub];
 240:     return (x == -1) ? x : offset + x;
 241:     }
 242:     
 243:     /**
 244:      * Substitute the results of this match to create a new string.
 245:      * This is patterned after PERL, so the tokens to watch out for are
 246:      * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
 247:      * the full substring matched; <code>$<i>n</i></code> matches
 248:      * subexpression number <i>n</i>.
 249:      * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
 250:      * if such subexpressions exist.
 251:      *
 252:      * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
 253:      */
 254:     public String substituteInto(String input) {
 255:     // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
 256:     StringBuffer output = new StringBuffer();
 257:     int pos;
 258:     for (pos = 0; pos < input.length()-1; pos++) {
 259:         if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) {
 260:         int val = Character.digit(input.charAt(++pos),10);
 261:         int pos1 = pos + 1;
 262:         while (pos1 < input.length() &&
 263:                Character.isDigit(input.charAt(pos1))) {
 264:             int val1 = val*10 + Character.digit(input.charAt(pos1),10);
 265:             if (val1 >= start.length) break;
 266:             pos1++;
 267:             val = val1;
 268:         }
 269:         pos = pos1 - 1;
 270: 
 271:         if (val < start.length) {
 272:             output.append(toString(val));
 273:         } 
 274:         } else output.append(input.charAt(pos));
 275:     }
 276:     if (pos < input.length()) output.append(input.charAt(pos));
 277:     return output.toString();
 278:     }
 279: 
 280:     static class REMatchList {
 281:         REMatch head;
 282:     REMatch tail;
 283:         REMatchList() {
 284:         head = tail = null;
 285:     }
 286:     /* Not used now. But we may need this some day?
 287:     void addHead(REMatch newone) {
 288:             if (head == null) {
 289:                 head = newone;
 290:                 tail = newone;
 291:                 while (tail.next != null) {
 292:                     tail = tail.next;
 293:                 }
 294:             }
 295:         else {
 296:                 REMatch tmp = newone;
 297:                 while (tmp.next != null) tmp = tmp.next;
 298:                 tmp.next = head;
 299:             head = newone;
 300:         }
 301:     }
 302:     */
 303:     void addTail(REMatch newone) {
 304:             if (head == null) {
 305:                 head = newone;
 306:                 tail = newone;
 307:             }
 308:             else {
 309:                 tail.next = newone;
 310:             }
 311:             while (tail.next != null) {
 312:                 tail = tail.next;
 313:             }
 314:     }
 315:     }
 316: 
 317: }