Source for java.util.regex.Pattern

   1: /* Pattern.java -- Compiled regular expression ready to be applied.
   2:    Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package java.util.regex;
  39: 
  40: import gnu.regexp.RE;
  41: import gnu.regexp.REException;
  42: import gnu.regexp.RESyntax;
  43: 
  44: import java.io.Serializable;
  45: import java.util.ArrayList;
  46: 
  47: 
  48: /**
  49:  * Compiled regular expression ready to be applied. 
  50:  *
  51:  * @since 1.4
  52:  */
  53: public final class Pattern implements Serializable
  54: {
  55:   private static final long serialVersionUID = 5073258162644648461L;
  56:   
  57:   public static final int CANON_EQ = 128;
  58:   public static final int CASE_INSENSITIVE = 2;
  59:   public static final int COMMENTS = 4;
  60:   public static final int DOTALL = 32;
  61:   public static final int MULTILINE = 8;
  62:   public static final int UNICODE_CASE = 64;
  63:   public static final int UNIX_LINES = 1;
  64:   
  65:   private final String regex;
  66:   private final int flags;
  67: 
  68:   private final RE re;
  69: 
  70:   private Pattern (String regex, int flags)
  71:     throws PatternSyntaxException
  72:   {
  73:     this.regex = regex;
  74:     this.flags = flags;
  75: 
  76:     int gnuFlags = 0;
  77:     if ((flags & CASE_INSENSITIVE) != 0)
  78:       gnuFlags |= RE.REG_ICASE;
  79:     if ((flags & MULTILINE) != 0)
  80:       gnuFlags |= RE.REG_MULTILINE;
  81:     if ((flags & DOTALL) != 0)
  82:       gnuFlags |= RE.REG_DOT_NEWLINE;
  83:     // not yet supported:
  84:     // if ((flags & UNICODE_CASE) != 0) gnuFlags =
  85:     // if ((flags & CANON_EQ) != 0) gnuFlags =
  86: 
  87:     RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
  88:     if ((flags & UNIX_LINES) != 0)
  89:       {
  90:     // Use a syntax set with \n for linefeeds?
  91:     syntax = new RESyntax(syntax);
  92:     syntax.setLineSeparator("\n");
  93:       }
  94: 
  95:     if ((flags & COMMENTS) != 0)
  96:       {
  97:     // Use a syntax with support for comments?
  98:       }
  99: 
 100:     try
 101:       {
 102:     this.re = new RE(regex, gnuFlags, syntax);
 103:       }
 104:     catch (REException e)
 105:       {
 106:     throw new PatternSyntaxException(e.getMessage(),
 107:                      regex, e.getPosition());
 108:       }
 109:   }
 110:  
 111:   // package private accessor method
 112:   RE getRE()
 113:   {
 114:     return re;
 115:   }
 116: 
 117:   /**
 118:    * @param regex The regular expression
 119:    *
 120:    * @exception PatternSyntaxException If the expression's syntax is invalid
 121:    */
 122:   public static Pattern compile (String regex)
 123:     throws PatternSyntaxException
 124:   {
 125:     return compile(regex, 0);
 126:   }
 127:   
 128:   /**
 129:    * @param regex The regular expression
 130:    * @param flags The match flags, a bit mask
 131:    *
 132:    * @exception PatternSyntaxException If the expression's syntax is invalid
 133:    * @exception IllegalArgumentException If bit values other than those
 134:    * corresponding to the defined match flags are set in flags
 135:    */
 136:   public static Pattern compile (String regex, int flags)
 137:     throws PatternSyntaxException
 138:   {
 139:     // FIXME: check which flags are really accepted
 140:     if ((flags & ~0xEF) != 0)
 141:       throw new IllegalArgumentException ();
 142:     
 143:     return new Pattern (regex, flags); 
 144:   }
 145:   
 146:   public int flags ()
 147:   {
 148:     return this.flags;
 149:   }
 150:   
 151:   /**
 152:    * @param regex The regular expression
 153:    * @param input The character sequence to be matched
 154:    *
 155:    * @exception PatternSyntaxException If the expression's syntax is invalid
 156:    */
 157:   public static boolean matches (String regex, CharSequence input) 
 158:   {
 159:     return compile(regex).matcher(input).matches();
 160:   }
 161:   
 162:   /**
 163:    * @param input The character sequence to be matched
 164:    */
 165:   public Matcher matcher (CharSequence input)
 166:   {
 167:     return new Matcher(this, input);
 168:   }
 169:   
 170:   /**
 171:    * @param input The character sequence to be matched
 172:    */
 173:   public String[] split (CharSequence input)
 174:   {
 175:     return split(input, 0);
 176:   }
 177:   
 178:   /**
 179:    * @param input The character sequence to be matched
 180:    * @param limit The result threshold
 181:    */
 182:   public String[] split (CharSequence input, int limit)
 183:   {
 184:     Matcher matcher = new Matcher(this, input);
 185:     ArrayList list = new ArrayList();
 186:     int empties = 0;
 187:     int count = 0;
 188:     int start = 0;
 189:     int end;
 190:     boolean matched = matcher.find();
 191: 
 192:     while (matched && (limit <= 0 || count < limit - 1))
 193:       {
 194:     ++count;
 195:     end = matcher.start();
 196:     if (start == end)
 197:       empties++;
 198:     else
 199:       {
 200:         while (empties > 0)
 201:           {
 202:         list.add("");
 203:         empties--;
 204:           }
 205: 
 206:         String text = input.subSequence(start, end).toString();
 207:         list.add(text);
 208:       }
 209:     start = matcher.end();
 210:     matched = matcher.find();
 211:       }
 212: 
 213:     // We matched nothing.
 214:     if (!matched && count == 0)
 215:       return new String[] { input.toString() };
 216:     
 217:     // Is the last token empty?
 218:     boolean emptyLast = (start == input.length());
 219: 
 220:     // Can/Must we add empties or an extra last token at the end?
 221:     if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
 222:       {
 223:     if (limit > list.size())
 224:       {
 225:         int max = limit - list.size();
 226:         empties = (empties > max) ? max : empties;
 227:       }
 228:     while (empties > 0)
 229:       {
 230:         list.add("");
 231:         empties--;
 232:       }
 233:       }
 234: 
 235:     // last token at end
 236:     if (limit != 0 || (limit == 0 && !emptyLast))
 237:       {
 238:     String t = input.subSequence(start, input.length()).toString();
 239:     if ("".equals(t) && limit == 0)
 240:       ; // Don't add.
 241:     else
 242:       list.add(t);
 243:       }
 244: 
 245:     String[] output = new String [list.size()];
 246:     list.toArray(output);
 247:     return output;
 248:   }
 249:   
 250:   public String pattern ()
 251:   {
 252:     return regex;
 253:   }
 254: }