Source for java.text.CollationElementIterator

   1: /* CollationElementIterator.java -- Walks through collation elements
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import gnu.java.lang.CPStringBuilder;
  42: 
  43: import java.util.ArrayList;
  44: 
  45: /* Written using "Java Class Libraries", 2nd edition, plus online
  46:  * API docs for JDK 1.2 from http://www.javasoft.com.
  47:  * Status: Believed complete and correct to JDK 1.1.
  48:  */
  49: 
  50: /**
  51:  * This class walks through the character collation elements of a 
  52:  * <code>String</code> as defined by the collation rules in an instance of 
  53:  * <code>RuleBasedCollator</code>.  There is no public constructor for
  54:  * this class.  An instance is created by calling the
  55:  * <code>getCollationElementIterator</code> method on 
  56:  * <code>RuleBasedCollator</code>.
  57:  *
  58:  * @author Aaron M. Renn (arenn@urbanophile.com)
  59:  * @author Tom Tromey (tromey@cygnus.com)
  60:  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  61:  */
  62: public final class CollationElementIterator
  63: {
  64:   /**
  65:    * This is a constant value that is returned to indicate that the end of 
  66:    * the string was encountered.
  67:    */
  68:   public static final int NULLORDER = -1;
  69: 
  70:   /**
  71:    * This is the RuleBasedCollator this object was created from.
  72:    */
  73:   RuleBasedCollator collator;
  74: 
  75:   /**
  76:    * This is the String that is being iterated over.
  77:    */
  78:   CharacterIterator text;
  79: 
  80:   /**
  81:    * This is the index into the collation decomposition where we are currently scanning.
  82:    */
  83:   int index;
  84: 
  85:   /**
  86:    * This is the index into the String where we are currently scanning.
  87:    */
  88:   int textIndex;
  89: 
  90:   /**
  91:    * Array containing the collation decomposition of the
  92:    * text given to the constructor.
  93:    */
  94:   private RuleBasedCollator.CollationElement[] text_decomposition;
  95: 
  96:   /**
  97:    * Array containing the index of the specified block.
  98:    */
  99:   private int[] text_indexes;
 100: 
 101:   /**
 102:    * This method initializes a new instance of <code>CollationElementIterator</code>
 103:    * to iterate over the specified <code>String</code> using the rules in the
 104:    * specified <code>RuleBasedCollator</code>.
 105:    *
 106:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 107:    * @param text The <code>String</code> to iterate over.
 108:    */
 109:   CollationElementIterator(RuleBasedCollator collator, String text)
 110:   {
 111:     this.collator = collator;
 112:     
 113:     setText (text);    
 114:   }
 115: 
 116:   /**
 117:    * This method initializes a new instance of <code>CollationElementIterator</code>
 118:    * to iterate over the specified <code>String</code> using the rules in the
 119:    * specified <code>RuleBasedCollator</code>.
 120:    *
 121:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 122:    * @param text The character iterator to iterate over.
 123:    */
 124:   CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
 125:   {
 126:     this.collator = collator;
 127:     
 128:     setText (text);    
 129:   }
 130: 
 131:   RuleBasedCollator.CollationElement nextBlock()
 132:   {
 133:     if (index >= text_decomposition.length)
 134:       return null;
 135:     
 136:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 137:     
 138:     textIndex = text_indexes[index+1];
 139: 
 140:     index++;
 141: 
 142:     return e;
 143:   }
 144: 
 145:   RuleBasedCollator.CollationElement previousBlock()
 146:   {
 147:     if (index == 0)
 148:       return null;
 149:     
 150:     index--;
 151:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 152: 
 153:     textIndex = text_indexes[index+1];
 154:     
 155:     return e;
 156:   }
 157: 
 158:   /**
 159:    * This method returns the collation ordering value of the next character sequence
 160:    * in the string (it may be an extended character following collation rules).
 161:    * This method will return <code>NULLORDER</code> if the
 162:    * end of the string was reached.
 163:    *
 164:    * @return The collation ordering value.
 165:    */
 166:   public int next()
 167:   {
 168:     RuleBasedCollator.CollationElement e = nextBlock();
 169: 
 170:     if (e == null)
 171:       return NULLORDER;
 172:     
 173:     return e.getValue();
 174:   }
 175: 
 176:   /**
 177:    * This method returns the collation ordering value of the previous character
 178:    * in the string.  This method will return <code>NULLORDER</code> if the
 179:    * beginning of the string was reached.
 180:    *
 181:    * @return The collation ordering value.
 182:    */
 183:   public int previous()
 184:   {
 185:     RuleBasedCollator.CollationElement e = previousBlock();
 186: 
 187:     if (e == null)
 188:       return NULLORDER;
 189:     
 190:     return e.getValue();
 191:   }
 192: 
 193:   /**
 194:    * This method returns the primary order value for the given collation
 195:    * value.
 196:    *
 197:    * @param order The collation value returned from <code>next()</code> or 
 198:    *              <code>previous()</code>.
 199:    *
 200:    * @return The primary order value of the specified collation value.  This is
 201:    *         the high 16 bits.
 202:    */
 203:   public static int primaryOrder(int order)
 204:   {
 205:     // From the JDK 1.2 spec.
 206:     return order >>> 16;
 207:   }
 208: 
 209:   /**
 210:    * This method resets the internal position pointer to read from the
 211:    * beginning of the <code>String</code> again.
 212:    */
 213:   public void reset()
 214:   {
 215:     index = 0;
 216:     textIndex = 0;
 217:   }
 218: 
 219:   /**
 220:    * This method returns the secondary order value for the given collation
 221:    * value.
 222:    *
 223:    * @param order The collation value returned from <code>next()</code> or 
 224:    *              <code>previous()</code>.
 225:    *
 226:    * @return The secondary order value of the specified collation value.  This 
 227:    *         is the bits 8-15.
 228:    */
 229:   public static short secondaryOrder(int order)
 230:   {
 231:     // From the JDK 1.2 spec.
 232:     return (short) ((order >>> 8) & 255);
 233:   }
 234: 
 235:   /**
 236:    * This method returns the tertiary order value for the given collation
 237:    * value.
 238:    *
 239:    * @param order The collation value returned from <code>next()</code> or 
 240:    *              <code>previous()</code>.
 241:    *
 242:    * @return The tertiary order value of the specified collation value.  This 
 243:    *         is the low eight bits.
 244:    */
 245:   public static short tertiaryOrder(int order)
 246:   {
 247:     // From the JDK 1.2 spec.
 248:     return (short) (order & 255);
 249:   }
 250: 
 251:   /**
 252:    * This method sets the <code>String</code> that it is iterating over
 253:    * to the specified <code>String</code>.
 254:    *
 255:    * @param text The new <code>String</code> to iterate over.
 256:    *
 257:    * @since 1.2
 258:    */
 259:   public void setText(String text)
 260:   {
 261:     int idx = 0;
 262:     int idx_idx = 0;
 263:     int alreadyExpanded = 0;
 264:     int idxToMove = 0;
 265: 
 266:     this.text = new StringCharacterIterator(text);
 267:     this.index = 0;
 268: 
 269:     String work_text = text.intern();
 270: 
 271:     ArrayList a_element = new ArrayList();
 272:     ArrayList a_idx = new ArrayList();
 273: 
 274:     // Build element collection ordered as they come in "text".
 275:     while (idx < work_text.length())
 276:       {
 277:     String key, key_old;
 278: 
 279:     Object object = null;
 280:     int p = 1;
 281:     
 282:     // IMPROVE: use a TreeMap with a prefix-ordering rule.
 283:     key_old = key = null;
 284:     do
 285:       {
 286:         if (object != null)
 287:           key_old = key;
 288:         key = work_text.substring (idx, idx+p);
 289:         object = collator.prefix_tree.get (key);
 290:         if (object != null && idx < alreadyExpanded)
 291:           {
 292:         RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 293:         if (prefix.expansion != null && 
 294:             prefix.expansion.startsWith(work_text.substring(0, idx)))
 295:         {
 296:           object = null;
 297:           key = key_old;
 298:         }
 299:           }
 300:         p++;
 301:       }
 302:     while (idx+p <= work_text.length());
 303:     
 304:     if (object == null)
 305:       key = key_old;
 306:     
 307:     RuleBasedCollator.CollationElement prefix =
 308:       (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 309: 
 310:     /*
 311:      * First case: There is no such sequence in the database.
 312:      * We will have to build one from the context.
 313:      */
 314:     if (prefix == null)
 315:       {
 316:         /*
 317:          * We are dealing with sequences in an expansion. They
 318:          * are treated as accented characters (tertiary order).
 319:          */
 320:         if (alreadyExpanded > 0)
 321:           {
 322:         RuleBasedCollator.CollationElement e =
 323:           collator.getDefaultAccentedElement (work_text.charAt (idx));
 324:         
 325:         a_element.add (e);
 326:         a_idx.add (new Integer(idx_idx));
 327:         idx++;
 328:         alreadyExpanded--;
 329:         if (alreadyExpanded == 0)
 330:           {
 331:             /* There is not any characters left in the expansion set.
 332:              * We can increase the pointer in the source string.
 333:              */
 334:             idx_idx += idxToMove;
 335:             idxToMove = 0; 
 336:           }
 337:         else
 338:           idx_idx++;
 339:           }
 340:         else
 341:           {
 342:         /* This is a normal character. */
 343:         RuleBasedCollator.CollationElement e =
 344:           collator.getDefaultElement (work_text.charAt (idx));
 345:         Integer i_ref = new Integer(idx_idx);
 346: 
 347:         /* Don't forget to mark it as a special sequence so the
 348:          * string can be ordered.
 349:          */
 350:         a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 351:         a_idx.add (i_ref);
 352:         a_element.add (e);
 353:         a_idx.add (i_ref);
 354:         idx_idx++;
 355:         idx++;
 356:           }
 357:         continue;
 358:       }
 359:  
 360:     /*
 361:      * Second case: Here we have found a matching sequence.
 362:      * Here we have an expansion string prepend it to the "work text" and
 363:      * add the corresponding sorting element. We must also mark 
 364:      */
 365:     if (prefix.expansion != null)
 366:       {
 367:         work_text = prefix.expansion
 368:           + work_text.substring (idx+prefix.key.length());
 369:         idx = 0;
 370:         a_element.add (prefix);
 371:         a_idx.add (new Integer(idx_idx));
 372:         if (alreadyExpanded == 0)
 373:           idxToMove = prefix.key.length();
 374:         alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 375:       }
 376:     else
 377:       {
 378:         /* Third case: the simplest. We have got the prefix and it
 379:          * has not to be expanded.
 380:          */
 381:         a_element.add (prefix);
 382:         a_idx.add (new Integer(idx_idx));
 383:         idx += prefix.key.length();
 384:         /* If the sequence is in an expansion, we must decrease the
 385:          * counter.
 386:          */
 387:         if (alreadyExpanded > 0)
 388:           {
 389:         alreadyExpanded -= prefix.key.length();
 390:         if (alreadyExpanded == 0)
 391:           {
 392:             idx_idx += idxToMove;
 393:             idxToMove = 0;
 394:           }
 395:           }
 396:         else
 397:           idx_idx += prefix.key.length();
 398:       }
 399:       }
 400:     
 401:     text_decomposition = (RuleBasedCollator.CollationElement[])
 402:        a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
 403:     text_indexes = new int[a_idx.size()+1];
 404:     for (int i = 0; i < a_idx.size(); i++) 
 405:       {
 406:     text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
 407:       }
 408:     text_indexes[a_idx.size()] = text.length();
 409:   }
 410: 
 411:   /**
 412:    * This method sets the <code>String</code> that it is iterating over
 413:    * to the <code>String</code> represented by the specified
 414:    * <code>CharacterIterator</code>.
 415:    *
 416:    * @param source The <code>CharacterIterator</code> containing the new
 417:    * <code>String</code> to iterate over.
 418:    */
 419:   public void setText(CharacterIterator source)
 420:   {
 421:     CPStringBuilder expand = new CPStringBuilder();
 422: 
 423:     // For now assume we read from the beginning of the string.
 424:     for (char c = source.first();
 425:      c != CharacterIterator.DONE;
 426:      c = source.next())
 427:       expand.append(c);
 428: 
 429:     setText(expand.toString());
 430:   }
 431: 
 432:   /**
 433:    * This method returns the current offset into the <code>String</code>
 434:    * that is being iterated over.
 435:    *
 436:    * @return The iteration index position.
 437:    *
 438:    * @since 1.2
 439:    */
 440:   public int getOffset()
 441:   {
 442:     return textIndex;
 443:   }
 444: 
 445:   /**
 446:    * This method sets the iteration index position into the current
 447:    * <code>String</code> to the specified value.  This value must not
 448:    * be negative and must not be greater than the last index position
 449:    * in the <code>String</code>.
 450:    *
 451:    * @param offset The new iteration index position.
 452:    *
 453:    * @exception IllegalArgumentException If the new offset is not valid.
 454:    */
 455:   public void setOffset(int offset)
 456:   {
 457:     if (offset < 0)
 458:       throw new IllegalArgumentException("Negative offset: " + offset);
 459: 
 460:     if (offset > (text.getEndIndex() - 1))
 461:       throw new IllegalArgumentException("Offset too large: " + offset);
 462:     
 463:     for (index = 0; index < text_decomposition.length; index++)
 464:       {    
 465:     if (offset <= text_indexes[index])
 466:       break;
 467:       }
 468:     /*
 469:      * As text_indexes[0] == 0, we should not have to take care whether index is
 470:      * greater than 0. It is always.
 471:      */
 472:     if (text_indexes[index] == offset)
 473:       textIndex = offset;
 474:     else
 475:       textIndex = text_indexes[index-1];
 476:   }
 477: 
 478:   /**
 479:    * This method returns the maximum length of any expansion sequence that
 480:    * ends with the specified collation order value.  (Whatever that means).
 481:    *
 482:    * @param value The collation order value
 483:    *
 484:    * @return The maximum length of an expansion sequence.
 485:    */
 486:   public int getMaxExpansion(int value)
 487:   {
 488:     return 1;
 489:   }
 490: }