Source for java.text.CollationElementIterator

   1: /* CollationElementIterator.java -- Walks through collation elements
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import java.util.ArrayList;
  42: 
  43: /* Written using "Java Class Libraries", 2nd edition, plus online
  44:  * API docs for JDK 1.2 from http://www.javasoft.com.
  45:  * Status: Believed complete and correct to JDK 1.1.
  46:  */
  47: 
  48: /**
  49:  * This class walks through the character collation elements of a 
  50:  * <code>String</code> as defined by the collation rules in an instance of 
  51:  * <code>RuleBasedCollator</code>.  There is no public constructor for
  52:  * this class.  An instance is created by calling the
  53:  * <code>getCollationElementIterator</code> method on 
  54:  * <code>RuleBasedCollator</code>.
  55:  *
  56:  * @author Aaron M. Renn (arenn@urbanophile.com)
  57:  * @author Tom Tromey (tromey@cygnus.com)
  58:  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  59:  */
  60: public final class CollationElementIterator
  61: {
  62:   /**
  63:    * This is a constant value that is returned to indicate that the end of 
  64:    * the string was encountered.
  65:    */
  66:   public static final int NULLORDER = -1;
  67: 
  68:   /**
  69:    * This is the RuleBasedCollator this object was created from.
  70:    */
  71:   RuleBasedCollator collator;
  72: 
  /**
   * This is a character iterator over the text that is being iterated over.
   */
  76:   CharacterIterator text;
  77: 
  78:   /**
  79:    * This is the index into the collation decomposition where we are currently scanning.
  80:    */
  81:   int index;
  82: 
  83:   /**
  84:    * This is the index into the String where we are currently scanning.
  85:    */
  86:   int textIndex;
  87: 
  88:   /**
  89:    * Array containing the collation decomposition of the
  90:    * text given to the constructor.
  91:    */
  92:   private RuleBasedCollator.CollationElement[] text_decomposition;
  93: 
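  /**
   * Array giving, for each entry of <code>text_decomposition</code>, the
   * offset in the source text recorded for that collation element.  It has
   * one extra trailing entry holding the length of the text.
   */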
  97:   private int[] text_indexes;
  98: 
  /**
   * Creates a <code>CollationElementIterator</code> that walks over the
   * given <code>String</code> using the rules of the given
   * <code>RuleBasedCollator</code>.
   *
   * @param collator The <code>RuleBasedCollator</code> used for calculating collation values
   * @param text The <code>String</code> to iterate over.
   */
  CollationElementIterator(RuleBasedCollator collator, String text)
  {
    // The collator must be stored first: setText() reads this.collator
    // while building the decomposition.
    this.collator = collator;
    this.setText(text);
  }
 113: 
 114:   /**
 115:    * This method initializes a new instance of <code>CollationElementIterator</code>
 116:    * to iterate over the specified <code>String</code> using the rules in the
 117:    * specified <code>RuleBasedCollator</code>.
 118:    *
 119:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 120:    * @param text The character iterator to iterate over.
 121:    */
 122:   CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
 123:   {
 124:     this.collator = collator;
 125:     
 126:     setText (text);    
 127:   }
 128: 
 129:   RuleBasedCollator.CollationElement nextBlock()
 130:   {
 131:     if (index >= text_decomposition.length)
 132:       return null;
 133:     
 134:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 135:     
 136:     textIndex = text_indexes[index+1];
 137: 
 138:     index++;
 139: 
 140:     return e;
 141:   }
 142: 
 143:   RuleBasedCollator.CollationElement previousBlock()
 144:   {
 145:     if (index == 0)
 146:       return null;
 147:     
 148:     index--;
 149:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 150: 
 151:     textIndex = text_indexes[index+1];
 152:     
 153:     return e;
 154:   }
 155: 
 156:   /**
 157:    * This method returns the collation ordering value of the next character sequence
 158:    * in the string (it may be an extended character following collation rules).
 159:    * This method will return <code>NULLORDER</code> if the
 160:    * end of the string was reached.
 161:    *
 162:    * @return The collation ordering value.
 163:    */
 164:   public int next()
 165:   {
 166:     RuleBasedCollator.CollationElement e = nextBlock();
 167: 
 168:     if (e == null)
 169:       return NULLORDER;
 170:     
 171:     return e.getValue();
 172:   }
 173: 
 174:   /**
 175:    * This method returns the collation ordering value of the previous character
 176:    * in the string.  This method will return <code>NULLORDER</code> if the
 177:    * beginning of the string was reached.
 178:    *
 179:    * @return The collation ordering value.
 180:    */
 181:   public int previous()
 182:   {
 183:     RuleBasedCollator.CollationElement e = previousBlock();
 184: 
 185:     if (e == null)
 186:       return NULLORDER;
 187:     
 188:     return e.getValue();
 189:   }
 190: 
 191:   /**
 192:    * This method returns the primary order value for the given collation
 193:    * value.
 194:    *
 195:    * @param order The collation value returned from <code>next()</code> or 
 196:    *              <code>previous()</code>.
 197:    *
 198:    * @return The primary order value of the specified collation value.  This is
 199:    *         the high 16 bits.
 200:    */
 201:   public static int primaryOrder(int order)
 202:   {
 203:     // From the JDK 1.2 spec.
 204:     return order >>> 16;
 205:   }
 206: 
 207:   /**
 208:    * This method resets the internal position pointer to read from the
 209:    * beginning of the <code>String</code> again.
 210:    */
 211:   public void reset()
 212:   {
 213:     index = 0;
 214:     textIndex = 0;
 215:   }
 216: 
 217:   /**
 218:    * This method returns the secondary order value for the given collation
 219:    * value.
 220:    *
 221:    * @param order The collation value returned from <code>next()</code> or 
 222:    *              <code>previous()</code>.
 223:    *
 224:    * @return The secondary order value of the specified collation value.  This 
 225:    *         is the bits 8-15.
 226:    */
 227:   public static short secondaryOrder(int order)
 228:   {
 229:     // From the JDK 1.2 spec.
 230:     return (short) ((order >>> 8) & 255);
 231:   }
 232: 
 233:   /**
 234:    * This method returns the tertiary order value for the given collation
 235:    * value.
 236:    *
 237:    * @param order The collation value returned from <code>next()</code> or 
 238:    *              <code>previous()</code>.
 239:    *
 240:    * @return The tertiary order value of the specified collation value.  This 
 241:    *         is the low eight bits.
 242:    */
 243:   public static short tertiaryOrder(int order)
 244:   {
 245:     // From the JDK 1.2 spec.
 246:     return (short) (order & 255);
 247:   }
 248: 
 249:   /**
 250:    * This method sets the <code>String</code> that it is iterating over
 251:    * to the specified <code>String</code>.
 252:    *
 253:    * @param text The new <code>String</code> to iterate over.
 254:    *
 255:    * @since 1.2
 256:    */
 257:   public void setText(String text)
 258:   {
 259:     int idx = 0;
 260:     int idx_idx = 0;
 261:     int alreadyExpanded = 0;
 262:     int idxToMove = 0;
 263: 
 264:     this.text = new StringCharacterIterator(text);
 265:     this.index = 0;
 266: 
 267:     String work_text = text.intern();
 268: 
 269:     ArrayList a_element = new ArrayList();
 270:     ArrayList a_idx = new ArrayList();
 271: 
 272:     // Build element collection ordered as they come in "text".
 273:     while (idx < work_text.length())
 274:       {
 275:     String key, key_old;
 276: 
 277:     Object object = null;
 278:     int p = 1;
 279:     
 280:     // IMPROVE: use a TreeMap with a prefix-ordering rule.
 281:     key_old = key = null;
 282:     do
 283:       {
 284:         if (object != null)
 285:           key_old = key;
 286:         key = work_text.substring (idx, idx+p);
 287:         object = collator.prefix_tree.get (key);
 288:         if (object != null && idx < alreadyExpanded)
 289:           {
 290:         RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 291:         if (prefix.expansion != null && 
 292:             prefix.expansion.startsWith(work_text.substring(0, idx)))
 293:         {
 294:           object = null;
 295:           key = key_old;
 296:         }
 297:           }
 298:         p++;
 299:       }
 300:     while (idx+p <= work_text.length());
 301:     
 302:     if (object == null)
 303:       key = key_old;
 304:     
 305:     RuleBasedCollator.CollationElement prefix =
 306:       (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 307: 
 308:     /*
 309:      * First case: There is no such sequence in the database.
 310:      * We will have to build one from the context.
 311:      */
 312:     if (prefix == null)
 313:       {
 314:         /*
 315:          * We are dealing with sequences in an expansion. They
 316:          * are treated as accented characters (tertiary order).
 317:          */
 318:         if (alreadyExpanded > 0)
 319:           {
 320:         RuleBasedCollator.CollationElement e =
 321:           collator.getDefaultAccentedElement (work_text.charAt (idx));
 322:         
 323:         a_element.add (e);
 324:         a_idx.add (new Integer(idx_idx));
 325:         idx++;
 326:         alreadyExpanded--;
 327:         if (alreadyExpanded == 0)
 328:           {
 329:             /* There is not any characters left in the expansion set.
 330:              * We can increase the pointer in the source string.
 331:              */
 332:             idx_idx += idxToMove;
 333:             idxToMove = 0; 
 334:           }
 335:         else
 336:           idx_idx++;
 337:           }
 338:         else
 339:           {
 340:         /* This is a normal character. */
 341:         RuleBasedCollator.CollationElement e =
 342:           collator.getDefaultElement (work_text.charAt (idx));
 343:         Integer i_ref = new Integer(idx_idx);
 344: 
 345:         /* Don't forget to mark it as a special sequence so the
 346:          * string can be ordered.
 347:          */
 348:         a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 349:         a_idx.add (i_ref);
 350:         a_element.add (e);
 351:         a_idx.add (i_ref);
 352:         idx_idx++;
 353:         idx++;
 354:           }
 355:         continue;
 356:       }
 357:  
 358:     /*
 359:      * Second case: Here we have found a matching sequence.
 360:      * Here we have an expansion string prepend it to the "work text" and
 361:      * add the corresponding sorting element. We must also mark 
 362:      */
 363:     if (prefix.expansion != null)
 364:       {
 365:         work_text = prefix.expansion
 366:           + work_text.substring (idx+prefix.key.length());
 367:         idx = 0;
 368:         a_element.add (prefix);
 369:         a_idx.add (new Integer(idx_idx));
 370:         if (alreadyExpanded == 0)
 371:           idxToMove = prefix.key.length();
 372:         alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 373:       }
 374:     else
 375:       {
 376:         /* Third case: the simplest. We have got the prefix and it
 377:          * has not to be expanded.
 378:          */
 379:         a_element.add (prefix);
 380:         a_idx.add (new Integer(idx_idx));
 381:         idx += prefix.key.length();
 382:         /* If the sequence is in an expansion, we must decrease the
 383:          * counter.
 384:          */
 385:         if (alreadyExpanded > 0)
 386:           {
 387:         alreadyExpanded -= prefix.key.length();
 388:         if (alreadyExpanded == 0)
 389:           {
 390:             idx_idx += idxToMove;
 391:             idxToMove = 0;
 392:           }
 393:           }
 394:         else
 395:           idx_idx += prefix.key.length();
 396:       }
 397:       }
 398:     
 399:     text_decomposition = (RuleBasedCollator.CollationElement[])
 400:        a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
 401:     text_indexes = new int[a_idx.size()+1];
 402:     for (int i = 0; i < a_idx.size(); i++) 
 403:       {
 404:     text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
 405:       }
 406:     text_indexes[a_idx.size()] = text.length();
 407:   }
 408: 
 409:   /**
 410:    * This method sets the <code>String</code> that it is iterating over
 411:    * to the <code>String</code> represented by the specified
 412:    * <code>CharacterIterator</code>.
 413:    *
 414:    * @param source The <code>CharacterIterator</code> containing the new
 415:    * <code>String</code> to iterate over.
 416:    */
 417:   public void setText(CharacterIterator source)
 418:   {
 419:     StringBuffer expand = new StringBuffer();
 420: 
 421:     // For now assume we read from the beginning of the string.
 422:     for (char c = source.first();
 423:      c != CharacterIterator.DONE;
 424:      c = source.next())
 425:       expand.append(c);
 426: 
 427:     setText(expand.toString());
 428:   }
 429: 
 430:   /**
 431:    * This method returns the current offset into the <code>String</code>
 432:    * that is being iterated over.
 433:    *
 434:    * @return The iteration index position.
 435:    *
 436:    * @since 1.2
 437:    */
 438:   public int getOffset()
 439:   {
 440:     return textIndex;
 441:   }
 442: 
 443:   /**
 444:    * This method sets the iteration index position into the current
 445:    * <code>String</code> to the specified value.  This value must not
 446:    * be negative and must not be greater than the last index position
 447:    * in the <code>String</code>.
 448:    *
 449:    * @param offset The new iteration index position.
 450:    *
 451:    * @exception IllegalArgumentException If the new offset is not valid.
 452:    */
 453:   public void setOffset(int offset)
 454:   {
 455:     if (offset < 0)
 456:       throw new IllegalArgumentException("Negative offset: " + offset);
 457: 
 458:     if (offset > (text.getEndIndex() - 1))
 459:       throw new IllegalArgumentException("Offset too large: " + offset);
 460:     
 461:     for (index = 0; index < text_decomposition.length; index++)
 462:       {    
 463:     if (offset <= text_indexes[index])
 464:       break;
 465:       }
 466:     /*
 467:      * As text_indexes[0] == 0, we should not have to take care whether index is
 468:      * greater than 0. It is always.
 469:      */
 470:     if (text_indexes[index] == offset)
 471:       textIndex = offset;
 472:     else
 473:       textIndex = text_indexes[index-1];
 474:   }
 475: 
 476:   /**
 477:    * This method returns the maximum length of any expansion sequence that
 478:    * ends with the specified collation order value.  (Whatever that means).
 479:    *
 480:    * @param value The collation order value
 481:    *
 482:    * @return The maximum length of an expansion sequence.
 483:    */
 484:   public int getMaxExpansion(int value)
 485:   {
 486:     return 1;
 487:   }
 488: }