GNU Classpath (0.97.2) | |
Frames | No Frames |
1: /* CollationElementIterator.java -- Walks through collation elements 2: Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software Foundation 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package java.text; 40: 41: import java.util.ArrayList; 42: 43: /* Written using "Java Class Libraries", 2nd edition, plus online 44: * API docs for JDK 1.2 from http://www.javasoft.com. 45: * Status: Believed complete and correct to JDK 1.1. 46: */ 47: 48: /** 49: * This class walks through the character collation elements of a 50: * <code>String</code> as defined by the collation rules in an instance of 51: * <code>RuleBasedCollator</code>. There is no public constructor for 52: * this class. An instance is created by calling the 53: * <code>getCollationElementIterator</code> method on 54: * <code>RuleBasedCollator</code>. 55: * 56: * @author Aaron M. Renn (arenn@urbanophile.com) 57: * @author Tom Tromey (tromey@cygnus.com) 58: * @author Guilhem Lavaux (guilhem.lavaux@free.fr) 59: */ 60: public final class CollationElementIterator 61: { 62: /** 63: * This is a constant value that is returned to indicate that the end of 64: * the string was encountered. 65: */ 66: public static final int NULLORDER = -1; 67: 68: /** 69: * This is the RuleBasedCollator this object was created from. 70: */ 71: RuleBasedCollator collator; 72: 73: /** 74: * This is the String that is being iterated over. 75: */ 76: CharacterIterator text; 77: 78: /** 79: * This is the index into the collation decomposition where we are currently scanning. 80: */ 81: int index; 82: 83: /** 84: * This is the index into the String where we are currently scanning. 85: */ 86: int textIndex; 87: 88: /** 89: * Array containing the collation decomposition of the 90: * text given to the constructor. 91: */ 92: private RuleBasedCollator.CollationElement[] text_decomposition; 93: 94: /** 95: * Array containing the index of the specified block. 96: */ 97: private int[] text_indexes; 98: 99: /** 100: * This method initializes a new instance of <code>CollationElementIterator</code> 101: * to iterate over the specified <code>String</code> using the rules in the 102: * specified <code>RuleBasedCollator</code>. 103: * 104: * @param collator The <code>RuleBasedCollation</code> used for calculating collation values 105: * @param text The <code>String</code> to iterate over. 106: */ 107: CollationElementIterator(RuleBasedCollator collator, String text) 108: { 109: this.collator = collator; 110: 111: setText (text); 112: } 113: 114: /** 115: * This method initializes a new instance of <code>CollationElementIterator</code> 116: * to iterate over the specified <code>String</code> using the rules in the 117: * specified <code>RuleBasedCollator</code>. 118: * 119: * @param collator The <code>RuleBasedCollation</code> used for calculating collation values 120: * @param text The character iterator to iterate over. 121: */ 122: CollationElementIterator(RuleBasedCollator collator, CharacterIterator text) 123: { 124: this.collator = collator; 125: 126: setText (text); 127: } 128: 129: RuleBasedCollator.CollationElement nextBlock() 130: { 131: if (index >= text_decomposition.length) 132: return null; 133: 134: RuleBasedCollator.CollationElement e = text_decomposition[index]; 135: 136: textIndex = text_indexes[index+1]; 137: 138: index++; 139: 140: return e; 141: } 142: 143: RuleBasedCollator.CollationElement previousBlock() 144: { 145: if (index == 0) 146: return null; 147: 148: index--; 149: RuleBasedCollator.CollationElement e = text_decomposition[index]; 150: 151: textIndex = text_indexes[index+1]; 152: 153: return e; 154: } 155: 156: /** 157: * This method returns the collation ordering value of the next character sequence 158: * in the string (it may be an extended character following collation rules). 159: * This method will return <code>NULLORDER</code> if the 160: * end of the string was reached. 161: * 162: * @return The collation ordering value. 163: */ 164: public int next() 165: { 166: RuleBasedCollator.CollationElement e = nextBlock(); 167: 168: if (e == null) 169: return NULLORDER; 170: 171: return e.getValue(); 172: } 173: 174: /** 175: * This method returns the collation ordering value of the previous character 176: * in the string. This method will return <code>NULLORDER</code> if the 177: * beginning of the string was reached. 178: * 179: * @return The collation ordering value. 180: */ 181: public int previous() 182: { 183: RuleBasedCollator.CollationElement e = previousBlock(); 184: 185: if (e == null) 186: return NULLORDER; 187: 188: return e.getValue(); 189: } 190: 191: /** 192: * This method returns the primary order value for the given collation 193: * value. 194: * 195: * @param order The collation value returned from <code>next()</code> or 196: * <code>previous()</code>. 197: * 198: * @return The primary order value of the specified collation value. This is 199: * the high 16 bits. 200: */ 201: public static int primaryOrder(int order) 202: { 203: // From the JDK 1.2 spec. 204: return order >>> 16; 205: } 206: 207: /** 208: * This method resets the internal position pointer to read from the 209: * beginning of the <code>String</code> again. 210: */ 211: public void reset() 212: { 213: index = 0; 214: textIndex = 0; 215: } 216: 217: /** 218: * This method returns the secondary order value for the given collation 219: * value. 220: * 221: * @param order The collation value returned from <code>next()</code> or 222: * <code>previous()</code>. 223: * 224: * @return The secondary order value of the specified collation value. This 225: * is the bits 8-15. 226: */ 227: public static short secondaryOrder(int order) 228: { 229: // From the JDK 1.2 spec. 230: return (short) ((order >>> 8) & 255); 231: } 232: 233: /** 234: * This method returns the tertiary order value for the given collation 235: * value. 236: * 237: * @param order The collation value returned from <code>next()</code> or 238: * <code>previous()</code>. 239: * 240: * @return The tertiary order value of the specified collation value. This 241: * is the low eight bits. 242: */ 243: public static short tertiaryOrder(int order) 244: { 245: // From the JDK 1.2 spec. 246: return (short) (order & 255); 247: } 248: 249: /** 250: * This method sets the <code>String</code> that it is iterating over 251: * to the specified <code>String</code>. 252: * 253: * @param text The new <code>String</code> to iterate over. 254: * 255: * @since 1.2 256: */ 257: public void setText(String text) 258: { 259: int idx = 0; 260: int idx_idx = 0; 261: int alreadyExpanded = 0; 262: int idxToMove = 0; 263: 264: this.text = new StringCharacterIterator(text); 265: this.index = 0; 266: 267: String work_text = text.intern(); 268: 269: ArrayList a_element = new ArrayList(); 270: ArrayList a_idx = new ArrayList(); 271: 272: // Build element collection ordered as they come in "text". 273: while (idx < work_text.length()) 274: { 275: String key, key_old; 276: 277: Object object = null; 278: int p = 1; 279: 280: // IMPROVE: use a TreeMap with a prefix-ordering rule. 281: key_old = key = null; 282: do 283: { 284: if (object != null) 285: key_old = key; 286: key = work_text.substring (idx, idx+p); 287: object = collator.prefix_tree.get (key); 288: if (object != null && idx < alreadyExpanded) 289: { 290: RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object; 291: if (prefix.expansion != null && 292: prefix.expansion.startsWith(work_text.substring(0, idx))) 293: { 294: object = null; 295: key = key_old; 296: } 297: } 298: p++; 299: } 300: while (idx+p <= work_text.length()); 301: 302: if (object == null) 303: key = key_old; 304: 305: RuleBasedCollator.CollationElement prefix = 306: (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key); 307: 308: /* 309: * First case: There is no such sequence in the database. 310: * We will have to build one from the context. 311: */ 312: if (prefix == null) 313: { 314: /* 315: * We are dealing with sequences in an expansion. They 316: * are treated as accented characters (tertiary order). 317: */ 318: if (alreadyExpanded > 0) 319: { 320: RuleBasedCollator.CollationElement e = 321: collator.getDefaultAccentedElement (work_text.charAt (idx)); 322: 323: a_element.add (e); 324: a_idx.add (new Integer(idx_idx)); 325: idx++; 326: alreadyExpanded--; 327: if (alreadyExpanded == 0) 328: { 329: /* There is not any characters left in the expansion set. 330: * We can increase the pointer in the source string. 331: */ 332: idx_idx += idxToMove; 333: idxToMove = 0; 334: } 335: else 336: idx_idx++; 337: } 338: else 339: { 340: /* This is a normal character. */ 341: RuleBasedCollator.CollationElement e = 342: collator.getDefaultElement (work_text.charAt (idx)); 343: Integer i_ref = new Integer(idx_idx); 344: 345: /* Don't forget to mark it as a special sequence so the 346: * string can be ordered. 347: */ 348: a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ); 349: a_idx.add (i_ref); 350: a_element.add (e); 351: a_idx.add (i_ref); 352: idx_idx++; 353: idx++; 354: } 355: continue; 356: } 357: 358: /* 359: * Second case: Here we have found a matching sequence. 360: * Here we have an expansion string prepend it to the "work text" and 361: * add the corresponding sorting element. We must also mark 362: */ 363: if (prefix.expansion != null) 364: { 365: work_text = prefix.expansion 366: + work_text.substring (idx+prefix.key.length()); 367: idx = 0; 368: a_element.add (prefix); 369: a_idx.add (new Integer(idx_idx)); 370: if (alreadyExpanded == 0) 371: idxToMove = prefix.key.length(); 372: alreadyExpanded += prefix.expansion.length()-prefix.key.length(); 373: } 374: else 375: { 376: /* Third case: the simplest. We have got the prefix and it 377: * has not to be expanded. 378: */ 379: a_element.add (prefix); 380: a_idx.add (new Integer(idx_idx)); 381: idx += prefix.key.length(); 382: /* If the sequence is in an expansion, we must decrease the 383: * counter. 384: */ 385: if (alreadyExpanded > 0) 386: { 387: alreadyExpanded -= prefix.key.length(); 388: if (alreadyExpanded == 0) 389: { 390: idx_idx += idxToMove; 391: idxToMove = 0; 392: } 393: } 394: else 395: idx_idx += prefix.key.length(); 396: } 397: } 398: 399: text_decomposition = (RuleBasedCollator.CollationElement[]) 400: a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]); 401: text_indexes = new int[a_idx.size()+1]; 402: for (int i = 0; i < a_idx.size(); i++) 403: { 404: text_indexes[i] = ((Integer)a_idx.get(i)).intValue(); 405: } 406: text_indexes[a_idx.size()] = text.length(); 407: } 408: 409: /** 410: * This method sets the <code>String</code> that it is iterating over 411: * to the <code>String</code> represented by the specified 412: * <code>CharacterIterator</code>. 413: * 414: * @param source The <code>CharacterIterator</code> containing the new 415: * <code>String</code> to iterate over. 416: */ 417: public void setText(CharacterIterator source) 418: { 419: StringBuffer expand = new StringBuffer(); 420: 421: // For now assume we read from the beginning of the string. 422: for (char c = source.first(); 423: c != CharacterIterator.DONE; 424: c = source.next()) 425: expand.append(c); 426: 427: setText(expand.toString()); 428: } 429: 430: /** 431: * This method returns the current offset into the <code>String</code> 432: * that is being iterated over. 433: * 434: * @return The iteration index position. 435: * 436: * @since 1.2 437: */ 438: public int getOffset() 439: { 440: return textIndex; 441: } 442: 443: /** 444: * This method sets the iteration index position into the current 445: * <code>String</code> to the specified value. This value must not 446: * be negative and must not be greater than the last index position 447: * in the <code>String</code>. 448: * 449: * @param offset The new iteration index position. 450: * 451: * @exception IllegalArgumentException If the new offset is not valid. 452: */ 453: public void setOffset(int offset) 454: { 455: if (offset < 0) 456: throw new IllegalArgumentException("Negative offset: " + offset); 457: 458: if (offset > (text.getEndIndex() - 1)) 459: throw new IllegalArgumentException("Offset too large: " + offset); 460: 461: for (index = 0; index < text_decomposition.length; index++) 462: { 463: if (offset <= text_indexes[index]) 464: break; 465: } 466: /* 467: * As text_indexes[0] == 0, we should not have to take care whether index is 468: * greater than 0. It is always. 469: */ 470: if (text_indexes[index] == offset) 471: textIndex = offset; 472: else 473: textIndex = text_indexes[index-1]; 474: } 475: 476: /** 477: * This method returns the maximum length of any expansion sequence that 478: * ends with the specified collation order value. (Whatever that means). 479: * 480: * @param value The collation order value 481: * 482: * @return The maximum length of an expansion sequence. 483: */ 484: public int getMaxExpansion(int value) 485: { 486: return 1; 487: } 488: }
GNU Classpath (0.97.2) |