GNU Classpath (0.98) | |
Frames | No Frames |
/* CollationElementIterator.java -- Walks through collation elements
   Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software Foundation

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */


package java.text;

import gnu.java.lang.CPStringBuilder;

import java.util.ArrayList;

/* Written using "Java Class Libraries", 2nd edition, plus online
 * API docs for JDK 1.2 from http://www.javasoft.com.
 * Status: Believed complete and correct to JDK 1.1.
 */

/**
 * This class walks through the character collation elements of a
 * <code>String</code> as defined by the collation rules in an instance of
 * <code>RuleBasedCollator</code>. There is no public constructor for
 * this class. An instance is created by calling the
 * <code>getCollationElementIterator</code> method on
 * <code>RuleBasedCollator</code>.
 *
 * <p>The text is decomposed into collation elements once, when it is set.
 * Iteration then only moves a cursor (<code>index</code>) over that
 * decomposition and keeps <code>textIndex</code> equal to the source text
 * offset of the element the cursor points at.</p>
 *
 * @author Aaron M. Renn (arenn@urbanophile.com)
 * @author Tom Tromey (tromey@cygnus.com)
 * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
 */
public final class CollationElementIterator
{
  /**
   * This is a constant value that is returned to indicate that the end of
   * the string was encountered.
   */
  public static final int NULLORDER = -1;

  /**
   * This is the RuleBasedCollator this object was created from.
   */
  RuleBasedCollator collator;

  /**
   * This is the String that is being iterated over.
   */
  CharacterIterator text;

  /**
   * This is the index into the collation decomposition where we are
   * currently scanning.
   */
  int index;

  /**
   * This is the index into the String where we are currently scanning.
   */
  int textIndex;

  /**
   * Array containing the collation decomposition of the
   * text given to the constructor.
   */
  private RuleBasedCollator.CollationElement[] text_decomposition;

  /**
   * Array containing, for each entry of <code>text_decomposition</code>,
   * the offset in the source text recorded for that element. It holds one
   * extra trailing entry equal to the length of the text.
   */
  private int[] text_indexes;

  /**
   * This method initializes a new instance of
   * <code>CollationElementIterator</code> to iterate over the specified
   * <code>String</code> using the rules in the specified
   * <code>RuleBasedCollator</code>.
   *
   * @param collator The <code>RuleBasedCollator</code> used for calculating
   *                 collation values
   * @param text The <code>String</code> to iterate over.
   */
  CollationElementIterator(RuleBasedCollator collator, String text)
  {
    this.collator = collator;

    setText (text);
  }

  /**
   * This method initializes a new instance of
   * <code>CollationElementIterator</code> to iterate over the characters
   * produced by the specified <code>CharacterIterator</code> using the
   * rules in the specified <code>RuleBasedCollator</code>.
   *
   * @param collator The <code>RuleBasedCollator</code> used for calculating
   *                 collation values
   * @param text The character iterator to iterate over.
   */
  CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
  {
    this.collator = collator;

    setText (text);
  }

  /**
   * Returns the collation element under the cursor and advances the cursor
   * by one element.
   *
   * @return The next collation element, or <code>null</code> if the end of
   *         the decomposition was reached.
   */
  RuleBasedCollator.CollationElement nextBlock()
  {
    if (index >= text_decomposition.length)
      return null;

    RuleBasedCollator.CollationElement e = text_decomposition[index];

    // text_indexes has one more entry than text_decomposition, so
    // index+1 is always a valid position here.
    textIndex = text_indexes[index+1];

    index++;

    return e;
  }

  /**
   * Moves the cursor back by one element and returns that element.
   *
   * @return The previous collation element, or <code>null</code> if the
   *         cursor is already at the beginning of the decomposition.
   */
  RuleBasedCollator.CollationElement previousBlock()
  {
    if (index == 0)
      return null;

    index--;
    RuleBasedCollator.CollationElement e = text_decomposition[index];

    // Keep textIndex equal to the offset of the element the cursor now
    // points at, exactly as nextBlock() and reset() do. Using index+1
    // here would report the offset of the element *after* the cursor.
    textIndex = text_indexes[index];

    return e;
  }

  /**
   * This method returns the collation ordering value of the next character
   * sequence in the string (it may be an extended character following
   * collation rules). This method will return <code>NULLORDER</code> if the
   * end of the string was reached.
   *
   * @return The collation ordering value.
   */
  public int next()
  {
    RuleBasedCollator.CollationElement e = nextBlock();

    if (e == null)
      return NULLORDER;

    return e.getValue();
  }

  /**
   * This method returns the collation ordering value of the previous
   * character in the string. This method will return <code>NULLORDER</code>
   * if the beginning of the string was reached.
   *
   * @return The collation ordering value.
   */
  public int previous()
  {
    RuleBasedCollator.CollationElement e = previousBlock();

    if (e == null)
      return NULLORDER;

    return e.getValue();
  }

  /**
   * This method returns the primary order value for the given collation
   * value.
   *
   * @param order The collation value returned from <code>next()</code> or
   *              <code>previous()</code>.
   *
   * @return The primary order value of the specified collation value. This
   *         is the high 16 bits.
   */
  public static int primaryOrder(int order)
  {
    // From the JDK 1.2 spec.
    return order >>> 16;
  }

  /**
   * This method resets the internal position pointer to read from the
   * beginning of the <code>String</code> again.
   */
  public void reset()
  {
    index = 0;
    textIndex = 0;
  }

  /**
   * This method returns the secondary order value for the given collation
   * value.
   *
   * @param order The collation value returned from <code>next()</code> or
   *              <code>previous()</code>.
   *
   * @return The secondary order value of the specified collation value. This
   *         is the bits 8-15.
   */
  public static short secondaryOrder(int order)
  {
    // From the JDK 1.2 spec.
    return (short) ((order >>> 8) & 255);
  }

  /**
   * This method returns the tertiary order value for the given collation
   * value.
   *
   * @param order The collation value returned from <code>next()</code> or
   *              <code>previous()</code>.
   *
   * @return The tertiary order value of the specified collation value. This
   *         is the low eight bits.
   */
  public static short tertiaryOrder(int order)
  {
    // From the JDK 1.2 spec.
    return (short) (order & 255);
  }

  /**
   * This method sets the <code>String</code> that it is iterating over
   * to the specified <code>String</code>. The text is decomposed into
   * collation elements immediately and the iteration position is moved
   * back to the beginning.
   *
   * @param text The new <code>String</code> to iterate over.
   *
   * @since 1.2
   */
  public void setText(String text)
  {
    int idx = 0;              // Position in the (possibly rewritten) work text.
    int idx_idx = 0;          // Matching position in the original text.
    int alreadyExpanded = 0;  // Expansion characters still to be consumed.
    int idxToMove = 0;        // Source advance to apply once they are.

    this.text = new StringCharacterIterator(text);
    // Restart iteration on the new text: both the element cursor and the
    // reported text offset go back to the beginning.
    this.index = 0;
    this.textIndex = 0;

    // The work text is only read through length/charAt/substring, so there
    // is no reason to intern the caller's string.
    String work_text = text;

    ArrayList a_element = new ArrayList();
    ArrayList a_idx = new ArrayList();

    // Build element collection ordered as they come in "text".
    while (idx < work_text.length())
      {
        String key, key_old;

        Object object = null;
        int p = 1;

        // IMPROVE: use a TreeMap with a prefix-ordering rule.
        // Try every substring starting at idx, shortest first, remembering
        // in key_old the key of the lookup preceding the current one
        // whenever that lookup succeeded.
        key_old = key = null;
        do
          {
            if (object != null)
              key_old = key;
            key = work_text.substring (idx, idx+p);
            object = collator.prefix_tree.get (key);
            if (object != null && idx < alreadyExpanded)
              {
                RuleBasedCollator.CollationElement candidate =
                  (RuleBasedCollator.CollationElement) object;
                // Discard a match whose own expansion starts with the part
                // of the work text that precedes idx.
                if (candidate.expansion != null &&
                    candidate.expansion.startsWith(work_text.substring(0, idx)))
                  {
                    object = null;
                    key = key_old;
                  }
              }
            p++;
          }
        while (idx+p <= work_text.length());

        if (object == null)
          key = key_old;

        RuleBasedCollator.CollationElement prefix =
          (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);

        /*
         * First case: There is no such sequence in the database.
         * We will have to build one from the context.
         */
        if (prefix == null)
          {
            /*
             * We are dealing with sequences in an expansion. They
             * are treated as accented characters (tertiary order).
             */
            if (alreadyExpanded > 0)
              {
                RuleBasedCollator.CollationElement e =
                  collator.getDefaultAccentedElement (work_text.charAt (idx));

                a_element.add (e);
                a_idx.add (new Integer(idx_idx));
                idx++;
                alreadyExpanded--;
                if (alreadyExpanded == 0)
                  {
                    /* There are no characters left in the expansion set.
                     * We can increase the pointer in the source string.
                     */
                    idx_idx += idxToMove;
                    idxToMove = 0;
                  }
                else
                  idx_idx++;
              }
            else
              {
                /* This is a normal character. */
                RuleBasedCollator.CollationElement e =
                  collator.getDefaultElement (work_text.charAt (idx));
                Integer i_ref = new Integer(idx_idx);

                /* Don't forget to mark it as a special sequence so the
                 * string can be ordered.
                 */
                a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
                a_idx.add (i_ref);
                a_element.add (e);
                a_idx.add (i_ref);
                idx_idx++;
                idx++;
              }
            continue;
          }

        /*
         * Second case: Here we have found a matching sequence.
         * Here we have an expansion string: prepend it to the "work text"
         * and add the corresponding sorting element. We must also record
         * how many expansion characters remain to be consumed
         * (alreadyExpanded) and, for the first pending expansion, how far
         * to advance in the source text once they are (idxToMove).
         */
        if (prefix.expansion != null)
          {
            work_text = prefix.expansion
              + work_text.substring (idx+prefix.key.length());
            idx = 0;
            a_element.add (prefix);
            a_idx.add (new Integer(idx_idx));
            if (alreadyExpanded == 0)
              idxToMove = prefix.key.length();
            alreadyExpanded += prefix.expansion.length()-prefix.key.length();
          }
        else
          {
            /* Third case: the simplest. We have got the prefix and it
             * does not have to be expanded.
             */
            a_element.add (prefix);
            a_idx.add (new Integer(idx_idx));
            idx += prefix.key.length();
            /* If the sequence is in an expansion, we must decrease the
             * counter.
             */
            if (alreadyExpanded > 0)
              {
                alreadyExpanded -= prefix.key.length();
                if (alreadyExpanded == 0)
                  {
                    idx_idx += idxToMove;
                    idxToMove = 0;
                  }
              }
            else
              idx_idx += prefix.key.length();
          }
      }

    text_decomposition = (RuleBasedCollator.CollationElement[])
      a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
    // One extra slot so that text_indexes[i+1] is valid for the last
    // element; it holds the end of the text.
    text_indexes = new int[a_idx.size()+1];
    for (int i = 0; i < a_idx.size(); i++)
      {
        text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
      }
    text_indexes[a_idx.size()] = text.length();
  }

  /**
   * This method sets the <code>String</code> that it is iterating over
   * to the <code>String</code> represented by the specified
   * <code>CharacterIterator</code>. The iterator is read from its first
   * character up to <code>CharacterIterator.DONE</code>, which moves the
   * position of <code>source</code>.
   *
   * @param source The <code>CharacterIterator</code> containing the new
   *               <code>String</code> to iterate over.
   */
  public void setText(CharacterIterator source)
  {
    CPStringBuilder expand = new CPStringBuilder();

    // For now assume we read from the beginning of the string.
    for (char c = source.first();
         c != CharacterIterator.DONE;
         c = source.next())
      expand.append(c);

    setText(expand.toString());
  }

  /**
   * This method returns the current offset into the <code>String</code>
   * that is being iterated over.
   *
   * @return The iteration index position.
   *
   * @since 1.2
   */
  public int getOffset()
  {
    return textIndex;
  }

  /**
   * This method sets the iteration index position into the current
   * <code>String</code> to the specified value. This value must not
   * be negative and must not be greater than the last index position
   * in the <code>String</code>. If the offset falls inside a collation
   * element, the position is moved back to the start of that element.
   *
   * @param offset The new iteration index position.
   *
   * @exception IllegalArgumentException If the new offset is not valid.
   */
  public void setOffset(int offset)
  {
    if (offset < 0)
      throw new IllegalArgumentException("Negative offset: " + offset);

    if (offset > (text.getEndIndex() - 1))
      throw new IllegalArgumentException("Offset too large: " + offset);

    // Find the first element whose recorded offset is not before the
    // requested one.
    for (index = 0; index < text_decomposition.length; index++)
      {
        if (offset <= text_indexes[index])
          break;
      }
    /*
     * As text_indexes[0] == 0, we should not have to take care whether index
     * is greater than 0 in the else branch below. It always is.
     */
    if (text_indexes[index] == offset)
      textIndex = offset;
    else
      {
        // The offset lies inside the preceding element: step the cursor
        // back onto it so that index and textIndex describe the same
        // element and the next call to next() returns it.
        index--;
        textIndex = text_indexes[index];
      }
  }

  /**
   * This method returns the maximum length of any expansion sequence that
   * ends with the specified collation order value. This implementation
   * does not compute it and always returns 1.
   *
   * @param value The collation order value
   *
   * @return The maximum length of an expansion sequence.
   */
  public int getMaxExpansion(int value)
  {
    return 1;
  }
}
GNU Classpath (0.98) |