ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rbbi.h
Go to the documentation of this file.
1 /*
2 ***************************************************************************
3 * Copyright (C) 1999-2011 International Business Machines Corporation *
4 * and others. All rights reserved. *
5 ***************************************************************************
6 
7 **********************************************************************
8 * Date Name Description
9 * 10/22/99 alan Creation.
10 * 11/11/99 rgillam Complete port from Java.
11 **********************************************************************
12 */
13 
14 #ifndef RBBI_H
15 #define RBBI_H
16 
17 #include "unicode/utypes.h"
18 
24 #if !UCONFIG_NO_BREAK_ITERATION
25 
26 #include "unicode/brkiter.h"
27 #include "unicode/udata.h"
28 #include "unicode/parseerr.h"
29 #include "unicode/schriter.h"
30 #include "unicode/uchriter.h"
31 
32 
33 struct UTrie;
34 
36 
38 struct RBBIDataHeader;
39 class RuleBasedBreakIteratorTables;
40 class BreakIterator;
41 class RBBIDataWrapper;
42 class UStack;
43 class LanguageBreakEngine;
44 class UnhandledEngine;
45 struct RBBIStateTable;
46 
47 
48 
49 
66 
67 protected:
73 
80 
87 
94 
99  RBBIDataWrapper *fData;
100 
105 
113 
120 
129 
135 
142 
151 
159  UnhandledEngine *fUnhandledBreakEngine;
160 
166  int32_t fBreakType;
167 
168 protected:
169  //=======================================================================
170  // constructors
171  //=======================================================================
172 
// Tag enum with a single enumerator. It is the type of the second parameter
// of the (const RBBIDataHeader*, EDontAdopt, UErrorCode&) constructor declared
// just below, which distinguishes that overload from the
// (RBBIDataHeader*, UErrorCode&) one.
// NOTE(review): the name suggests the tagged constructor does not take
// ownership of the data header -- confirm against the implementation.
181  enum EDontAdopt {
182  kDontAdopt
183  };
184 
195  RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
196 
205  RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
206 
207 
208  friend class RBBIRuleBuilder;
210  friend class BreakIterator;
211 
212 
213 
214 public:
215 
221 
229 
239  UParseError &parseError,
240  UErrorCode &status);
241 
242 
243 
244 
268  RuleBasedBreakIterator(const uint8_t *compiledRules,
269  uint32_t ruleLength,
270  UErrorCode &status);
271 
272 
286 
291  virtual ~RuleBasedBreakIterator();
292 
300  RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
301 
310  virtual UBool operator==(const BreakIterator& that) const;
311 
319  UBool operator!=(const BreakIterator& that) const;
320 
331  virtual BreakIterator* clone() const;
332 
338  virtual int32_t hashCode(void) const;
339 
345  virtual const UnicodeString& getRules(void) const;
346 
347  //=======================================================================
348  // BreakIterator overrides
349  //=======================================================================
350 
376  virtual CharacterIterator& getText(void) const;
377 
378 
393  virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
394 
402  virtual void adoptText(CharacterIterator* newText);
403 
410  virtual void setText(const UnicodeString& newText);
411 
425  virtual void setText(UText *text, UErrorCode &status);
426 
432  virtual int32_t first(void);
433 
439  virtual int32_t last(void);
440 
451  virtual int32_t next(int32_t n);
452 
458  virtual int32_t next(void);
459 
465  virtual int32_t previous(void);
466 
474  virtual int32_t following(int32_t offset);
475 
483  virtual int32_t preceding(int32_t offset);
484 
493  virtual UBool isBoundary(int32_t offset);
494 
500  virtual int32_t current(void) const;
501 
502 
535  virtual int32_t getRuleStatus() const;
536 
560  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
561 
573  virtual UClassID getDynamicClassID(void) const;
574 
586  static UClassID U_EXPORT2 getStaticClassID(void);
587 
588  /*
589  * Create a clone (copy) of this break iterator in memory provided
590  * by the caller. The idea is to increase performance by avoiding
591  * a storage allocation. Use of this function is NOT RECOMMENDED.
592  * Performance gains are minimal, and correct buffer management is
593  * tricky. Use clone() instead.
594  *
595  * @param stackBuffer The pointer to the memory into which the cloned object
596  * should be placed. If NULL, allocate heap memory
597  * for the cloned object.
598  * @param BufferSize The size of the buffer. If zero, return the required
599  * buffer size, but do not clone the object. If the
600  * size was too small (but not zero), allocate heap
601  * storage for the cloned object.
602  *
603  * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
604  * returned if the provided buffer was too small, and
605  * the clone was therefore put on the heap.
606  *
607  * @return Pointer to the clone object. This may differ from the stackBuffer
608  * address if the byte alignment of the stack buffer was not suitable
609  * or if the stackBuffer was too small to hold the clone.
610  * @stable ICU 2.0
611  */
612  virtual BreakIterator * createBufferClone(void *stackBuffer,
613  int32_t &BufferSize,
614  UErrorCode &status);
615 
616 
634  virtual const uint8_t *getBinaryRules(uint32_t &length);
635 
636 
637 protected:
638  //=======================================================================
639  // implementation
640  //=======================================================================
646  virtual void reset(void);
647 
648 #if 0
649 
657  virtual UBool isDictionaryChar(UChar32);
658 
663  virtual int32_t getBreakType() const;
664 #endif
665 
670  virtual void setBreakType(int32_t type);
671 
677  void init();
678 
679 private:
680 
690  int32_t handlePrevious(const RBBIStateTable *statetable);
691 
701  int32_t handleNext(const RBBIStateTable *statetable);
702 
703 protected:
704 
719  int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
720 
721 private:
722 
729  const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
730 
734  void makeRuleStatusValid();
735 
736 };
737 
738 //------------------------------------------------------------------------------
739 //
740 // Inline Functions Definitions ...
741 //
742 //------------------------------------------------------------------------------
743 
745  return !operator==(that);
746 }
747 
749 
750 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
751 
752 #endif