00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016
00017 #include "unicode/utypes.h"
00018
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031
00032
/* Forward declaration at global scope (placed before U_NAMESPACE_BEGIN).
 * NOTE(review): UTrie is not referenced by any declaration visible in this
 * header; it is presumably kept for other translation units - confirm before
 * removing. */
00033 struct UTrie;
00034
00035 U_NAMESPACE_BEGIN
00036
/* Forward declarations: the class below only uses these types through
 * pointers or references, so their full definitions are not needed here.
 * Used by the visible class: RBBIDataHeader, RBBIDataWrapper, UStack,
 * LanguageBreakEngine, UnhandledEngine, RBBIStateTable.
 * NOTE(review): RuleBasedBreakIteratorTables is not referenced anywhere in
 * the visible part of this header, and BreakIterator is also used as a base
 * class below, which requires its complete definition (presumably supplied
 * by unicode/brkiter.h) - confirm whether these two declarations are still
 * needed. */
00038 struct RBBIDataHeader;
00039 class RuleBasedBreakIteratorTables;
00040 class BreakIterator;
00041 class RBBIDataWrapper;
00042 class UStack;
00043 class LanguageBreakEngine;
00044 class UnhandledEngine;
00045 struct RBBIStateTable;
00046
00047
00048
00049
/**
 * Break iterator declared as a public subclass of BreakIterator and exported
 * through U_COMMON_API.
 *
 * Only declarations are visible in this header. Notes below describe what the
 * signatures show; anything about runtime behavior is inferred from names and
 * is explicitly hedged - confirm against the implementation file.
 */
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00066
00067 protected:
      /** Text handle of type UText. Presumably the text being iterated over - TODO confirm. */
00072 UText *fText;
00073
      /** CharacterIterator handle. Presumably backs getText(), which returns a CharacterIterator& - TODO confirm. */
00079 CharacterIterator *fCharIter;
00080
      /** StringCharacterIterator handle. NOTE(review): ownership and relation to fCharIter are not shown here. */
00086 StringCharacterIterator *fSCharIter;
00087
      /** UCharCharacterIterator handle. NOTE(review): ownership and relation to fCharIter are not shown here. */
00093 UCharCharacterIterator *fDCharIter;
00094
      /** Pointer to an RBBIDataWrapper (type only forward-declared in this header); presumably the compiled rule data. */
00099 RBBIDataWrapper *fData;
00100
      /** Index related to the rule status of the most recent break, judging by the name; see getRuleStatus(). */
00104 int32_t fLastRuleStatusIndex;
00105
      /** Flag; presumably true when fLastRuleStatusIndex is current (see makeRuleStatusValid()) - TODO confirm. */
00112 UBool fLastStatusIndexValid;
00113
      /** Unsigned counter; by its name, a count of dictionary characters encountered - TODO confirm when it is updated. */
00119 uint32_t fDictionaryCharCount;
00120
      /** Pointer to an int32_t array; presumably cached break positions produced by dictionary-based breaking. */
00128 int32_t* fCachedBreakPositions;
00129
      /** Presumably the number of entries in fCachedBreakPositions - TODO confirm. */
00134 int32_t fNumCachedBreakPositions;
00135
      /** Presumably the current index into fCachedBreakPositions - TODO confirm. */
00141 int32_t fPositionInCache;
00142
      /** UStack pointer; by its name, holds language break engines (see getLanguageBreakEngine()). */
00150 UStack *fLanguageBreakEngines;
00151
      /** UnhandledEngine pointer; presumably the fallback for characters no language engine handles - TODO confirm. */
00159 UnhandledEngine *fUnhandledBreakEngine;
00160
      /** Integer break type; set through setBreakType(). The meaning of its values is not shown in this header. */
00166 int32_t fBreakType;
00167
      /* NOTE(review): this second "protected:" repeats the access level already in effect. */
00168 protected:
00169
00170
00171
00172
      /**
       * Single-value tag enum. It is used as a parameter type to select the
       * constructor overload that takes a const RBBIDataHeader*.
       */
00181 enum EDontAdopt {
00182 kDontAdopt
00183 };
00184
      /**
       * Protected constructor from a non-const RBBIDataHeader pointer.
       * NOTE(review): presumably adopts (takes ownership of) data, in contrast
       * to the EDontAdopt overload - confirm in the implementation.
       * @param data   rule data header
       * @param status UErrorCode passed by non-const reference (in/out)
       */
00195 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00196
      /**
       * Protected constructor from a const RBBIDataHeader pointer, selected by
       * the EDontAdopt tag argument.
       * @param data      rule data header (const; presumably not owned by this object)
       * @param dontAdopt tag value used only for overload selection
       * @param status    UErrorCode passed by non-const reference (in/out)
       */
00205 RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
00206
00207
      /* Friends get access to the protected/private members declared in this class. */
00208 friend class RBBIRuleBuilder;
00210 friend class BreakIterator;
00211
00212
00213
00214 public:
00215
      /** Default constructor. NOTE(review): the resulting state is not visible from this header. */
00220 RuleBasedBreakIterator();
00221
      /** Copy constructor. */
00228 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00229
      /**
       * Constructor from a rule description string.
       * @param rules      rule source text
       * @param parseError UParseError passed by non-const reference; presumably filled in on a rule syntax error
       * @param status     UErrorCode passed by non-const reference (in/out)
       */
00238 RuleBasedBreakIterator( const UnicodeString &rules,
00239 UParseError &parseError,
00240 UErrorCode &status);
00241
00242
      /**
       * Constructor from a UDataMemory image.
       * NOTE(review): whether the image is adopted by this object is not shown here - confirm.
       * @param image  data memory handle
       * @param status UErrorCode passed by non-const reference (in/out)
       */
00255 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00256
      /** Virtual destructor. */
00261 virtual ~RuleBasedBreakIterator();
00262
      /** Copy assignment (non-virtual); returns a reference to a RuleBasedBreakIterator. */
00270 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00271
      /** Virtual equality comparison against any BreakIterator. */
00280 virtual UBool operator==(const BreakIterator& that) const;
00281
      /** Non-virtual inequality; defined inline after the class as the negation of operator==. */
00289 UBool operator!=(const BreakIterator& that) const;
00290
      /** Returns a BreakIterator pointer; by its name, a copy of this object. Ownership of the result is presumably the caller's - TODO confirm. */
00301 virtual BreakIterator* clone() const;
00302
      /** Returns an int32_t hash code for this object. */
00308 virtual int32_t hashCode(void) const;
00309
      /** Returns a const reference to a UnicodeString; by its name, the rule description. */
00315 virtual const UnicodeString& getRules(void) const;
00316
00317
00318
00319
00320
      /** Returns a CharacterIterator reference; presumably for the text being analyzed. NOTE(review): lifetime of the referenced object is not shown here. */
00346 virtual CharacterIterator& getText(void) const;
00347
00348
      /**
       * Returns a UText pointer for the text.
       * @param fillIn caller-supplied UText; presumably reused for the result when non-null - TODO confirm
       * @param status UErrorCode passed by non-const reference (in/out)
       */
00363 virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00364
      /** Sets the text from a CharacterIterator pointer; the name indicates this object takes ownership of newText. */
00372 virtual void adoptText(CharacterIterator* newText);
00373
      /** Sets the text from a UnicodeString passed by const reference. */
00380 virtual void setText(const UnicodeString& newText);
00381
      /** Sets the text from a UText; errors are reported through status (in/out). */
00395 virtual void setText(UText *text, UErrorCode &status);
00396
      /** Returns an int32_t; by its name, the first boundary position. */
00402 virtual int32_t first(void);
00403
      /** Returns an int32_t; by its name, the last boundary position. */
00409 virtual int32_t last(void);
00410
      /** Overload of next() taking a count n; presumably advances by n boundaries - TODO confirm sign handling. */
00421 virtual int32_t next(int32_t n);
00422
      /** Returns an int32_t; by its name, the next boundary position. */
00428 virtual int32_t next(void);
00429
      /** Returns an int32_t; by its name, the previous boundary position. */
00435 virtual int32_t previous(void);
00436
      /** Returns an int32_t; presumably the first boundary after offset - TODO confirm. */
00444 virtual int32_t following(int32_t offset);
00445
      /** Returns an int32_t; presumably the last boundary before offset - TODO confirm. */
00453 virtual int32_t preceding(int32_t offset);
00454
      /** Returns a UBool; by its name, whether offset is a boundary. Non-const, so it may change iterator state. */
00463 virtual UBool isBoundary(int32_t offset);
00464
      /** Const accessor returning an int32_t; by its name, the current iteration position. */
00470 virtual int32_t current(void) const;
00471
00472
      /** Const accessor returning an int32_t rule status; presumably for the most recently returned boundary - TODO confirm. */
00505 virtual int32_t getRuleStatus() const;
00506
      /**
       * Vector form of the rule status query.
       * @param fillInVec caller-supplied int32_t buffer
       * @param capacity  size of fillInVec, in elements presumably
       * @param status    UErrorCode passed by non-const reference (in/out)
       * @return int32_t; presumably the number of status values - TODO confirm
       */
00530 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00531
      /** Returns the UClassID of the dynamic type of this object. */
00543 virtual UClassID getDynamicClassID(void) const;
00544
      /** Static counterpart of getDynamicClassID(). */
00556 static UClassID U_EXPORT2 getStaticClassID(void);
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
      /**
       * Clone variant that receives a caller-supplied buffer.
       * @param stackBuffer caller-provided memory; presumably used for the clone when large enough - TODO confirm
       * @param BufferSize  passed by non-const reference, so it may be read and updated
       * @param status      UErrorCode passed by non-const reference (in/out)
       */
00582 virtual BreakIterator * createBufferClone(void *stackBuffer,
00583 int32_t &BufferSize,
00584 UErrorCode &status);
00585
00586
      /**
       * Returns a pointer to const bytes; by its name, the binary form of the rules.
       * @param length passed by non-const reference; presumably receives the byte count - TODO confirm
       */
00604 virtual const uint8_t *getBinaryRules(uint32_t &length);
00605
00606
00607 protected:
00608
00609
00610
      /** Protected virtual hook. NOTE(review): exactly which state is reset is not visible in this header. */
00616 virtual void reset(void);
00617
      /* The two declarations below are compiled out by the "#if 0" guard. */
00618 #if 0
00619
00627 virtual UBool isDictionaryChar(UChar32);
00628
00633 virtual int32_t getBreakType() const;
00634 #endif
00635
      /** Sets the break type; presumably stored in fBreakType - TODO confirm. */
00640 virtual void setBreakType(int32_t type);
00641
      /** Non-virtual initialization helper; presumably shared by the constructors - TODO confirm. */
00647 void init();
00648
00649 private:
00650
      /** Private helper taking a state table; presumably runs the state machine backwards and returns a position - TODO confirm. */
00660 int32_t handlePrevious(const RBBIStateTable *statetable);
00661
      /** Private helper taking a state table; presumably runs the state machine forwards and returns a position - TODO confirm. */
00671 int32_t handleNext(const RBBIStateTable *statetable);
00672
00673 protected:
00674
      /**
       * Protected dictionary check over a text range.
       * @param startPos start of the range
       * @param endPos   end of the range
       * @param reverse  direction flag; exact meaning not shown here
       * @return int32_t; presumably a break position - TODO confirm
       */
00689 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00690
00691 private:
00692
      /** Returns a const LanguageBreakEngine pointer for code point c. NOTE(review): null-return conditions are not shown here. */
00699 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00700
      /** Private helper; by its name, brings the rule status members (fLastRuleStatusIndex / fLastStatusIndexValid) up to date. */
00704 void makeRuleStatusValid();
00705
00706 };
00707
00708
00709
00710
00711
00712
00713
/**
 * Inequality test: yields the logical negation of operator==(that), so the
 * two comparisons can never disagree.
 */
00714 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00715 return !(this->operator==(that));
00716 }
00717
00718 U_NAMESPACE_END
00719
00720 #endif
00721
00722 #endif