Skip to content

Commit 628ec44

Browse files
committed
ICU-13541 RBBI patch unicode-org#2 from grhoten. Optimize object layout.
X-SVN-Rev: 40812
1 parent 32ba591 commit 628ec44

File tree

3 files changed

+74
-75
lines changed

3 files changed

+74
-75
lines changed

icu4c/source/common/rbbi.cpp

Lines changed: 46 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ RuleBasedBreakIterator::~RuleBasedBreakIterator() {
192192
}
193193
fCharIter = NULL;
194194

195-
utext_close(fText);
195+
utext_close(&fText);
196196

197197
if (fData != NULL) {
198198
fData->removeReference();
@@ -229,12 +229,12 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
229229
}
230230
// TODO: clone fLanguageBreakEngines from "that"
231231
UErrorCode status = U_ZERO_ERROR;
232-
fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
232+
utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
233233

234234
if (fCharIter != &fSCharIter) {
235235
delete fCharIter;
236236
}
237-
fCharIter = NULL;
237+
fCharIter = &fSCharIter;
238238

239239
if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
240240
// This is a little bit tricky - it will initially appear that
@@ -278,7 +278,7 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
278278
//
279279
//-----------------------------------------------------------------------------
280280
void RuleBasedBreakIterator::init(UErrorCode &status) {
281-
fText = NULL;
281+
fText = UTEXT_INITIALIZER;
282282
fCharIter = NULL;
283283
fData = NULL;
284284
fPosition = 0;
@@ -299,10 +299,10 @@ void RuleBasedBreakIterator::init(UErrorCode &status) {
299299
return;
300300
}
301301

302-
fText = utext_openUChars(NULL, NULL, 0, &status);
302+
utext_openUChars(&fText, NULL, 0, &status);
303303
fDictionaryCache = new DictionaryCache(this, status);
304304
fBreakCache = new BreakCache(this, status);
305-
if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) {
305+
if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) {
306306
status = U_MEMORY_ALLOCATION_ERROR;
307307
}
308308

@@ -351,7 +351,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
351351

352352
const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
353353

354-
if (!utext_equals(fText, that2.fText)) {
354+
if (!utext_equals(&fText, &that2.fText)) {
355355
// The two break iterators are operating on different text,
356356
// or have a different iteration position.
357357
// Note that fText's position is always the same as the break iterator's position.
@@ -392,7 +392,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
392392
}
393393
fBreakCache->reset();
394394
fDictionaryCache->reset();
395-
fText = utext_clone(fText, ut, FALSE, TRUE, &status);
395+
utext_clone(&fText, ut, FALSE, TRUE, &status);
396396

397397
// Set up a dummy CharacterIterator to be returned if anyone
398398
// calls getText(). With input from UText, there is no reasonable
@@ -413,7 +413,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
413413

414414

415415
UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
416-
UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status);
416+
UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status);
417417
return result;
418418
}
419419

@@ -450,9 +450,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
450450
if (newText==NULL || newText->startIndex() != 0) {
451451
// startIndex !=0 wants to be an error, but there's no way to report it.
452452
// Make the iterator text be an empty string.
453-
fText = utext_openUChars(fText, NULL, 0, &status);
453+
utext_openUChars(&fText, NULL, 0, &status);
454454
} else {
455-
fText = utext_openCharacterIterator(fText, newText, &status);
455+
utext_openCharacterIterator(&fText, newText, &status);
456456
}
457457
this->first();
458458
}
@@ -467,7 +467,7 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
467467
UErrorCode status = U_ZERO_ERROR;
468468
fBreakCache->reset();
469469
fDictionaryCache->reset();
470-
fText = utext_openConstUnicodeString(fText, &newText, &status);
470+
utext_openConstUnicodeString(&fText, &newText, &status);
471471

472472
// Set up a character iterator on the string.
473473
// Needed in case someone calls getText().
@@ -499,14 +499,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U
499499
status = U_ILLEGAL_ARGUMENT_ERROR;
500500
return *this;
501501
}
502-
int64_t pos = utext_getNativeIndex(fText);
502+
int64_t pos = utext_getNativeIndex(&fText);
503503
// Shallow read-only clone of the new UText into the existing input UText
504-
fText = utext_clone(fText, input, FALSE, TRUE, &status);
504+
utext_clone(&fText, input, FALSE, TRUE, &status);
505505
if (U_FAILURE(status)) {
506506
return *this;
507507
}
508-
utext_setNativeIndex(fText, pos);
509-
if (utext_getNativeIndex(fText) != pos) {
508+
utext_setNativeIndex(&fText, pos);
509+
if (utext_getNativeIndex(&fText) != pos) {
510510
// Sanity check. The new input utext is supposed to have the exact same
511511
// contents as the old. If we can't set to the same position, it doesn't.
512512
// The contents underlying the old utext might be invalid at this point,
@@ -536,7 +536,7 @@ int32_t RuleBasedBreakIterator::first(void) {
536536
* @return The text's past-the-end offset.
537537
*/
538538
int32_t RuleBasedBreakIterator::last(void) {
539-
int32_t endPos = (int32_t)utext_nativeLength(fText);
539+
int32_t endPos = (int32_t)utext_nativeLength(&fText);
540540
UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
541541
(void)endShouldBeBoundary;
542542
U_ASSERT(endShouldBeBoundary);
@@ -607,8 +607,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
607607

608608
// Move requested offset to a code point start. It might be on a trail surrogate,
609609
// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
610-
utext_setNativeIndex(fText, startPos);
611-
startPos = (int32_t)utext_getNativeIndex(fText);
610+
utext_setNativeIndex(&fText, startPos);
611+
startPos = (int32_t)utext_getNativeIndex(&fText);
612612

613613
UErrorCode status = U_ZERO_ERROR;
614614
fBreakCache->following(startPos, status);
@@ -622,15 +622,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
622622
* @return The position of the last boundary before the starting position.
623623
*/
624624
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
625-
if (fText == NULL || offset > utext_nativeLength(fText)) {
625+
if (offset > utext_nativeLength(&fText)) {
626626
return last();
627627
}
628628

629629
// Move requested offset to a code point start. It might be on a trail surrogate,
630630
// or on a trail byte if the input is UTF-8.
631631

632-
utext_setNativeIndex(fText, offset);
633-
int32_t adjustedOffset = utext_getNativeIndex(fText);
632+
utext_setNativeIndex(&fText, offset);
633+
int32_t adjustedOffset = utext_getNativeIndex(&fText);
634634

635635
UErrorCode status = U_ZERO_ERROR;
636636
fBreakCache->preceding(adjustedOffset, status);
@@ -656,16 +656,16 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
656656
// Note that isBoundary() is always false for offsets that are not on code point boundaries.
657657
// But we still need the side effect of leaving iteration at the following boundary.
658658

659-
utext_setNativeIndex(fText, offset);
660-
int32_t adjustedOffset = utext_getNativeIndex(fText);
659+
utext_setNativeIndex(&fText, offset);
660+
int32_t adjustedOffset = utext_getNativeIndex(&fText);
661661

662662
bool result = false;
663663
UErrorCode status = U_ZERO_ERROR;
664664
if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) {
665665
result = (fBreakCache->current() == offset);
666666
}
667667

668-
if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) {
668+
if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
669669
// Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
670670
// but the iteration position remains set to the end of the text, which is a boundary.
671671
return FALSE;
@@ -785,9 +785,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
785785

786786
// if we're already at the end of the text, return DONE.
787787
initialPosition = fPosition;
788-
UTEXT_SETNATIVEINDEX(fText, initialPosition);
788+
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
789789
result = initialPosition;
790-
c = UTEXT_NEXT32(fText);
790+
c = UTEXT_NEXT32(&fText);
791791
if (c==U_SENTINEL) {
792792
fDone = TRUE;
793793
return UBRK_DONE;
@@ -850,7 +850,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
850850

851851
#ifdef RBBI_DEBUG
852852
if (gTrace) {
853-
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
853+
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText));
854854
if (0x20<=c && c<0x7f) {
855855
RBBIDebugPrintf("\"%c\" ", c);
856856
} else {
@@ -874,7 +874,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
874874
if (row->fAccepting == -1) {
875875
// Match found, common case.
876876
if (mode != RBBI_START) {
877-
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
877+
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
878878
}
879879
fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
880880
}
@@ -892,7 +892,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
892892
int16_t rule = row->fLookAhead;
893893
if (rule != 0) {
894894
// At the position of a '/' in a look-ahead match. Record it.
895-
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
895+
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
896896
lookAheadMatches.setPosition(rule, pos);
897897
}
898898

@@ -908,7 +908,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
908908
// the input position. The next iteration will be processing the
909909
// first real input character.
910910
if (mode == RBBI_RUN) {
911-
c = UTEXT_NEXT32(fText);
911+
c = UTEXT_NEXT32(&fText);
912912
} else {
913913
if (mode == RBBI_START) {
914914
mode = RBBI_RUN;
@@ -922,9 +922,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
922922
// (This really indicates a defect in the break rules. They should always match
923923
// at least one character.)
924924
if (result == initialPosition) {
925-
utext_setNativeIndex(fText, initialPosition);
926-
utext_next32(fText);
927-
result = (int32_t)utext_getNativeIndex(fText);
925+
utext_setNativeIndex(&fText, initialPosition);
926+
utext_next32(&fText);
927+
result = (int32_t)utext_getNativeIndex(&fText);
928928
fRuleStatusIndex = 0;
929929
}
930930

@@ -959,22 +959,22 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
959959
int32_t initialPosition = 0;
960960

961961
const RBBIStateTable *stateTable = fData->fSafeRevTable;
962-
UTEXT_SETNATIVEINDEX(fText, fromPosition);
962+
UTEXT_SETNATIVEINDEX(&fText, fromPosition);
963963
#ifdef RBBI_DEBUG
964964
if (gTrace) {
965965
RBBIDebugPuts("Handle Previous pos char state category");
966966
}
967967
#endif
968968

969969
// if we're already at the start of the text, return DONE.
970-
if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) {
970+
if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
971971
return BreakIterator::DONE;
972972
}
973973

974974
// Set up the starting char.
975-
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
975+
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
976976
result = initialPosition;
977-
c = UTEXT_PREVIOUS32(fText);
977+
c = UTEXT_PREVIOUS32(&fText);
978978

979979
// Set the initial state for the state machine
980980
state = START_STATE;
@@ -1022,7 +1022,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
10221022

10231023
#ifdef RBBI_DEBUG
10241024
if (gTrace) {
1025-
RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText));
1025+
RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText));
10261026
if (0x20<=c && c<0x7f) {
10271027
RBBIDebugPrintf("\"%c\" ", c);
10281028
} else {
@@ -1043,22 +1043,22 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
10431043

10441044
if (row->fAccepting == -1) {
10451045
// Match found, common case.
1046-
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1046+
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
10471047
}
10481048

10491049
int16_t completedRule = row->fAccepting;
10501050
if (completedRule > 0) {
10511051
// Lookahead match is completed.
10521052
int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
10531053
if (lookaheadResult >= 0) {
1054-
UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
1054+
UTEXT_SETNATIVEINDEX(&fText, lookaheadResult);
10551055
return lookaheadResult;
10561056
}
10571057
}
10581058
int16_t rule = row->fLookAhead;
10591059
if (rule != 0) {
10601060
// At the position of a '/' in a look-ahead match. Record it.
1061-
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1061+
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
10621062
lookAheadMatches.setPosition(rule, pos);
10631063
}
10641064

@@ -1074,7 +1074,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
10741074
// the input position. The next iteration will be processing the
10751075
// first real input character.
10761076
if (mode == RBBI_RUN) {
1077-
c = UTEXT_PREVIOUS32(fText);
1077+
c = UTEXT_PREVIOUS32(&fText);
10781078
} else {
10791079
if (mode == RBBI_START) {
10801080
mode = RBBI_RUN;
@@ -1088,9 +1088,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
10881088
// (This really indicates a defect in the break rules. They should always match
10891089
// at least one character.)
10901090
if (result == initialPosition) {
1091-
UTEXT_SETNATIVEINDEX(fText, initialPosition);
1092-
UTEXT_PREVIOUS32(fText);
1093-
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1091+
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
1092+
UTEXT_PREVIOUS32(&fText);
1093+
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
10941094
}
10951095

10961096
#ifdef RBBI_DEBUG

icu4c/source/common/rbbi_cache.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
138138
int32_t current;
139139
UErrorCode status = U_ZERO_ERROR;
140140
int32_t foundBreakCount = 0;
141-
UText *text = fBI->fText;
141+
UText *text = &fBI->fText;
142142

143143
// Loop through the text, looking for ranges of dictionary characters.
144144
// For each span, find the appropriate break engine, and ask it to find

0 commit comments

Comments
 (0)