Skip to content

Commit 5618203

Browse files
committed
ICU-22360 revert portions of unicode-org#2159 which included @ in ALetter for wordbreak, update tests
1 parent 530ca98 commit 5618203

File tree

15 files changed

+19
-19
lines changed

15 files changed

+19
-19
lines changed

icu4c/source/data/brkitr/rules/word.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ $Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
3838
$Format = [\p{Word_Break = Format}];
3939
$Katakana = [\p{Word_Break = Katakana}];
4040
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
41-
$ALetter = [\p{Word_Break = ALetter} @];
41+
$ALetter = [\p{Word_Break = ALetter}];
4242
$Single_Quote = [\p{Word_Break = Single_Quote}];
4343
$Double_Quote = [\p{Word_Break = Double_Quote}];
4444
$MidNumLet = [\p{Word_Break = MidNumLet}];

icu4c/source/data/brkitr/rules/word_POSIX.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ $Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
3838
$Format = [\p{Word_Break = Format}];
3939
$Katakana = [\p{Word_Break = Katakana}];
4040
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
41-
$ALetter = [\p{Word_Break = ALetter} @];
41+
$ALetter = [\p{Word_Break = ALetter}];
4242
$Single_Quote = [\p{Word_Break = Single_Quote}];
4343
$Double_Quote = [\p{Word_Break = Double_Quote}];
4444
$MidNumLet = [\p{Word_Break = MidNumLet} - [.]];

icu4c/source/data/brkitr/rules/word_fi_sv.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ $Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
3838
$Format = [\p{Word_Break = Format}];
3939
$Katakana = [\p{Word_Break = Katakana}];
4040
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
41-
$ALetter = [\p{Word_Break = ALetter} @];
41+
$ALetter = [\p{Word_Break = ALetter}];
4242
$Single_Quote = [\p{Word_Break = Single_Quote}];
4343
$Double_Quote = [\p{Word_Break = Double_Quote}];
4444
$MidNumLet = [\p{Word_Break = MidNumLet}];

icu4c/source/test/intltest/rbbitst.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1937,7 +1937,7 @@ RBBIWordMonkey::RBBIWordMonkey()
19371937
fKatakanaSet = new UnicodeSet(u"[\\p{Word_Break = Katakana}]", status);
19381938
fRegionalIndicatorSet = new UnicodeSet(u"[\\p{Word_Break = Regional_Indicator}]", status);
19391939
fHebrew_LetterSet = new UnicodeSet(u"[\\p{Word_Break = Hebrew_Letter}]", status);
1940-
fALetterSet = new UnicodeSet(u"[\\p{Word_Break = ALetter} @]", status);
1940+
fALetterSet = new UnicodeSet(u"[\\p{Word_Break = ALetter}]", status);
19411941
fSingle_QuoteSet = new UnicodeSet(u"[\\p{Word_Break = Single_Quote}]", status);
19421942
fDouble_QuoteSet = new UnicodeSet(u"[\\p{Word_Break = Double_Quote}]", status);
19431943
fMidNumLetSet = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]", status);

icu4c/source/test/testdata/break_rules/word.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
2525
Format = [\p{Word_Break = Format}];
2626
Katakana = [\p{Word_Break = Katakana}];
2727
Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
28-
ALetter = [\p{Word_Break = ALetter} @];
28+
ALetter = [\p{Word_Break = ALetter}];
2929
Single_Quote = [\p{Word_Break = Single_Quote}];
3030
Double_Quote = [\p{Word_Break = Double_Quote}];
3131
MidNumLet = [\p{Word_Break = MidNumLet}];

icu4c/source/test/testdata/rbbitst.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1586,29 +1586,29 @@ Bangkok)•</data>
15861586
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct.field<200> \
15871587
•for<200> •CS<200>-•types<200>.•</data>
15881588
<data>•\uFF92\uFF76\uFF9E<400> •</data>
1589-
<data>•xx@yy<200>.•</data>
1589+
<data>•xx<200>@•yy<200>.•</data>
15901590

15911591
<locale en_US_POSIX>
15921592
<word>
15931593
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct<200>.•field<200> \
15941594
•for<200> •CS<200>-•types<200>.•</data>
15951595
<data>•\u06c9<200>\uc799\ufffa•</data>
15961596
<data>•\uFF92\uFF76\uFF9E<400> •</data>
1597-
<data>•xx@yy<200>.•</data>
1597+
<data>•xx<200>@•yy<200>.•</data>
15981598

15991599
<locale fi>
16001600
<word>
16011601
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
16021602
•for<200> •CS<200>-•types<200>.•</data>
16031603
<data>•\uFF92\uFF76\uFF9E<400> •</data>
1604-
<data>•xx@yy<200>.•</data>
1604+
<data>•xx<200>@•yy<200>.•</data>
16051605

16061606
<locale sv>
16071607
<word>
16081608
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
16091609
•for<200> •CS<200>-•types<200>.•</data>
16101610
<data>•\uFF92\uFF76\uFF9E<400> •</data>
1611-
<data>•xx@yy<200>.•</data>
1611+
<data>•xx<200>@•yy<200>.•</data>
16121612

16131613

16141614
# UBreakIteratorType UBRK_CHARACTER, Locale "th"

icu4j/main/shared/data/icudata.jar

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:88808b997ca13e786f5f6bdd95d05d7d9ba3fe6b12f6356b8b15bb4eb49d644e
3-
size 14330312
2+
oid sha256:9b764b3c6af6c9e8ed18770a1c758f9740aede42bdb435fe6cb3fa3f8a7846af
3+
size 14330291

icu4j/main/shared/data/icutzdata.jar

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:affd9c4e150caed2894d4912763ef2cb95249f94e859d2b3298c5636ab124f50
2+
oid sha256:57224bd406c99dd7242f9aeac1db8beaf6e0e1520646b4bedab404aa02c896a3
33
size 94829

icu4j/main/shared/data/testdata.jar

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:d5c5dd3ac8fca302041ec888963b800c61dd4003a8647515e193cec72967f871
2+
oid sha256:0e466f0476161bdf5b82d33e164e44d2f0912156436057d53d949ee386bdc79d
33
size 831605

icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -400,7 +400,7 @@ static class RBBIWordMonkey extends RBBIMonkeyKind {
400400
fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");
401401
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]");
402402
fHebrew_LetterSet = new UnicodeSet("[\\p{Word_Break = Hebrew_Letter}]");
403-
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter} @]");
403+
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]");
404404
fSingle_QuoteSet = new UnicodeSet("[\\p{Word_Break = Single_Quote}]");
405405
fDouble_QuoteSet = new UnicodeSet("[\\p{Word_Break = Double_Quote}]");
406406
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");

0 commit comments

Comments
 (0)