104
104
reason = "use the crates.io `getopts` library instead" ) ) ]
105
105
106
106
#[ cfg( test) ] #[ macro_use] extern crate log;
107
+ extern crate unicode_width;
108
+
107
109
108
110
use self :: Name :: * ;
109
111
use self :: HasArg :: * ;
110
112
use self :: Occur :: * ;
111
113
use self :: Fail :: * ;
112
114
use self :: Optval :: * ;
113
- use self :: SplitWithinState :: * ;
114
- use self :: Whitespace :: * ;
115
- use self :: LengthLimit :: * ;
116
115
117
116
use std:: error:: Error ;
118
117
use std:: ffi:: OsStr ;
119
118
use std:: fmt;
120
119
use std:: iter:: { repeat, IntoIterator } ;
121
120
use std:: result;
122
121
122
+ use unicode_width:: UnicodeWidthStr ;
123
+
123
124
/// A description of the options that a program can handle.
124
125
pub struct Options {
125
126
grps : Vec < OptGroup > ,
@@ -479,7 +480,7 @@ impl Options {
479
480
let mut row = " " . to_string ( ) ;
480
481
481
482
// short option
482
- match short_name. len ( ) {
483
+ match short_name. width ( ) {
483
484
0 => {
484
485
if any_short {
485
486
row. push_str ( " " ) ;
@@ -488,26 +489,23 @@ impl Options {
488
489
1 => {
489
490
row. push ( '-' ) ;
490
491
row. push_str ( & short_name) ;
491
- if long_name. len ( ) > 0 {
492
+ if long_name. width ( ) > 0 {
492
493
row. push_str ( ", " ) ;
493
494
} else {
494
495
// Only a single space here, so that any
495
496
// argument is printed in the correct spot.
496
497
row. push ( ' ' ) ;
497
498
}
498
499
}
500
+ // FIXME: refer issue #7.
499
501
_ => panic ! ( "the short name should only be 1 ascii char long" ) ,
500
502
}
501
503
502
504
// long option
503
- match long_name. len ( ) {
505
+ match long_name. width ( ) {
504
506
0 => { }
505
507
_ => {
506
- if self . long_only {
507
- row. push ( '-' ) ;
508
- } else {
509
- row. push_str ( "--" ) ;
510
- }
508
+ row. push_str ( if self . long_only { "-" } else { "--" } ) ;
511
509
row. push_str ( & long_name) ;
512
510
row. push ( ' ' ) ;
513
511
}
@@ -524,9 +522,7 @@ impl Options {
524
522
}
525
523
}
526
524
527
- // FIXME: #5516 should be graphemes not codepoints
528
- // here we just need to indent the start of the description
529
- let rowlen = row. chars ( ) . count ( ) ;
525
+ let rowlen = row. width ( ) ;
530
526
if rowlen < 24 {
531
527
for _ in 0 .. 24 - rowlen {
532
528
row. push ( ' ' ) ;
@@ -535,25 +531,7 @@ impl Options {
535
531
row. push_str ( & desc_sep)
536
532
}
537
533
538
- // Normalize desc to contain words separated by one space character
539
- let mut desc_normalized_whitespace = String :: new ( ) ;
540
- for word in desc. split ( |c : char | c. is_whitespace ( ) )
541
- . filter ( |s| !s. is_empty ( ) ) {
542
- desc_normalized_whitespace. push_str ( word) ;
543
- desc_normalized_whitespace. push ( ' ' ) ;
544
- }
545
-
546
- // FIXME: #5516 should be graphemes not codepoints
547
- let mut desc_rows = Vec :: new ( ) ;
548
- each_split_within ( & desc_normalized_whitespace,
549
- 54 ,
550
- |substr| {
551
- desc_rows. push ( substr. to_string ( ) ) ;
552
- true
553
- } ) ;
554
-
555
- // FIXME: #5516 should be graphemes not codepoints
556
- // wrapped description
534
+ let desc_rows = each_split_within ( & desc, 54 ) ;
557
535
row. push_str ( & desc_rows. join ( & desc_sep) ) ;
558
536
559
537
row
@@ -923,118 +901,59 @@ fn format_option(opt: &OptGroup) -> String {
923
901
line
924
902
}
925
903
926
- #[ derive( Clone , Copy ) ]
927
- enum SplitWithinState {
928
- A , // leading whitespace, initial state
929
- B , // words
930
- C , // internal and trailing whitespace
931
- }
932
-
933
- #[ derive( Clone , Copy ) ]
934
- enum Whitespace {
935
- Ws , // current char is whitespace
936
- Cr // current char is not whitespace
937
- }
938
-
939
- #[ derive( Clone , Copy ) ]
940
- enum LengthLimit {
941
- UnderLim , // current char makes current substring still fit in limit
942
- OverLim // current char makes current substring no longer fit in limit
943
- }
944
-
945
-
946
904
/// Splits a string into substrings with possibly internal whitespace,
947
905
/// each of them at most `lim` bytes long, if possible. The substrings
948
906
/// have leading and trailing whitespace removed, and are only cut at
949
907
/// whitespace boundaries.
950
- ///
951
- /// Note: Function was moved here from `std::str` because this module is the only place that
952
- /// uses it, and because it was too specific for a general string function.
953
- fn each_split_within < ' a , F > ( ss : & ' a str , lim : usize , mut it : F )
954
- -> bool where F : FnMut ( & ' a str ) -> bool {
955
- // Just for fun, let's write this as a state machine:
956
-
957
- let mut slice_start = 0 ;
958
- let mut last_start = 0 ;
959
- let mut last_end = 0 ;
960
- let mut state = A ;
961
- let mut fake_i = ss. len ( ) ;
962
- let mut lim = lim;
963
-
964
- let mut cont = true ;
965
-
966
- // if the limit is larger than the string, lower it to save cycles
967
- if lim >= fake_i {
968
- lim = fake_i;
969
- }
970
-
971
- let mut machine = |cont : & mut bool , state : & mut SplitWithinState , ( i, c) : ( usize , char ) | {
972
- let whitespace = if c. is_whitespace ( ) { Ws } else { Cr } ;
973
- let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
974
-
975
- * state = match ( * state, whitespace, limit) {
976
- ( A , Ws , _) => { A }
977
- ( A , Cr , _) => { slice_start = i; last_start = i; B }
978
-
979
- ( B , Cr , UnderLim ) => { B }
980
- ( B , Cr , OverLim ) if ( i - last_start + 1 ) > lim => {
981
- // A single word has gone over the limit. In this
982
- // case we just accept that the word will be too long.
983
- B
984
- }
985
- ( B , Cr , OverLim ) => {
986
- * cont = it ( & ss[ slice_start..last_end] ) ;
987
- slice_start = last_start;
988
- B
989
- }
990
- ( B , Ws , UnderLim ) => {
991
- last_end = i;
992
- C
993
- }
994
- ( B , Ws , OverLim ) => {
995
- last_end = i;
996
- * cont = it ( & ss[ slice_start..last_end] ) ;
997
- A
998
- }
999
-
1000
- ( C , Cr , UnderLim ) => {
1001
- last_start = i;
1002
- B
908
+ fn each_split_within ( desc : & str , lim : usize ) -> Vec < String > {
909
+ let mut rows = Vec :: new ( ) ;
910
+ for line in desc. trim ( ) . lines ( ) {
911
+ let line_chars = line. chars ( ) . chain ( Some ( ' ' ) ) ;
912
+ let words = line_chars. fold ( ( Vec :: new ( ) , 0 , 0 ) , |( mut words, a, z) , c | {
913
+ let idx = z + c. len_utf8 ( ) ; // Get the current byte offset
914
+
915
+ // If the char is whitespace, advance the word start and maybe push a word
916
+ if c. is_whitespace ( ) {
917
+ if a != z {
918
+ words. push ( & line[ a..z] ) ;
919
+ }
920
+ ( words, idx, idx)
1003
921
}
1004
- ( C , Cr , OverLim ) => {
1005
- * cont = it ( & ss[ slice_start..last_end] ) ;
1006
- slice_start = i;
1007
- last_start = i;
1008
- last_end = i;
1009
- B
922
+ // If the char is not whitespace, continue, retaining the current
923
+ else {
924
+ ( words, a, idx)
1010
925
}
1011
- ( C , Ws , OverLim ) => {
1012
- * cont = it ( & ss[ slice_start..last_end] ) ;
1013
- A
926
+ } ) . 0 ;
927
+
928
+ let mut row = String :: new ( ) ;
929
+ for word in words. iter ( ) {
930
+ let sep = if row. len ( ) > 0 { Some ( " " ) } else { None } ;
931
+ let width = row. width ( )
932
+ + word. width ( )
933
+ + sep. map ( UnicodeWidthStr :: width) . unwrap_or ( 0 ) ;
934
+
935
+ if width <= lim {
936
+ if let Some ( sep) = sep { row. push_str ( sep) }
937
+ row. push_str ( word) ;
938
+ continue
1014
939
}
1015
- ( C , Ws , UnderLim ) => {
1016
- C
940
+ if row. len ( ) > 0 {
941
+ rows. push ( row. clone ( ) ) ;
942
+ row. clear ( ) ;
1017
943
}
1018
- } ;
1019
-
1020
- * cont
1021
- } ;
1022
-
1023
- ss. char_indices ( ) . all ( |x| machine ( & mut cont, & mut state, x) ) ;
1024
-
1025
- // Let the automaton 'run out' by supplying trailing whitespace
1026
- while cont && match state { B | C => true , A => false } {
1027
- machine ( & mut cont, & mut state, ( fake_i, ' ' ) ) ;
1028
- fake_i += 1 ;
944
+ row. push_str ( word) ;
945
+ }
946
+ if row. len ( ) > 0 {
947
+ rows. push ( row) ;
948
+ }
1029
949
}
1030
- return cont ;
950
+ rows
1031
951
}
1032
952
1033
953
#[ test]
1034
954
fn test_split_within ( ) {
1035
955
fn t ( s : & str , i : usize , u : & [ String ] ) {
1036
- let mut v = Vec :: new ( ) ;
1037
- each_split_within ( s, i, |s| { v. push ( s. to_string ( ) ) ; true } ) ;
956
+ let v = each_split_within ( & ( s. to_string ( ) ) , i) ;
1038
957
assert ! ( v. iter( ) . zip( u. iter( ) ) . all( |( a, b) | a == b) ) ;
1039
958
}
1040
959
t ( "" , 0 , & [ ] ) ;
@@ -1046,7 +965,9 @@ fn test_split_within() {
1046
965
"Little lamb" . to_string ( )
1047
966
] ) ;
1048
967
t ( "\n Mary had a little lamb\n Little lamb\n " , :: std:: usize:: MAX ,
1049
- & [ "Mary had a little lamb\n Little lamb" . to_string ( ) ] ) ;
968
+ & [ "Mary had a little lamb" . to_string ( ) ,
969
+ "Little lamb" . to_string ( )
970
+ ] ) ;
1050
971
}
1051
972
1052
973
#[ cfg( test) ]
@@ -1827,6 +1748,69 @@ Options:
1827
1748
assert ! ( usage == expected)
1828
1749
}
1829
1750
1751
+ #[ test]
1752
+ fn test_usage_description_newline_handling ( ) {
1753
+ let mut opts = Options :: new ( ) ;
1754
+ opts. optflag ( "k" , "k\u{2013} w\u{2013} " ,
1755
+ "The word kiwi is normally spelled with two i's" ) ;
1756
+ opts. optflag ( "a" , "apple" ,
1757
+ "This description forces a new line.\n Here is a premature\n \
1758
+ newline") ;
1759
+
1760
+ let expected =
1761
+ "Usage: fruits
1762
+
1763
+ Options:
1764
+ -k, --k–w– The word kiwi is normally spelled with two i's
1765
+ -a, --apple This description forces a new line.
1766
+ Here is a premature
1767
+ newline
1768
+ " ;
1769
+
1770
+ let usage = opts. usage ( "Usage: fruits" ) ;
1771
+
1772
+ debug ! ( "expected: <<{}>>" , expected) ;
1773
+ debug ! ( "generated: <<{}>>" , usage) ;
1774
+ assert ! ( usage == expected)
1775
+ }
1776
+
1777
+ #[ test]
1778
+ fn test_usage_multiwidth ( ) {
1779
+ let mut opts = Options :: new ( ) ;
1780
+ opts. optflag ( "a" , "apple" , "apple description" ) ;
1781
+ opts. optflag ( "b" , "banana\u{00AB} " , "banana description" ) ;
1782
+ opts. optflag ( "c" , "brûlée" , "brûlée quite long description" ) ;
1783
+ opts. optflag ( "k" , "kiwi\u{20AC} " , "kiwi description" ) ;
1784
+ opts. optflag ( "o" , "orange\u{2039} " , "orange description" ) ;
1785
+ opts. optflag ( "r" , "raspberry-but-making-this-option-way-too-long" ,
1786
+ "raspberry description is also quite long indeed longer than \
1787
+ every other piece of text we might encounter here and thus will \
1788
+ be automatically broken up"
1789
+ ) ;
1790
+
1791
+ let expected =
1792
+ "Usage: fruits
1793
+
1794
+ Options:
1795
+ -a, --apple apple description
1796
+ -b, --banana« banana description
1797
+ -c, --brûlée brûlée quite long description
1798
+ -k, --kiwi€ kiwi description
1799
+ -o, --orange‹ orange description
1800
+ -r, --raspberry-but-making-this-option-way-too-long
1801
+ raspberry description is also quite long indeed longer
1802
+ than every other piece of text we might encounter here
1803
+ and thus will be automatically broken up
1804
+ " ;
1805
+
1806
+ let usage = opts. usage ( "Usage: fruits" ) ;
1807
+
1808
+ debug ! ( "expected: <<{}>>" , expected) ;
1809
+ debug ! ( "generated: <<{}>>" , usage) ;
1810
+ assert ! ( usage == expected)
1811
+ }
1812
+
1813
+
1830
1814
#[ test]
1831
1815
fn test_usage_short_only ( ) {
1832
1816
let mut opts = Options :: new ( ) ;
0 commit comments