@@ -215,7 +215,7 @@ typedef enum {
215215 * following header file: */
216216# include "utfebcdic.h"
217217
218- # else /* ! EBCDIC */
218+ # else /* ! EBCDIC */
219219
220220START_EXTERN_C
221221
@@ -235,11 +235,11 @@ EXTCONST unsigned char PL_utf8skip[] = {
235235/* 0x90 */ 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 , /* bogus: continuation byte */
236236/* 0xA0 */ 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 , /* bogus: continuation byte */
237237/* 0xB0 */ 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 , /* bogus: continuation byte */
238- /* 0xC0 */ 2 ,2 , /* overlong */
238+ /* 0xC0 */ 2 ,2 , /* overlong */
239239/* 0xC2 */ 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 , /* U+0080 to U+03FF */
240240/* 0xD0 */ 2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 , /* U+0400 to U+07FF */
241241/* 0xE0 */ 3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 ,3 , /* U+0800 to U+FFFF */
242- /* 0xF0 */ 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 ,5 ,5 ,5 ,5 ,6 ,6 , /* above BMP to 2**31 - 1 */
242+ /* 0xF0 */ 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 ,5 ,5 ,5 ,5 ,6 ,6 , /* above BMP to 2**31 - 1 */
243243 /* Perl extended (never was official UTF-8). Up to 36 bits */
244244/* 0xFE */ 7 ,
245245 /* More extended, Up to 72 bits (64-bit + reserved) */
@@ -314,21 +314,21 @@ adding no time nor space requirements to the implementation.
314314 The following table is from Unicode 3.2, plus the Perl extensions for above
315315 U+10FFFF
316316
317- Code Points 1st Byte 2nd Byte 3rd 4th 5th 6th 7th 8th-13th
317+ Code Points 1st Byte 2nd Byte 3rd 4th 5th 6th 7th 8th-13th
318318
319- U+0000..U+007F 00..7F
319+ U+0000..U+007F 00..7F
320320 U+0080..U+07FF * C2..DF 80..BF
321- U+0800..U+0FFF E0 * A0..BF 80..BF
321+ U+0800..U+0FFF E0 * A0..BF 80..BF
322322 U+1000..U+CFFF E1..EC 80..BF 80..BF
323323 U+D000..U+D7FF ED 80..9F 80..BF
324324 U+D800..U+DFFF ED A0..BF 80..BF (surrogates)
325325 U+E000..U+FFFF EE..EF 80..BF 80..BF
326- U+10000..U+3FFFF F0 * 90..BF 80..BF 80..BF
327- U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
328- U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
326+ U+10000..U+3FFFF F0 * 90..BF 80..BF 80..BF
327+ U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
328+ U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
329329 Below are above-Unicode code points
330- U+110000..U+13FFFF F4 90..BF 80..BF 80..BF
331- U+110000..U+1FFFFF F5..F7 80..BF 80..BF 80..BF
330+ U+110000..U+13FFFF F4 90..BF 80..BF 80..BF
331+ U+140000..U+1FFFFF F5..F7 80..BF 80..BF 80..BF
332332 U+200000..U+FFFFFF F8 * 88..BF 80..BF 80..BF 80..BF
333333U+1000000..U+3FFFFFF F9..FB 80..BF 80..BF 80..BF 80..BF
334334U+4000000..U+3FFFFFFF FC * 84..BF 80..BF 80..BF 80..BF 80..BF
@@ -670,7 +670,7 @@ encoded as UTF-8. C<cp> is a native (ASCII or EBCDIC) code point if less than
670670/* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence? Use
671671 * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to
672672 * be well-formed. */
673- #define UTF8_IS_DOWNGRADEABLE_START (c ) (__ASSERT_(FITS_IN_8_BITS(c)) \
673+ #define UTF8_IS_DOWNGRADEABLE_START (c ) (__ASSERT_(FITS_IN_8_BITS(c)) \
674674 inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(c), \
675675 UTF_MIN_START_BYTE, UTF_MIN_ABOVE_LATIN1_BYTE - 1))
676676
@@ -711,7 +711,7 @@ uppercase/lowercase/titlecase/fold into.
711711 *
712712=cut
713713*/
714- #define UTF8_MAXBYTES_CASE \
714+ #define UTF8_MAXBYTES_CASE \
715715 MAX(UTF8_MAXBYTES, UTF8_MAX_FOLD_CHAR_EXPAND * UNISKIP_BY_MSB_(20))
716716
717717/* Rest of these are attributes of Unicode and perl's internals rather than the
@@ -859,11 +859,11 @@ that it returns TRUE in each for the exact same set of bit patterns. It is
859859valid on a subset of what UVCHR_IS_INVARIANT is valid on, so can just use that;
860860and the compiler should optimize out anything extraneous given the
861861implementation of the latter. */
862- #define UTF8_IS_INVARIANT (c ) UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c))
862+ #define UTF8_IS_INVARIANT (c ) UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c))
863863
864864/* Like the above, but its name implies a non-UTF8 input, which as the comments
865865 * above show, doesn't matter as to its implementation */
866- #define NATIVE_BYTE_IS_INVARIANT (c ) UVCHR_IS_INVARIANT(c)
866+ #define NATIVE_BYTE_IS_INVARIANT (c ) UVCHR_IS_INVARIANT(c)
867867
868868/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
869869 * in UTF-8? This is the inverse of UTF8_IS_INVARIANT. */
@@ -967,8 +967,8 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
967967 && is_in_locale_category_ (FALSE, -1 ))) \
968968 && (! IN_BYTES ))
969969
970- #define UNICODE_SURROGATE_FIRST 0xD800
971- #define UNICODE_SURROGATE_LAST 0xDFFF
970+ #define UNICODE_SURROGATE_FIRST 0xD800
971+ #define UNICODE_SURROGATE_LAST 0xDFFF
972972
973973/*
974974=for apidoc Am|bool|UNICODE_IS_SURROGATE|const UV uv
@@ -1012,7 +1012,7 @@ representation.
10121012
10131013=cut
10141014 */
1015- #define UNICODE_REPLACEMENT 0xFFFD
1015+ #define UNICODE_REPLACEMENT 0xFFFD
10161016#define UNICODE_IS_REPLACEMENT (uv ) UNLIKELY((UV) (uv) == UNICODE_REPLACEMENT)
10171017#define UTF8_IS_REPLACEMENT (s , send ) \
10181018 UNLIKELY( \
@@ -1021,7 +1021,7 @@ representation.
10211021 sizeof(REPLACEMENT_CHARACTER_UTF8) - 1))
10221022
10231023/* Max legal code point according to Unicode */
1024- #define PERL_UNICODE_MAX 0x10FFFF
1024+ #define PERL_UNICODE_MAX 0x10FFFF
10251025
10261026/*
10271027
@@ -1057,10 +1057,10 @@ this macro matches
10571057
10581058=cut
10591059
1060- * ASCII EBCDIC I8
1061- * U+10FFFF: \xF4\x8F\xBF\xBF \xF9\xA1\xBF\xBF\xBF max legal Unicode
1062- * U+110000: \xF4\x90\x80\x80 \xF9\xA2\xA0\xA0\xA0
1063- * U+110001: \xF4\x90\x80\x81 \xF9\xA2\xA0\xA0\xA1
1060+ * ASCII EBCDIC I8
1061+ * U+10FFFF: \xF4\x8F\xBF\xBF \xF9\xA1\xBF\xBF\xBF max legal Unicode
1062+ * U+110000: \xF4\x90\x80\x80 \xF9\xA2\xA0\xA0\xA0
1063+ * U+110001: \xF4\x90\x80\x81 \xF9\xA2\xA0\xA0\xA1
10641064 */
10651065#define UTF_START_BYTE_110000_ UTF_START_BYTE(PERL_UNICODE_MAX + 1, 21)
10661066#define UTF_FIRST_CONT_BYTE_110000_ \
@@ -1255,10 +1255,10 @@ point's representation.
12551255#define UTF8_ALLOW_ANYUV 0
12561256#define UTF8_ALLOW_DEFAULT UTF8_ALLOW_ANYUV
12571257
1258- #define UNICODE_WARN_SURROGATE 0x0001 /* UTF-16 surrogates */
1259- #define UNICODE_WARN_NONCHAR 0x0002 /* Non-char code points */
1260- #define UNICODE_WARN_SUPER 0x0004 /* Above 0x10FFFF */
1261- #define UNICODE_WARN_PERL_EXTENDED 0x0008 /* Above 0x7FFF_FFFF */
1258+ #define UNICODE_WARN_SURROGATE 0x0001 /* UTF-16 surrogates */
1259+ #define UNICODE_WARN_NONCHAR 0x0002 /* Non-char code points */
1260+ #define UNICODE_WARN_SUPER 0x0004 /* Above 0x10FFFF */
1261+ #define UNICODE_WARN_PERL_EXTENDED 0x0008 /* Above 0x7FFF_FFFF */
12621262#define UNICODE_WARN_ABOVE_31_BIT UNICODE_WARN_PERL_EXTENDED
12631263#define UNICODE_DISALLOW_SURROGATE 0x0010
12641264#define UNICODE_DISALLOW_NONCHAR 0x0020
@@ -1286,11 +1286,11 @@ point's representation.
12861286
12871287/* For backward source compatibility, as these are now the default */
12881288#define UNICODE_ALLOW_SURROGATE 0
1289- #define UNICODE_ALLOW_SUPER 0
1290- #define UNICODE_ALLOW_ANY 0
1289+ #define UNICODE_ALLOW_SUPER 0
1290+ #define UNICODE_ALLOW_ANY 0
12911291
1292- #define UNICODE_BYTE_ORDER_MARK 0xFEFF
1293- #define UNICODE_IS_BYTE_ORDER_MARK (uv ) UNLIKELY((UV) (uv) \
1292+ #define UNICODE_BYTE_ORDER_MARK 0xFEFF
1293+ #define UNICODE_IS_BYTE_ORDER_MARK (uv ) UNLIKELY((UV) (uv) \
12941294 == UNICODE_BYTE_ORDER_MARK)
12951295
12961296#define LATIN_SMALL_LETTER_SHARP_S LATIN_SMALL_LETTER_SHARP_S_NATIVE
@@ -1301,15 +1301,15 @@ point's representation.
13011301 LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_NATIVE
13021302#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE \
13031303 LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_NATIVE
1304- #define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3
1305- #define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
1306- #define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3
1304+ #define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3
1305+ #define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
1306+ #define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3
13071307#define GREEK_SMALL_LETTER_MU 0x03BC
1308- #define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case
1308+ #define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case
13091309 of MICRO SIGN */
1310- #define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
1310+ #define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
13111311#ifdef LATIN_CAPITAL_LETTER_SHARP_S_UTF8
1312- # define LATIN_CAPITAL_LETTER_SHARP_S 0x1E9E
1312+ # define LATIN_CAPITAL_LETTER_SHARP_S 0x1E9E
13131313#endif
13141314#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x130
13151315#define LATIN_SMALL_LETTER_DOTLESS_I 0x131
@@ -1319,16 +1319,16 @@ point's representation.
13191319#define KELVIN_SIGN 0x212A
13201320#define ANGSTROM_SIGN 0x212B
13211321
1322- #define UNI_DISPLAY_ISPRINT 0x0001
1323- #define UNI_DISPLAY_BACKSLASH 0x0002
1324- #define UNI_DISPLAY_BACKSPACE 0x0004 /* Allow \b when also
1322+ #define UNI_DISPLAY_ISPRINT 0x0001
1323+ #define UNI_DISPLAY_BACKSLASH 0x0002
1324+ #define UNI_DISPLAY_BACKSPACE 0x0004 /* Allow \b when also
13251325 UNI_DISPLAY_BACKSLASH */
1326- #define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT \
1326+ #define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT \
13271327 |UNI_DISPLAY_BACKSLASH \
13281328 |UNI_DISPLAY_BACKSPACE)
13291329
13301330/* Character classes could also allow \b, but not patterns in general */
1331- #define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
1331+ #define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
13321332
13331333/* Should be removed; maybe deprecated, but not used in CPAN */
13341334#define SHARP_S_SKIP 2
0 commit comments