Skip to content

Commit 3aa2ea7

Browse files
committed
8301971: Make JDK source code UTF-8
8338973: Document need to have UTF-8 locale available to build the JDK Reviewed-by: erikj, naoto, mbaesken
1 parent 74e981e commit 3aa2ea7

File tree

13 files changed

+72
-128
lines changed

13 files changed

+72
-128
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
root = true
22

3+
[*]
4+
charset = utf-8
5+
36
[*.{cpp,hpp,c,h,java,cc,hh,m,mm,S,md,properties,gmk,m4,ac}]
47
trim_trailing_whitespace = true
58

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
* -text
2+
* encoding=utf-8
23
*.java diff=java
34
*.c diff=cpp
45
*.h diff=cpp

doc/building.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,14 @@ <h3 id="special-considerations">Special Considerations</h3>
305305
<li><p>If using <a href="#cygwin">Cygwin</a>, you must make sure the
306306
file permissions and attributes between Windows and Cygwin are
307307
consistent. It is recommended that you follow this procedure:</p>
308+
<li><p>UTF-8 support is needed to compile the JDK. On Unix systems, this
309+
typically means that the <code>C.UTF-8</code> or
310+
<code>en_US.UTF-8</code> locale needs to be available. For Windows
311+
users, please see the section on <a href="#locale-requirements">Locale
312+
Requirements</a> below.</p></li>
313+
<li><p>On Windows, if using <a href="#cygwin">Cygwin</a>, extra care
314+
must be taken to make sure the environment is consistent. It is
315+
recommended that you follow this procedure:</p>
308316
<ul>
309317
<li><p>Create the directory that is going to contain the top directory
310318
of the JDK clone by using the <code>mkdir</code> command in the Cygwin

doc/building.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ on where and how to check out the source code.
8383
for the source code, see below for suggestions on how to keep the build
8484
artifacts on a local disk.
8585

86+
* UTF-8 support is needed to compile the JDK. On Unix systems, this typically
87+
means that the `C.UTF-8` or `en_US.UTF-8` locale needs to be available. For
88+
Windows users, please see the section on [Locale
89+
Requirements](#locale-requirements) below.
90+
8691
* On Windows, extra care must be taken to have a smooth building experience:
8792

8893
* Make sure that all relevant paths have short names. Short names are used by

make/Docs.gmk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,14 @@ JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio
9696

9797
# The initial set of options for javadoc
9898
JAVADOC_OPTIONS := -use -keywords -notimestamp \
99-
-serialwarn -encoding ISO-8859-1 -docencoding UTF-8 -breakiterator \
99+
-serialwarn -encoding utf-8 -docencoding utf-8 -breakiterator \
100100
-splitIndex --system none -javafx --expand-requires transitive \
101101
--override-methods=summary --syntax-highlight
102102

103103
# The reference options must stay stable to allow for comparisons across the
104104
# development cycle.
105105
REFERENCE_OPTIONS := -XDignore.symbol.file=true -use -keywords -notimestamp \
106-
-serialwarn -encoding ISO-8859-1 -breakiterator -splitIndex --system none \
106+
-serialwarn -encoding utf-8 -breakiterator -splitIndex --system none \
107107
-html5 -javafx --expand-requires transitive
108108

109109
# Should we add DRAFT stamps to the generated javadoc?

make/autoconf/basic.m4

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,17 +134,33 @@ AC_DEFUN_ONCE([BASIC_SETUP_BUILD_ENV],
134134
)
135135
AC_SUBST(BUILD_ENV)
136136
137+
AC_MSG_CHECKING([for locale to use])
137138
if test "x$LOCALE" != x; then
138139
# Check if we actually have C.UTF-8; if so, use it
139140
if $LOCALE -a | $GREP -q -E "^C\.(utf8|UTF-8)$"; then
140141
LOCALE_USED=C.UTF-8
142+
AC_MSG_RESULT([C.UTF-8 (recommended)])
143+
elif $LOCALE -a | $GREP -q -E "^en_US\.(utf8|UTF-8)$"; then
144+
LOCALE_USED=en_US.UTF-8
145+
AC_MSG_RESULT([en_US.UTF-8 (acceptable fallback)])
141146
else
142-
AC_MSG_WARN([C.UTF-8 locale not found, using C locale])
143-
LOCALE_USED=C
147+
# As a fallback, check if the user's locale is UTF-8. USER_LOCALE was saved
148+
# by the wrapper configure script before autoconf messed up LC_ALL.
149+
if $ECHO $USER_LOCALE | $GREP -q -E "\.(utf8|UTF-8)$"; then
150+
LOCALE_USED=$USER_LOCALE
151+
AC_MSG_RESULT([$USER_LOCALE (untested fallback)])
152+
AC_MSG_WARN([Could not find C.UTF-8 or en_US.UTF-8 locale. This is not supported, and the build might fail unexpectedly.])
153+
else
154+
AC_MSG_RESULT([no UTF-8 locale found])
155+
AC_MSG_WARN([No UTF-8 locale found. This is not supported. Proceeding with the C locale, but the build might fail unexpectedly.])
156+
LOCALE_USED=C
157+
fi
158+
AC_MSG_NOTICE([The recommended locale is C.UTF-8, but en_US.UTF-8 is also accepted.])
144159
fi
145160
else
146-
AC_MSG_WARN([locale command not not found, using C locale])
147-
LOCALE_USED=C
161+
LOCALE_USED=C.UTF-8
162+
AC_MSG_RESULT([C.UTF-8 (default)])
163+
AC_MSG_WARN([locale command not found, using C.UTF-8 locale])
148164
fi
149165
150166
export LC_ALL=$LOCALE_USED

make/autoconf/configure

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ fi
4949
export CONFIG_SHELL=$BASH
5050
export _as_can_reexec=no
5151

52-
# Make sure all shell commands are executed with the C locale
52+
# Save user's current locale, but make sure all future shell commands are
53+
# executed with the C locale
54+
export USER_LOCALE=$LC_ALL
5355
export LC_ALL=C
5456

5557
if test "x$CUSTOM_CONFIG_DIR" != x; then

make/autoconf/flags-cflags.m4

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -573,12 +573,20 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
573573
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -fvisibility=hidden -fstack-protector"
574574
575575
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
576-
# The -utf-8 option sets source and execution character sets to UTF-8 to enable correct
577-
# compilation of all source files regardless of the active code page on Windows.
578-
TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -MP"
579-
TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -Zc:wchar_t-"
576+
TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -MP"
577+
TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -Zc:wchar_t-"
580578
fi
581579
580+
# Set character encoding in source
581+
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
582+
CHARSET_CFLAGS="-finput-charset=utf-8"
583+
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
584+
# The -utf-8 option sets both source and execution character sets
585+
CHARSET_CFLAGS="-utf-8 -validate-charset"
586+
fi
587+
TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM $CHARSET_CFLAGS"
588+
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK $CHARSET_CFLAGS"
589+
582590
# CFLAGS C language level for JDK sources (hotspot only uses C++)
583591
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
584592
LANGSTD_CFLAGS="-std=c11"

make/common/JavaCompilation.gmk

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,13 @@ endef
8080
#
8181
# The sed expression does this:
8282
# 1. Add a backslash before any :, = or ! that do not have a backslash already.
83-
# 2. Apply the file unicode2x.sed which does a whole bunch of \u00XX to \xXX
84-
# conversions.
85-
# 3. Delete all lines starting with #.
86-
# 4. Delete empty lines.
87-
# 5. Append lines ending with \ with the next line.
88-
# 6. Remove leading and trailing white space. Note that tabs must be explicit
83+
# 2. Delete all lines starting with #.
84+
# 3. Delete empty lines.
85+
# 4. Append lines ending with \ with the next line.
86+
# 5. Remove leading and trailing white space. Note that tabs must be explicit
8987
# as sed on macosx does not understand '\t'.
90-
# 7. Replace the first \= with just =.
91-
# 8. Finally it's all sorted to create a stable output.
88+
# 6. Replace the first \= with just =.
89+
# 7. Finally it's all sorted to create a stable output.
9290
#
9391
# It is assumed that = is the character used for separating names and values.
9492
define add_file_to_clean
@@ -108,7 +106,6 @@ define add_file_to_clean
108106
( $(CAT) $$< && $(ECHO) "" ) \
109107
| $(SED) -e 's/\([^\\]\):/\1\\:/g' -e 's/\([^\\]\)=/\1\\=/g' \
110108
-e 's/\([^\\]\)!/\1\\!/g' -e 's/^[ ]*#.*/#/g' \
111-
| $(SED) -f "$$(TOPDIR)/make/common/support/unicode2x.sed" \
112109
| $(SED) -e '/^#/d' -e '/^$$$$/d' \
113110
-e :a -e '/\\$$$$/N; s/\\\n//; ta' \
114111
-e 's/^[ ]*//;s/[ ]*$$$$//' \
@@ -265,10 +262,12 @@ define SetupJavaCompilationBody
265262
endif
266263

267264
# Tell javac to do exactly as told and no more
268-
PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true -encoding ascii
265+
PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true
269266

270267
$1_FLAGS += -g -Xlint:all $$($1_TARGET_RELEASE) $$(PARANOIA_FLAGS)
271268
$1_FLAGS += $$($1_JAVAC_FLAGS)
269+
# Set character encoding in source
270+
$1_FLAGS += -encoding utf-8
272271

273272
ifeq ($$(JAVA_WARNINGS_AS_ERRORS), true)
274273
$1_FLAGS += -Werror

make/common/JdkNativeCompilation.gmk

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ endef
227227

228228
GLOBAL_VERSION_INFO_RESOURCE := $(TOPDIR)/src/java.base/windows/native/common/version.rc
229229

230+
# \xA9 is the copyright symbol in ANSI encoding (Windows-1252), which rc.exe
231+
# assumes the resource file is in.
230232
JDK_RCFLAGS=$(RCFLAGS) \
231233
-D"JDK_VERSION_STRING=$(VERSION_STRING)" \
232234
-D"JDK_COMPANY=$(JDK_RC_COMPANY_NAME)" \

make/common/support/unicode2x.sed

Lines changed: 0 additions & 100 deletions
This file was deleted.

src/java.base/unix/native/libjava/locale_str.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
"zh", "zh_CN",
8888
#ifdef __linux__
8989
"bokmal", "nb_NO",
90-
"bokm\xE5l", "nb_NO",
90+
"bokmål", "nb_NO",
9191
"catalan", "ca_ES",
9292
"croatian", "hr_HR",
9393
"czech", "cs_CZ",
@@ -98,7 +98,7 @@
9898
"eesti", "et_EE",
9999
"estonian", "et_EE",
100100
"finnish", "fi_FI",
101-
"fran\xE7\x61is", "fr_FR",
101+
"français", "fr_FR",
102102
"french", "fr_FR",
103103
"galego", "gl_ES",
104104
"galician", "gl_ES",
@@ -162,7 +162,7 @@ static char *language_names[] = {
162162
"deutsch", "de",
163163
"dutch", "nl",
164164
"finnish", "fi",
165-
"fran\xE7\x61is", "fr",
165+
"français", "fr",
166166
"french", "fr",
167167
"german", "de",
168168
"greek", "el",

src/java.base/windows/native/libjava/HostLocaleProviderAdapter_md.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ WCHAR * fixes[2][2][3][16] =
134134
L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
135135
},
136136
{ // currency
137-
L"\xA4", L"", L"\xA4 ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
137+
L"¤", L"", L"¤ ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
138138
},
139139
{ // percent
140140
L"", L"", L"%", L"% ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
@@ -145,7 +145,7 @@ WCHAR * fixes[2][2][3][16] =
145145
L"(", L"-", L"- ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
146146
},
147147
{ //currency
148-
L"(\xA4", L"-\xA4", L"\xA4-", L"\xA4", L"(", L"-", L"", L"", L"-", L"-\xA4 ", L"", L"\xA4 ", L"\xA4 -", L"", L"(\xA4 ", L"("
148+
L"(¤", L"-¤", L"¤-", L"¤", L"(", L"-", L"", L"", L"-", L"-¤ ", L"", L"¤ ", L"¤ -", L"", L"(¤ ", L"("
149149
},
150150
{ // percent
151151
L"-", L"-", L"-%", L"%-", L"%", L"", L"", L"-% ", L"", L"% ", L"% -", L"", L"", L"", L"", L"",
@@ -158,7 +158,7 @@ WCHAR * fixes[2][2][3][16] =
158158
L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L""
159159
},
160160
{ // currency
161-
L"", L"\xA4 ", L"", L" \xA4", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
161+
L"", L"¤ ", L"", L" ¤", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
162162
},
163163
{ // percent
164164
L" %", L"%", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
@@ -169,7 +169,7 @@ WCHAR * fixes[2][2][3][16] =
169169
L")", L"", L" ", L"-", L" -", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
170170
},
171171
{ //currency
172-
L")", L"", L"", L"-", L"\xA4)", L"\xA4", L"-\xA4", L"\xA4-", L" \xA4", L"", L" \xA4-", L"-", L"", L"- \xA4", L")", L" \xA4)"
172+
L")", L"", L"", L"-", L"¤)", L"¤", L"-¤", L"¤-", L" ¤", L"", L" ¤-", L"-", L"", L"- ¤", L")", L" ¤)"
173173
},
174174
{ // percent
175175
L" %", L"%", L"", L"", L"-", L"-%", L"%-", L"", L" %-", L"-", L"", L"- %", L"", L"", L"", L"",

0 commit comments

Comments
 (0)