Skip to content
This repository was archived by the owner on Jun 24, 2022. It is now read-only.

Commit de2f259

Browse files
JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
https://bugs.webkit.org/show_bug.cgi?id=208998 Reviewed by Michael Saboff. JSTests: * stress/unicode-identifiers-with-surrogate-pairs.js: Added. (let.c.of.chars.eval.foo): (throwsSyntaxError): (let.c.of.continueChars.throwsSyntaxError.foo): Source/JavaScriptCore: This patch fixes a bug in the parser so that it now allows surrogate pairs when parsing identifiers. It also makes a few other changes to the parser: 1) When looking for keywords we just need to check that subsequent character cannot be an identifier part or an escape start. 2) The only time we call parseIdentifierSlowCase is when we hit an escape start or a surrogate pair so we can optimize that to just copy everything up to the slow character into our buffer. 3) We shouldn't allow for asking if a UChar is an identifier start/part. * KeywordLookupGenerator.py: (Trie.printSubTreeAsC): (Trie.printAsC): * parser/Lexer.cpp: (JSC::isNonLatin1IdentStart): (JSC::isIdentStart): (JSC::isSingleCharacterIdentStart): (JSC::cannotBeIdentStart): (JSC::isIdentPart): (JSC::isSingleCharacterIdentPart): (JSC::cannotBeIdentPartOrEscapeStart): (JSC::Lexer<LChar>::currentCodePoint const): (JSC::Lexer<UChar>::currentCodePoint const): (JSC::Lexer<LChar>::parseIdentifier): (JSC::Lexer<UChar>::parseIdentifier): (JSC::Lexer<CharacterType>::parseIdentifierSlowCase): (JSC::Lexer<T>::lexWithoutClearingLineTerminator): (JSC::Lexer<T>::scanRegExp): (JSC::isIdentPartIncludingEscapeTemplate): Deleted. (JSC::isIdentPartIncludingEscape): Deleted. * parser/Lexer.h: (JSC::Lexer::setOffsetFromSourcePtr): Deleted. * parser/Parser.cpp: (JSC::Parser<LexerType>::printUnexpectedTokenText): * parser/ParserTokens.h: Source/WTF: * wtf/text/WTFString.cpp: (WTF::String::fromCodePoint): * wtf/text/WTFString.h: LayoutTests: Fix broken test that asserted a non-ID_START codepoint was a start codepoint and an ID_START codepoint was not a valid codepoint... 
* js/script-tests/unicode-escape-sequences.js: * js/unicode-escape-sequences-expected.txt: git-svn-id: http://svn.webkit.org/repository/webkit/trunk@258531 268f45cc-cd09-0410-ab3c-d52691b4dbfc
1 parent 42cdc42 commit de2f259

File tree

14 files changed

+349
-172
lines changed

14 files changed

+349
-172
lines changed

JSTests/ChangeLog

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
2020-03-16 Keith Miller <[email protected]>
2+
3+
JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
4+
https://bugs.webkit.org/show_bug.cgi?id=208998
5+
6+
Reviewed by Michael Saboff.
7+
8+
* stress/unicode-identifiers-with-surrogate-pairs.js: Added.
9+
(let.c.of.chars.eval.foo):
10+
(throwsSyntaxError):
11+
(let.c.of.continueChars.throwsSyntaxError.foo):
12+
113
2020-03-13 Saam Barati <[email protected]>
214

315
skip wasm/function-tests/grow-memory-cause-gc.js on memory limited devices
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
2+
let chars = ["鴬", "𐊧", "Ϊ"];
3+
let continueChars = [unescape("\u0311"), String.fromCharCode(...[0xDB40, 0xDD96])];
4+
5+
let o = { };
6+
for (let c of chars) {
7+
eval(`var ${c};`);
8+
eval(`function foo() { var ${c} }`);
9+
eval(`o.${c}`);
10+
}
11+
12+
function throwsSyntaxError(string) {
13+
try {
14+
eval(string);
15+
} catch (e) {
16+
if (!(e instanceof SyntaxError))
17+
throw new Error(string);
18+
return;
19+
}
20+
throw new Error(string);
21+
}
22+
23+
for (let c of continueChars) {
24+
throwsSyntaxError(`var ${c}`);
25+
throwsSyntaxError(`function foo() { var ${c} }`);
26+
throwsSyntaxError(`o.${c}`);
27+
eval(`var ${("a" + c)}`);
28+
eval(`o.${"a" + c}`);
29+
30+
}

LayoutTests/ChangeLog

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
2020-03-16 Keith Miller <[email protected]>
2+
3+
JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
4+
https://bugs.webkit.org/show_bug.cgi?id=208998
5+
6+
Reviewed by Michael Saboff.
7+
8+
Fix broken test that asserted a non-ID_START codepoint was a start codepoint and
9+
an ID_START codepoint was not a valid codepoint...
10+
11+
* js/script-tests/unicode-escape-sequences.js:
12+
* js/unicode-escape-sequences-expected.txt:
13+
114
2020-03-16 Jason Lawrence <[email protected]>
215

316
[ Mac wk1 ] http/tests/security/clipboard/copy-paste-html-cross-origin-iframe-across-origin.html is flaky failing.

LayoutTests/js/script-tests/unicode-escape-sequences.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ testInvalidStringUnicodeEscapeSequence("{100000000000000000000000}");
7474
testIdentifierStartUnicodeEscapeSequence("{41}", "0041");
7575
testIdentifierStartUnicodeEscapeSequence("{102C0}", "D800,DEC0");
7676
testIdentifierStartUnicodeEscapeSequence("{102c0}", "D800,DEC0");
77-
testIdentifierStartUnicodeEscapeSequence("{1D306}", "D834,DF06");
78-
testIdentifierStartUnicodeEscapeSequence("{1d306}", "D834,DF06");
77+
testIdentifierStartUnicodeEscapeSequence("{10000}", "D800,DC00");
78+
testIdentifierStartUnicodeEscapeSequence("{10001}", "D800,DC01");
7979

8080
testInvalidIdentifierStartUnicodeEscapeSequence("");
8181
testInvalidIdentifierStartUnicodeEscapeSequence("{0}");
@@ -85,14 +85,14 @@ testInvalidIdentifierStartUnicodeEscapeSequence("{DC00}");
8585
testInvalidIdentifierStartUnicodeEscapeSequence("{dc00}");
8686
testInvalidIdentifierStartUnicodeEscapeSequence("{FFFF}");
8787
testInvalidIdentifierStartUnicodeEscapeSequence("{ffff}");
88-
testInvalidIdentifierStartUnicodeEscapeSequence("{10000}");
89-
testInvalidIdentifierStartUnicodeEscapeSequence("{10001}");
9088
testInvalidIdentifierStartUnicodeEscapeSequence("{10FFFE}");
9189
testInvalidIdentifierStartUnicodeEscapeSequence("{10fffe}");
9290
testInvalidIdentifierStartUnicodeEscapeSequence("{10FFFF}");
9391
testInvalidIdentifierStartUnicodeEscapeSequence("{10ffff}");
9492
testInvalidIdentifierStartUnicodeEscapeSequence("{00000000000000000000000010FFFF}");
9593
testInvalidIdentifierStartUnicodeEscapeSequence("{00000000000000000000000010ffff}");
94+
testInvalidIdentifierStartUnicodeEscapeSequence("{1D306}");
95+
testInvalidIdentifierStartUnicodeEscapeSequence("{1d306}");
9696

9797
testInvalidIdentifierStartUnicodeEscapeSequence("x");
9898
testInvalidIdentifierStartUnicodeEscapeSequence("{");

LayoutTests/js/unicode-escape-sequences-expected.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ PASS codeUnits("\u{100000000000000000000000}") threw exception SyntaxError: \u c
3535
PASS codeUnits(function \u{41}(){}.name) is "0041"
3636
PASS codeUnits(function \u{102C0}(){}.name) is "D800,DEC0"
3737
PASS codeUnits(function \u{102c0}(){}.name) is "D800,DEC0"
38-
PASS codeUnits(function \u{1D306}(){}.name) is "D834,DF06"
39-
PASS codeUnits(function \u{1d306}(){}.name) is "D834,DF06"
38+
PASS codeUnits(function \u{10000}(){}.name) is "D800,DC00"
39+
PASS codeUnits(function \u{10001}(){}.name) is "D800,DC01"
4040
PASS codeUnits(function \u(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u'.
4141
PASS codeUnits(function \u{0}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{0}'.
4242
PASS codeUnits(function \u{D800}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{D800}'.
@@ -45,14 +45,14 @@ PASS codeUnits(function \u{DC00}(){}.name) threw exception SyntaxError: Invalid
4545
PASS codeUnits(function \u{dc00}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{dc00}'.
4646
PASS codeUnits(function \u{FFFF}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{FFFF}'.
4747
PASS codeUnits(function \u{ffff}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{ffff}'.
48-
PASS codeUnits(function \u{10000}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10000}'.
49-
PASS codeUnits(function \u{10001}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10001}'.
5048
PASS codeUnits(function \u{10FFFE}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10FFFE}'.
5149
PASS codeUnits(function \u{10fffe}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10fffe}'.
5250
PASS codeUnits(function \u{10FFFF}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10FFFF}'.
5351
PASS codeUnits(function \u{10ffff}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10ffff}'.
5452
PASS codeUnits(function \u{00000000000000000000000010FFFF}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{00000000000000000000000010FFFF}'.
5553
PASS codeUnits(function \u{00000000000000000000000010ffff}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{00000000000000000000000010ffff}'.
54+
PASS codeUnits(function \u{1D306}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{1D306}'.
55+
PASS codeUnits(function \u{1d306}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{1d306}'.
5656
PASS codeUnits(function \ux(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u'.
5757
PASS codeUnits(function \u{(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{'.
5858
PASS codeUnits(function \u{}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{'.

Source/JavaScriptCore/ChangeLog

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,48 @@
1+
2020-03-16 Keith Miller <[email protected]>
2+
3+
JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
4+
https://bugs.webkit.org/show_bug.cgi?id=208998
5+
6+
Reviewed by Michael Saboff.
7+
8+
This patch fixes a bug in the parser so that it now allows surrogate pairs when parsing identifiers.
9+
It also makes a few other changes to the parser:
10+
11+
1) When looking for keywords we just need to check that subsequent
12+
character cannot be an identifier part or an escape start.
13+
14+
2) The only time we call parseIdentifierSlowCase is when we hit an
15+
escape start or a surrogate pair so we can optimize that to just
16+
copy everything up to the slow character into our buffer.
17+
18+
3) We shouldn't allow for asking if a UChar is an identifier start/part.
19+
20+
* KeywordLookupGenerator.py:
21+
(Trie.printSubTreeAsC):
22+
(Trie.printAsC):
23+
* parser/Lexer.cpp:
24+
(JSC::isNonLatin1IdentStart):
25+
(JSC::isIdentStart):
26+
(JSC::isSingleCharacterIdentStart):
27+
(JSC::cannotBeIdentStart):
28+
(JSC::isIdentPart):
29+
(JSC::isSingleCharacterIdentPart):
30+
(JSC::cannotBeIdentPartOrEscapeStart):
31+
(JSC::Lexer<LChar>::currentCodePoint const):
32+
(JSC::Lexer<UChar>::currentCodePoint const):
33+
(JSC::Lexer<LChar>::parseIdentifier):
34+
(JSC::Lexer<UChar>::parseIdentifier):
35+
(JSC::Lexer<CharacterType>::parseIdentifierSlowCase):
36+
(JSC::Lexer<T>::lexWithoutClearingLineTerminator):
37+
(JSC::Lexer<T>::scanRegExp):
38+
(JSC::isIdentPartIncludingEscapeTemplate): Deleted.
39+
(JSC::isIdentPartIncludingEscape): Deleted.
40+
* parser/Lexer.h:
41+
(JSC::Lexer::setOffsetFromSourcePtr): Deleted.
42+
* parser/Parser.cpp:
43+
(JSC::Parser<LexerType>::printUnexpectedTokenText):
44+
* parser/ParserTokens.h:
45+
146
2020-03-13 Sergio Villar Senin <[email protected]>
247

348
[WebXR] IDLs, stubs and build configuration for WPE

Source/JavaScriptCore/KeywordLookupGenerator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def printSubTreeAsC(self, typeName, indent):
141141
str = makePadding(indent)
142142

143143
if self.value != None:
144-
print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix)))
144+
print(str + "if (LIKELY(cannotBeIdentPartOrEscapeStart(code[%d]))) {" % (len(self.fullPrefix)))
145145
print(str + " internalShift<%d>();" % len(self.fullPrefix))
146146
print(str + " if (shouldCreateIdentifier)")
147147
print(str + (" data->ident = &m_vm.propertyNames->%sKeyword;" % self.fullPrefix))
@@ -184,8 +184,8 @@ def maxLength(self):
184184
def printAsC(self):
185185
print("namespace JSC {")
186186
print("")
187-
print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd);")
188-
print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd);")
187+
print("static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar);")
188+
print("static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar);")
189189
# max length + 1 so we don't need to do any bounds checking at all
190190
print("static constexpr int maxTokenLength = %d;" % (self.maxLength() + 1))
191191
print("")

0 commit comments

Comments
 (0)