WebKit
diff --git a/‎JSTests/ChangeLog
Lines changed: 12 additions & 0 deletions b/‎JSTests/ChangeLog
Lines changed: 12 additions & 0 deletions
diff --git a/‎JSTests/stress/unicode-identifiers-with-surrogate-pairs.js
Lines changed: 30 additions & 0 deletions b/‎JSTests/stress/unicode-identifiers-with-surrogate-pairs.js
Lines changed: 30 additions & 0 deletions
diff --git a/‎LayoutTests/ChangeLog
Lines changed: 13 additions & 0 deletions b/‎LayoutTests/ChangeLog
Lines changed: 13 additions & 0 deletions
diff --git a/‎LayoutTests/js/script-tests/unicode-escape-sequences.js
Lines changed: 4 additions & 4 deletions b/‎LayoutTests/js/script-tests/unicode-escape-sequences.js
Lines changed: 4 additions & 4 deletions
diff --git a/‎LayoutTests/js/unicode-escape-sequences-expected.txt
Lines changed: 4 additions & 4 deletions b/‎LayoutTests/js/unicode-escape-sequences-expected.txt
Lines changed: 4 additions & 4 deletions
diff --git a/‎Source/JavaScriptCore/ChangeLog
Lines changed: 45 additions & 0 deletions b/‎Source/JavaScriptCore/ChangeLog
Lines changed: 45 additions & 0 deletions
diff --git a/‎Source/JavaScriptCore/KeywordLookupGenerator.py
Lines changed: 3 additions & 3 deletions b/‎Source/JavaScriptCore/KeywordLookupGenerator.py
Lines changed: 3 additions & 3 deletions
@@ -1,3 +1,15 @@
+2020-03-16  Keith Miller  <[email protected]>
+
+        JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
+        https://bugs.webkit.org/show_bug.cgi?id=208998
+
+        Reviewed by Michael Saboff.
+
+        * stress/unicode-identifiers-with-surrogate-pairs.js: Added.
+        (let.c.of.chars.eval.foo):
+        (throwsSyntaxError):
+        (let.c.of.continueChars.throwsSyntaxError.foo):
+
 2020-03-13  Saam Barati  <[email protected]>
 
         skip wasm/function-tests/grow-memory-cause-gc.js on memory limited devices
 
@@ -0,0 +1,30 @@
+
+let chars = ["鴬", "𐊧", "Ϊ"];
+let continueChars =  [unescape("\u0311"), String.fromCharCode(...[0xDB40, 0xDD96])];
+
+let o = { };
+for (let c of chars) {
+    eval(`var ${c};`);
+    eval(`function foo() { var ${c} }`);
+    eval(`o.${c}`);
+}
+
+function throwsSyntaxError(string) {
+    try {
+        eval(string);
+    } catch (e) {
+        if (!(e instanceof SyntaxError))
+            throw new Error(string);
+        return;
+    }
+    throw new Error(string);
+}
+
+for (let c of continueChars) {
+    throwsSyntaxError(`var ${c}`);
+    throwsSyntaxError(`function foo() { var ${c} }`);
+    throwsSyntaxError(`o.${c}`);
+    eval(`var ${("a" + c)}`);
+    eval(`o.${"a" + c}`);
+
+}
@@ -1,3 +1,16 @@
+2020-03-16  Keith Miller  <[email protected]>
+
+        JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
+        https://bugs.webkit.org/show_bug.cgi?id=208998
+
+        Reviewed by Michael Saboff.
+
+        Fix broken test that asserted a non-ID_START codepoint was a start codepoint and
+        an ID_START codepoint was not a valid start codepoint...
+
+        * js/script-tests/unicode-escape-sequences.js:
+        * js/unicode-escape-sequences-expected.txt:
+
 2020-03-16  Jason Lawrence  <[email protected]>
 
         [ Mac wk1 ] http/tests/security/clipboard/copy-paste-html-cross-origin-iframe-across-origin.html is flaky failing.
 
@@ -74,8 +74,8 @@ testInvalidStringUnicodeEscapeSequence("{100000000000000000000000}");
 testIdentifierStartUnicodeEscapeSequence("{41}", "0041");
 testIdentifierStartUnicodeEscapeSequence("{102C0}", "D800,DEC0");
 testIdentifierStartUnicodeEscapeSequence("{102c0}", "D800,DEC0");
-testIdentifierStartUnicodeEscapeSequence("{1D306}", "D834,DF06");
-testIdentifierStartUnicodeEscapeSequence("{1d306}", "D834,DF06");
+testIdentifierStartUnicodeEscapeSequence("{10000}", "D800,DC00");
+testIdentifierStartUnicodeEscapeSequence("{10001}", "D800,DC01");
 
 testInvalidIdentifierStartUnicodeEscapeSequence("");
 testInvalidIdentifierStartUnicodeEscapeSequence("{0}");
@@ -85,14 +85,14 @@ testInvalidIdentifierStartUnicodeEscapeSequence("{DC00}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{dc00}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{FFFF}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{ffff}");
-testInvalidIdentifierStartUnicodeEscapeSequence("{10000}");
-testInvalidIdentifierStartUnicodeEscapeSequence("{10001}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{10FFFE}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{10fffe}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{10FFFF}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{10ffff}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{00000000000000000000000010FFFF}");
 testInvalidIdentifierStartUnicodeEscapeSequence("{00000000000000000000000010ffff}");
+testInvalidIdentifierStartUnicodeEscapeSequence("{1D306}");
+testInvalidIdentifierStartUnicodeEscapeSequence("{1d306}");
 
 testInvalidIdentifierStartUnicodeEscapeSequence("x");
 testInvalidIdentifierStartUnicodeEscapeSequence("{");
 
@@ -35,8 +35,8 @@ PASS codeUnits("\u{100000000000000000000000}") threw exception SyntaxError: \u c
 PASS codeUnits(function \u{41}(){}.name) is "0041"
 PASS codeUnits(function \u{102C0}(){}.name) is "D800,DEC0"
 PASS codeUnits(function \u{102c0}(){}.name) is "D800,DEC0"
-PASS codeUnits(function \u{1D306}(){}.name) is "D834,DF06"
-PASS codeUnits(function \u{1d306}(){}.name) is "D834,DF06"
+PASS codeUnits(function \u{10000}(){}.name) is "D800,DC00"
+PASS codeUnits(function \u{10001}(){}.name) is "D800,DC01"
 PASS codeUnits(function \u(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u'.
 PASS codeUnits(function \u{0}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{0}'.
 PASS codeUnits(function \u{D800}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{D800}'.
@@ -45,14 +45,14 @@ PASS codeUnits(function \u{DC00}(){}.name) threw exception SyntaxError: Invalid
 PASS codeUnits(function \u{dc00}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{dc00}'.
 PASS codeUnits(function \u{FFFF}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{FFFF}'.
 PASS codeUnits(function \u{ffff}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{ffff}'.
-PASS codeUnits(function \u{10000}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10000}'.
-PASS codeUnits(function \u{10001}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10001}'.
 PASS codeUnits(function \u{10FFFE}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10FFFE}'.
 PASS codeUnits(function \u{10fffe}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10fffe}'.
 PASS codeUnits(function \u{10FFFF}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10FFFF}'.
 PASS codeUnits(function \u{10ffff}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{10ffff}'.
 PASS codeUnits(function \u{00000000000000000000000010FFFF}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{00000000000000000000000010FFFF}'.
 PASS codeUnits(function \u{00000000000000000000000010ffff}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{00000000000000000000000010ffff}'.
+PASS codeUnits(function \u{1D306}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{1D306}'.
+PASS codeUnits(function \u{1d306}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{1d306}'.
 PASS codeUnits(function \ux(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u'.
 PASS codeUnits(function \u{(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{'.
 PASS codeUnits(function \u{}(){}.name) threw exception SyntaxError: Invalid unicode escape in identifier: '\u{'.
 
@@ -1,3 +1,48 @@
+2020-03-16  Keith Miller  <[email protected]>
+
+        JavaScript identifier grammar supports unescaped astral symbols, but JSC doesn’t
+        https://bugs.webkit.org/show_bug.cgi?id=208998
+
+        Reviewed by Michael Saboff.
+
+        This patch fixes a bug in the parser so that surrogate pairs are allowed when parsing identifiers.
+        It also makes a few other changes to the parser:
+
+        1) When looking for keywords we just need to check that the subsequent
+        character cannot be an identifier part or an escape start.
+
+        2) The only time we call parseIdentifierSlowCase is when we hit an
+        escape start or a surrogate pair so we can optimize that to just
+        copy everything up to the slow character into our buffer.
+
+        3) We shouldn't allow for asking if a UChar is an identifier start/part.
+
+        * KeywordLookupGenerator.py:
+        (Trie.printSubTreeAsC):
+        (Trie.printAsC):
+        * parser/Lexer.cpp:
+        (JSC::isNonLatin1IdentStart):
+        (JSC::isIdentStart):
+        (JSC::isSingleCharacterIdentStart):
+        (JSC::cannotBeIdentStart):
+        (JSC::isIdentPart):
+        (JSC::isSingleCharacterIdentPart):
+        (JSC::cannotBeIdentPartOrEscapeStart):
+        (JSC::Lexer<LChar>::currentCodePoint const):
+        (JSC::Lexer<UChar>::currentCodePoint const):
+        (JSC::Lexer<LChar>::parseIdentifier):
+        (JSC::Lexer<UChar>::parseIdentifier):
+        (JSC::Lexer<CharacterType>::parseIdentifierSlowCase):
+        (JSC::Lexer<T>::lexWithoutClearingLineTerminator):
+        (JSC::Lexer<T>::scanRegExp):
+        (JSC::isIdentPartIncludingEscapeTemplate): Deleted.
+        (JSC::isIdentPartIncludingEscape): Deleted.
+        * parser/Lexer.h:
+        (JSC::Lexer::setOffsetFromSourcePtr): Deleted.
+        * parser/Parser.cpp:
+        (JSC::Parser<LexerType>::printUnexpectedTokenText):
+        * parser/ParserTokens.h:
+
 2020-03-13  Sergio Villar Senin  <[email protected]>
 
         [WebXR] IDLs, stubs and build configuration for WPE
 
@@ -141,7 +141,7 @@ def printSubTreeAsC(self, typeName, indent):
         str = makePadding(indent)
 
         if self.value != None:
-            print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix)))
+            print(str + "if (LIKELY(cannotBeIdentPartOrEscapeStart(code[%d]))) {" % (len(self.fullPrefix)))
             print(str + "    internalShift<%d>();" % len(self.fullPrefix))
             print(str + "    if (shouldCreateIdentifier)")
             print(str + ("        data->ident = &m_vm.propertyNames->%sKeyword;" % self.fullPrefix))
@@ -184,8 +184,8 @@ def maxLength(self):
     def printAsC(self):
         print("namespace JSC {")
         print("")
-        print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd);")
-        print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd);")
+        print("static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar);")
+        print("static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar);")
         # max length + 1 so we don't need to do any bounds checking at all
         print("static constexpr int maxTokenLength = %d;" % (self.maxLength() + 1))
         print("")