Skip to content

Commit 6e80fac

Browse files
committed
Fixed a bug where escaped characters in the upper part of the BMP (above the surrogate range, U+E000 to U+FFFF) would not be unescaped.
Also made ParseString return an error if it fails to parse a string.
1 parent 016ea00 commit 6e80fac

File tree

4 files changed

+54
-12
lines changed

4 files changed

+54
-12
lines changed

escape.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ const supplementalPlanesOffset = 0x10000
1111
const highSurrogateOffset = 0xD800
1212
const lowSurrogateOffset = 0xDC00
1313

14+
const basicMultilingualPlaneReservedOffset = 0xDFFF
15+
const basicMultilingualPlaneOffset = 0xFFFF
16+
1417
func combineUTF16Surrogates(high, low rune) rune {
1518
return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset)
1619
}
@@ -49,11 +52,18 @@ func decodeSingleUnicodeEscape(in []byte) (rune, bool) {
4952
return rune(h1<<12 + h2<<8 + h3<<4 + h4), true
5053
}
5154

55+
// isUTF16EncodedRune checks if a rune is in the UTF-16 surrogate range (0xD800-0xDFFF),
56+
// which is reserved within the BMP for encoding non-BMP characters as surrogate pairs.
57+
// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
58+
func isUTF16EncodedRune(r rune) bool {
59+
return highSurrogateOffset <= r && r <= basicMultilingualPlaneReservedOffset
60+
}
61+
5262
func decodeUnicodeEscape(in []byte) (rune, int) {
5363
if r, ok := decodeSingleUnicodeEscape(in); !ok {
5464
// Invalid Unicode escape
5565
return utf8.RuneError, -1
56-
} else if r < highSurrogateOffset {
66+
} else if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) {
5767
// Valid Unicode escape in Basic Multilingual Plane
5868
return r, 6
5969
} else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain
@@ -66,7 +76,6 @@ func decodeUnicodeEscape(in []byte) (rune, int) {
6676
// Valid UTF16 surrogate pair
6777
return combineUTF16Surrogates(r, r2), 12
6878
}
69-
7079
}
7180

7281
// backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X]

escape_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@ var singleUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
4444
{in: `\uD83D`, out: 0xD83D, len: 6},
4545
{in: `\uDE03`, out: 0xDE03, len: 6},
4646
{in: `\uFFFF`, out: 0xFFFF, len: 6},
47+
{in: `\uFF11`, out: '1', len: 6},
4748
}, commonUnicodeEscapeTests...)
4849

4950
var multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
5051
{in: `\uD83D`, isErr: true},
5152
{in: `\uDE03`, isErr: true},
52-
{in: `\uFFFF`, isErr: true},
53+
{in: `\uFFFF`, out: '\uFFFF', len: 6},
54+
{in: `\uFF11`, out: '1', len: 6},
5355

5456
{in: `\uD83D\uDE03`, out: '\U0001F603', len: 12},
5557
{in: `\uD800\uDC00`, out: '\U00010000', len: 12},
@@ -109,13 +111,14 @@ var unescapeTests = []unescapeTest{
109111
{in: `ab\\de`, out: `ab\de`, canAlloc: true},
110112
{in: `ab\"de`, out: `ab"de`, canAlloc: true},
111113
{in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true},
114+
{in: `ab \uFF11 de`, out: `ab 1 de`, canAlloc: true},
115+
{in: `\uFFFF`, out: "\uFFFF", canAlloc: true},
112116
{in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true},
113117
{in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true},
114118
{in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true},
115119
{in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true},
116120

117121
{in: `\uD800`, isErr: true},
118-
{in: `\uFFFF`, isErr: true},
119122
{in: `abcde\`, isErr: true},
120123
{in: `abcde\x`, isErr: true},
121124
{in: `abcde\u`, isErr: true},

parser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,7 @@ func ParseBoolean(b []byte) (bool, error) {
832832
func ParseString(b []byte) (string, error) {
833833
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
834834
if bU, err := Unescape(b, stackbuf[:]); err != nil {
835-
return "", nil
835+
return "", MalformedValueError
836836
} else {
837837
return string(bU), nil
838838
}

parser_test.go

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -358,17 +358,17 @@ var getTests = []GetTest{
358358
isFound: false,
359359
},
360360
{ // Issue #81
361-
desc: `missing key in object in array`,
362-
json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`,
363-
path: []string{"p", "a", "[0]", "x"},
361+
desc: `missing key in object in array`,
362+
json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`,
363+
path: []string{"p", "a", "[0]", "x"},
364364
isFound: false,
365365
},
366366
{ // Issue #81 counter test
367-
desc: `existing key in object in array`,
368-
json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`,
369-
path: []string{"p", "a", "[0]", "u"},
367+
desc: `existing key in object in array`,
368+
json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`,
369+
path: []string{"p", "a", "[0]", "u"},
370370
isFound: true,
371-
data: "abc",
371+
data: "abc",
372372
},
373373
{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance
374374
desc: "malformed key (followed by comma followed by colon)",
@@ -1183,3 +1183,33 @@ func TestParseFloat(t *testing.T) {
11831183
},
11841184
)
11851185
}
1186+
1187+
var parseStringTest = []ParseTest{
1188+
{
1189+
in: `\uFF11`,
1190+
intype: String,
1191+
out: "\uFF11",
1192+
},
1193+
{
1194+
in: `\uFFFF`,
1195+
intype: String,
1196+
out: "\uFFFF",
1197+
},
1198+
{
1199+
in: `\uDF00`,
1200+
intype: String,
1201+
isErr: true,
1202+
},
1203+
}
1204+
1205+
func TestParseString(t *testing.T) {
1206+
runParseTests(t, "ParseString()", parseStringTest,
1207+
func(test ParseTest) (value interface{}, err error) {
1208+
return ParseString([]byte(test.in))
1209+
},
1210+
func(test ParseTest, obtained interface{}) (bool, interface{}) {
1211+
expected := test.out.(string)
1212+
return obtained.(string) == expected, expected
1213+
},
1214+
)
1215+
}

0 commit comments

Comments
 (0)