Skip to content

Commit 5b6000a

Browse files
authored
fix: support numeric entities with values over 0xFFFF (#726)
Numeric entities above U+FFFF were truncated to their lower 16 bits because `String.fromCharCode` returns only a single UTF-16 code unit per argument. Replacing the relevant calls with `String.fromCodePoint` addresses the problem.
1 parent efd45cf commit 5b6000a

File tree

4 files changed

+9
-8
lines changed

4 files changed

+9
-8
lines changed

spec/entities_spec.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ describe("XMLParser Entities", function() {
383383
<?xml version="1.0" encoding="UTF-8"?>
384384
<note>
385385
<heading>Bear</heading>
386-
<body face="&#x295;&#x2022;&#x1D25;&#x2022;&#x294;">Bears are called B&#228;ren in German!</body>
386+
<body face="&#x295;&#x2022;&#x1D25;&#x2022;&#x294;" smile="&#x1F60A;&#128523;">Bears are called B&#228;ren in German!</body>
387387
</note> `;
388388

389389
const expected = {
@@ -395,7 +395,8 @@ describe("XMLParser Entities", function() {
395395
"heading": "Bear",
396396
"body": {
397397
"#text": "Bears are called Bären in German!",
398-
"face": "ʕ•ᴥ•ʔ"
398+
"face": "ʕ•ᴥ•ʔ",
399+
"smile": "\u{1F60A}\u{1F60B}"
399400
}
400401
}
401402
};

src/v6/EntitiesParser.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ const htmlEntities = {
1313
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
1414
"reg" : { regex: /&(reg|#174);/g, val: "®" },
1515
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
16-
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 10)) },
17-
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 16)) },
16+
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 10)) },
17+
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 16)) },
1818
};
1919
export default class EntitiesParser{
2020
constructor(replaceHtmlEntities) {

src/v6/valueParsers/EntitiesParser.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ const htmlEntities = {
1313
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
1414
"reg" : { regex: /&(reg|#174);/g, val: "®" },
1515
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
16-
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 10)) },
17-
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 16)) },
16+
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 10)) },
17+
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 16)) },
1818
};
1919

2020
export default class EntitiesParser{

src/xmlparser/OrderedObjParser.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ export default class OrderedObjParser{
4141
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
4242
"reg" : { regex: /&(reg|#174);/g, val: "®" },
4343
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
44-
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 10)) },
45-
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 16)) },
44+
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 10)) },
45+
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCodePoint(Number.parseInt(str, 16)) },
4646
};
4747
this.addExternalEntities = addExternalEntities;
4848
this.parseXml = parseXml;

0 commit comments

Comments
 (0)