diff --git a/spec/html_spec.js b/spec/html_spec.js index b5381ede..2f1b1e79 100644 --- a/spec/html_spec.js +++ b/spec/html_spec.js @@ -78,4 +78,66 @@ const parsingOptions = { output = output.replace('₹','&inr;'); expect(output.replace(/\s+/g, "")).toEqual(html.replace(/\s+/g, "")); }); + + + it("should fail to parse HTML + + `; + + const parsingOptions = { + ignoreAttributes: false, + preserveOrder: true, + unpairedTags: ["hr", "br", "link", "meta"], + stopNodes: ["*.pre", "*.script"], + processEntities: true, + htmlEntities: true, + }; + const parser = new XMLParser(parsingOptions); + expect(function () { parser.parse(html); }).toThrow(); + }); + + + it("should parse HTML + + `; + + const parsingOptions = { + ignoreAttributes: false, + preserveOrder: true, + unpairedTags: ["hr", "br", "link", "meta"], + stopNodes: ["*.pre", "*.script"], + ignoreTagsInNodes: ["*.script"], + processEntities: true, + htmlEntities: true, + }; + const parser = new XMLParser(parsingOptions); + let result = parser.parse(html); + + const builderOptions = { + ignoreAttributes: false, + format: true, + preserveOrder: true, + suppressEmptyNode: false, + unpairedTags: ["hr", "br", "link", "meta"], + stopNodes: ["*.pre", "*.script"], + } + const builder = new XMLBuilder(builderOptions); + let output = builder.build(result); + expect(output.replace(/\s+/g, "")).toEqual(html.replace(/\s+/g, "")); + }); + }); \ No newline at end of file diff --git a/src/fxp.d.ts b/src/fxp.d.ts index a878543b..575afcad 100644 --- a/src/fxp.d.ts +++ b/src/fxp.d.ts @@ -23,6 +23,7 @@ Control how tag value should be parsed. 
Called only if tag value is not empty attributeValueProcessor: (attrName: string, attrValue: string, jPath: string) => string; numberParseOptions: strnumOptions; stopNodes: string[]; + ignoreTagsInNodes: string[]; unpairedTags: string[]; alwaysCreateTextNode: boolean; isArray: (tagName: string, jPath: string, isLeafNode: boolean, isAttribute: boolean) => boolean; diff --git a/src/xmlparser/OptionsBuilder.js b/src/xmlparser/OptionsBuilder.js index b375d9dd..38426a8f 100644 --- a/src/xmlparser/OptionsBuilder.js +++ b/src/xmlparser/OptionsBuilder.js @@ -22,7 +22,8 @@ const defaultOptions = { attributeValueProcessor: function(attrName, val) { return val; }, - stopNodes: [], //nested tags will not be parsed even for errors + stopNodes: [], // nested tags will not be parsed even for errors + ignoreTagsInNodes: [], // stop nodes inside which a nested opening tag is not parsed while looking for the closing tag alwaysCreateTextNode: false, isArray: () => false, commentPropName: false, diff --git a/src/xmlparser/OrderedObjParser.js b/src/xmlparser/OrderedObjParser.js index d2f79eca..5ea77e9c 100644 --- a/src/xmlparser/OrderedObjParser.js +++ b/src/xmlparser/OrderedObjParser.js @@ -46,7 +46,7 @@ class OrderedObjParser{ this.parseTextData = parseTextData; this.resolveNameSpace = resolveNameSpace; this.buildAttributesMap = buildAttributesMap; - this.isItStopNode = isItStopNode; + this.checkNodePathMatch = checkNodePathMatch; this.replaceEntitiesValue = replaceEntitiesValue; this.readStopNodeData = readStopNodeData; this.saveTextToParentTag = saveTextToParentTag; @@ -289,7 +289,7 @@ const parseXml = function(xmlData) { currentNode = this.tagsNodeStack.pop(); } - if (this.isItStopNode(this.options.stopNodes, jPath, tagName)) { //TODO: namespace + if (this.checkNodePathMatch(this.options.stopNodes, jPath, tagName)) { //TODO: namespace let tagContent = ""; //self-closing tag if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){ @@ -302,7 +302,7 @@ const parseXml = function(xmlData) { //normal tag else{ //read 
until closing tag is found - const result = this.readStopNodeData(xmlData, tagName, closeIndex + 1); + const result = this.readStopNodeData(xmlData, tagName, closeIndex + 1, this.checkNodePathMatch(this.options.ignoreTagsInNodes, jPath, tagName)); if(!result) throw new Error(`Unexpected end of ${tagName}`); i = result.i; tagContent = result.tagContent; @@ -403,15 +403,15 @@ function saveTextToParentTag(textData, currentNode, jPath, isLeafNode) { //TODO: use jPath to simplify the logic /** * - * @param {string[]} stopNodes + * @param {string[]} nodePaths * @param {string} jPath - * @param {string} currentTagName + * @param {string} currentTagName */ -function isItStopNode(stopNodes, jPath, currentTagName){ +function checkNodePathMatch(nodePaths, jPath, currentTagName) { const allNodesExp = "*." + currentTagName; - for (const stopNodePath in stopNodes) { - const stopNodeExp = stopNodes[stopNodePath]; - if( allNodesExp === stopNodeExp || jPath === stopNodeExp ) return true; + for (const nodePath in nodePaths) { + const nodeExp = nodePaths[nodePath]; + if (allNodesExp === nodeExp || jPath === nodeExp) return true; } return false; } @@ -494,8 +494,9 @@ function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){ * @param {string} xmlData * @param {string} tagName * @param {number} i + * @param {boolean} ignoreNestedTags Ignores nested tags if true. This allows parsing the content of tags like <script> without a < in that content triggering a new open tag. */ -function readStopNodeData(xmlData, tagName, i){ +function readStopNodeData(xmlData, tagName, i, ignoreNestedTags){ const startIndex = i; // Starting at 1 since we already have an open tag let openTagCount = 1; @@ -524,7 +525,7 @@ function readStopNodeData(xmlData, tagName, i){ } else if(xmlData.substr(i + 1, 2) === '![') { const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2; i=closeIndex; - } else { + } else if (!ignoreNestedTags) { const tagData = readTagExp(xmlData, i, '>') if (tagData) {