Skip to content

Commit e15b636

Browse files
author
Nick Frasser
authored
URL followed by &nbsp; now works as expected in linkify-html (#184)
* Add AMPERSAND character as a first-class token, rather than it belonging to the SYM text token * Update simple-html-tokenizer to latest version. This is the actual fix for the bug. It includes an update to ensure &nbsp; is always parsed into its non-encoded form * Add tests for &nbsp; following URL
1 parent 675b262 commit e15b636

File tree

7 files changed

+21
-6
lines changed

7 files changed

+21
-6
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
"requirejs": "^2.1.22",
6464
"rollup": "~0.40.0",
6565
"run-sequence": "^1.1.5",
66-
"simple-html-tokenizer": "git+https://github.com/nfrasser/simple-html-tokenizer.git#a907972303094c14680d2fd9870965429a893398",
66+
"simple-html-tokenizer": "git+https://github.com/nfrasser/simple-html-tokenizer.git#04799f4638ec5ed903a4e5aa6e832269fa59be6b",
6767
"through2": "^2.0.1"
6868
},
6969
"optionalDependencies": {

src/linkify-html.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,15 @@ function skipTagTokens(tagName, tokens, i, skippedTokens) {
164164

165165
while (i < tokens.length && stackCount > 0) {
166166
let token = tokens[i];
167+
167168
if (token.type === StartTag && token.tagName.toUpperCase() === tagName) {
168169
// Nested tag of the same type, "add to stack"
169170
stackCount++;
170171
} else if (token.type === EndTag && token.tagName.toUpperCase() === tagName) {
171172
// Closing tag
172173
stackCount--;
173174
}
175+
174176
skippedTokens.push(token);
175177
i++;
176178
}

src/linkify/core/parser.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import {
4141
CLOSEBRACKET,
4242
CLOSEANGLEBRACKET,
4343
CLOSEPAREN,
44+
AMPERSAND
4445
} from './tokens/text';
4546

4647
import {
@@ -105,7 +106,7 @@ S_START
105106
.on(LOCALHOST, S_TLD)
106107
.on(NUM, S_DOMAIN);
107108

108-
// Force URL for anything sane followed by protocol
109+
// Force URL for protocol followed by anything sane
109110
S_PROTOCOL_SLASH_SLASH
110111
.on(TLD, S_URL)
111112
.on(DOMAIN, S_URL)
@@ -159,7 +160,8 @@ let qsAccepting = [
159160
SLASH,
160161
TLD,
161162
UNDERSCORE,
162-
SYM
163+
SYM,
164+
AMPERSAND
163165
];
164166

165167
// Types of tokens that can follow a URL and be part of the query string
@@ -262,6 +264,7 @@ let localpartAccepting = [
262264
QUERY,
263265
UNDERSCORE,
264266
SYM,
267+
AMPERSAND,
265268
TLD
266269
];
267270

src/linkify/core/scanner.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import {
3232
CLOSEBRACKET,
3333
CLOSEANGLEBRACKET,
3434
CLOSEPAREN,
35+
AMPERSAND,
3536
PUNCTUATION,
3637
NL,
3738
SYM
@@ -71,6 +72,7 @@ S_START
7172
.on(']', makeState(CLOSEBRACKET))
7273
.on('>', makeState(CLOSEANGLEBRACKET))
7374
.on(')', makeState(CLOSEPAREN))
75+
.on('&', makeState(AMPERSAND))
7476
.on([',', ';', '!', '"', '\''], makeState(PUNCTUATION));
7577

7678
// Whitespace jumps

src/linkify/core/tokens/text.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ const CLOSEBRACKET = inheritsToken(']');
160160
const CLOSEANGLEBRACKET = inheritsToken('>');
161161
const CLOSEPAREN = inheritsToken(')');
162162

163+
const AMPERSAND = inheritsToken('&');
164+
163165
export {
164166
TextToken as Base,
165167
DOMAIN,
@@ -186,5 +188,6 @@ export {
186188
CLOSEBRACE,
187189
CLOSEBRACKET,
188190
CLOSEANGLEBRACKET,
189-
CLOSEPAREN
191+
CLOSEPAREN,
192+
AMPERSAND
190193
};

test/spec/linkify-html-test.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ describe('linkify-html', () => {
5555
'Ignore tags like <script>var a = {}; a.ca = "Hello";</script> and <style>b.com {color: blue;}</style>',
5656
'Ignore tags like <script>var a = {}; <a href="http://a.ca" class="linkified" target="_blank">a.ca</a> = "Hello";</script> and <style><a href="http://b.com" class="linkified" target="_blank">b.com</a> {color: blue;}</style>',
5757
'Ignore tags like <script>var a = {}; a.ca = "Hello";</script> and <style>b.com {color: blue;}</style>'
58+
], [
59+
'6. Link followed by nbsp escape sequence https://github.com&nbsp;',
60+
'6. Link followed by nbsp escape sequence <a href="https://github.com" class="linkified" target="_blank">https://github.com</a>\u00a0',
61+
'6. Link followed by nbsp escape sequence <span href="https://github.com" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">https://github.com</span>\u00a0'
5862
]
5963
];
6064

test/spec/linkify/core/scanner-test.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ const CLOSEBRACE = TEXTTOKENS.CLOSEBRACE;
2525
const CLOSEBRACKET = TEXTTOKENS.CLOSEBRACKET;
2626
const CLOSEANGLEBRACKET = TEXTTOKENS.CLOSEANGLEBRACKET;
2727
const CLOSEPAREN = TEXTTOKENS.CLOSEPAREN;
28+
const AMPERSAND = TEXTTOKENS.AMPERSAND;
2829

2930
// The elements are
3031
// 1. input string
@@ -40,8 +41,8 @@ const tests = [
4041
['+', [PLUS], ['+']],
4142
['#', [POUND], ['#']],
4243
['/', [SLASH], ['/']],
43-
['&', [SYM], ['&']],
44-
['&?<>(', [SYM, QUERY, OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENPAREN], ['&', '?', '<', '>', '(']],
44+
['&', [AMPERSAND], ['&']],
45+
['&?<>(', [AMPERSAND, QUERY, OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENPAREN], ['&', '?', '<', '>', '(']],
4546
['([{}])', [OPENPAREN, OPENBRACKET, OPENBRACE, CLOSEBRACE, CLOSEBRACKET, CLOSEPAREN], ['(', '[', '{', '}', ']', ')']],
4647
['!,;\'', [PUNCTUATION, PUNCTUATION, PUNCTUATION, PUNCTUATION], ['!', ',', ';', '\'']],
4748
['hello', [DOMAIN], ['hello']],

0 commit comments

Comments
 (0)