Skip to content

Commit adfa0ce

Browse files
author
Nick Frasser
authored
Correct trailing symbol parsing (#149)
* Scanner enhancements
* Allow states to have a default state transition
* Move away from regular expression state transitions
* Parse trailing symbols in URLs more greedily
* Now that quotes and parens/brackets get special treatment, there's no longer a need to leave off trailing non-standard URL symbols.

Fixes #108
Fixes #143
Partially addresses #100
1 parent 0aa5eec commit adfa0ce

File tree

6 files changed

+51
-37
lines changed

6 files changed

+51
-37
lines changed

src/linkify/core/parser.js

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ S_TLD = makeState(T_URL), // (A) Simplest possible URL with no query string
6464
S_TLD_COLON = makeState(), // (A) URL followed by colon (potential port number here)
6565
S_TLD_PORT = makeState(T_URL), // TLD followed by a port number
6666
S_URL = makeState(T_URL), // Long URL with optional port and maybe query string
67-
S_URL_SYMS = makeState(), // URL followed by some symbols (will not be part of the final URL)
67+
S_URL_NON_ACCEPTING = makeState(), // URL followed by some symbols (will not be part of the final URL)
6868
S_URL_OPENBRACE = makeState(), // URL followed by {
6969
S_URL_OPENBRACKET = makeState(), // URL followed by [
7070
S_URL_OPENPAREN = makeState(), // URL followed by (
@@ -152,7 +152,8 @@ let qsAccepting = [
152152
TT_POUND,
153153
TT_PROTOCOL,
154154
TT_SLASH,
155-
TT_TLD
155+
TT_TLD,
156+
TT_SYM
156157
];
157158

158159
// Types of tokens that can follow a URL and be part of the query string
@@ -168,8 +169,7 @@ let qsNonAccepting = [
168169
TT_CLOSEPAREN,
169170
TT_OPENBRACE,
170171
TT_OPENBRACKET,
171-
TT_OPENPAREN,
172-
TT_SYM
172+
TT_OPENPAREN
173173
];
174174

175175
// These states are responsible primarily for determining whether or not to
@@ -182,7 +182,7 @@ S_URL
182182
.on(TT_OPENPAREN, S_URL_OPENPAREN);
183183

184184
// URL with extra symbols at the end, followed by an opening bracket
185-
S_URL_SYMS
185+
S_URL_NON_ACCEPTING
186186
.on(TT_OPENBRACE, S_URL_OPENBRACE)
187187
.on(TT_OPENBRACKET, S_URL_OPENBRACKET)
188188
.on(TT_OPENPAREN, S_URL_OPENPAREN);
@@ -225,10 +225,10 @@ S_URL_OPENPAREN_SYMS.on(qsNonAccepting, S_URL_OPENPAREN_SYMS);
225225

226226
// Account for the query string
227227
S_URL.on(qsAccepting, S_URL);
228-
S_URL_SYMS.on(qsAccepting, S_URL);
228+
S_URL_NON_ACCEPTING.on(qsAccepting, S_URL);
229229

230-
S_URL.on(qsNonAccepting, S_URL_SYMS);
231-
S_URL_SYMS.on(qsNonAccepting, S_URL_SYMS);
230+
S_URL.on(qsNonAccepting, S_URL_NON_ACCEPTING);
231+
S_URL_NON_ACCEPTING.on(qsNonAccepting, S_URL_NON_ACCEPTING);
232232

233233
// Email address-specific state definitions
234234
// Note: We are not allowing '/' in email addresses since this would interfere

src/linkify/core/scanner.js

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ import {CharacterState as State, stateify} from './state';
1212

1313
const tlds = __TLDS__; // macro, see gulpfile.js
1414

15-
const
16-
REGEXP_NUM = /[0-9]/,
17-
REGEXP_ALPHANUM = /[a-z0-9]/,
18-
COLON = ':';
15+
const NUM = '0123456789'.split('');
16+
const ALPHANUM = '0123456789abcdefghijklmnopqrstuvwxyz'.split('');
17+
const WHITESPACE = [' ', '\f', '\r', '\t', '\v']; // excluding line breaks
18+
const COLON = ':';
1919

2020
let
2121
domainStates = [], // states that jump to DOMAIN on /[a-z0-9]/
@@ -30,7 +30,7 @@ T_TLD = TOKENS.TLD,
3030
T_WS = TOKENS.WS;
3131

3232
const // Frequently used states
33-
S_START = makeState(), // start state
33+
S_START = makeState(),
3434
S_NUM = makeState(T_NUM),
3535
S_DOMAIN = makeState(T_DOMAIN),
3636
S_DOMAIN_HYPHEN = makeState(), // domain followed by 1 or more hyphen characters
@@ -51,16 +51,16 @@ S_START
5151
.on('}', makeState(TOKENS.CLOSEBRACE))
5252
.on(']', makeState(TOKENS.CLOSEBRACKET))
5353
.on(')', makeState(TOKENS.CLOSEPAREN))
54-
.on(/[,;!]/, makeState(TOKENS.PUNCTUATION));
54+
.on([',', ';', '!', '"'], makeState(TOKENS.PUNCTUATION));
5555

5656
// Whitespace jumps
5757
// Tokens of only non-newline whitespace are arbitrarily long
5858
S_START
59-
.on(/\n/, makeState(TOKENS.NL))
60-
.on(/\s/, S_WS);
59+
.on('\n', makeState(TOKENS.NL))
60+
.on(WHITESPACE, S_WS);
6161

6262
// If any whitespace except newline, more whitespace!
63-
S_WS.on(/[^\S\n]/, S_WS);
63+
S_WS.on(WHITESPACE, S_WS);
6464

6565
// Generates states for top-level domains
6666
// Note that this is most accurate when tlds are in alphabetical order
@@ -109,30 +109,30 @@ domainStates.push.apply(domainStates, partialLocalhostStates);
109109
// Everything else
110110
// DOMAINs make more DOMAINs
111111
// Number and character transitions
112-
S_START.on(REGEXP_NUM, S_NUM);
112+
S_START.on(NUM, S_NUM);
113113
S_NUM
114114
.on('-', S_DOMAIN_HYPHEN)
115-
.on(REGEXP_NUM, S_NUM)
116-
.on(REGEXP_ALPHANUM, S_DOMAIN); // number becomes DOMAIN
115+
.on(NUM, S_NUM)
116+
.on(ALPHANUM, S_DOMAIN); // number becomes DOMAIN
117117

118118
S_DOMAIN
119119
.on('-', S_DOMAIN_HYPHEN)
120-
.on(REGEXP_ALPHANUM, S_DOMAIN);
120+
.on(ALPHANUM, S_DOMAIN);
121121

122122
// All the generated states should have a jump to DOMAIN
123123
for (let i = 0; i < domainStates.length; i++) {
124124
domainStates[i]
125125
.on('-', S_DOMAIN_HYPHEN)
126-
.on(REGEXP_ALPHANUM, S_DOMAIN);
126+
.on(ALPHANUM, S_DOMAIN);
127127
}
128128

129129
S_DOMAIN_HYPHEN
130130
.on('-', S_DOMAIN_HYPHEN)
131-
.on(REGEXP_NUM, S_DOMAIN)
132-
.on(REGEXP_ALPHANUM, S_DOMAIN);
131+
.on(NUM, S_DOMAIN)
132+
.on(ALPHANUM, S_DOMAIN);
133133

134-
// Any other character is considered a single symbol token
135-
S_START.on(/./, makeState(TOKENS.SYM));
134+
// Set default transition
135+
S_START.defaultTransition = makeState(TOKENS.SYM);
136136

137137
/**
138138
Given a string, returns an array of TOKEN instances representing the

src/linkify/core/state.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ function createStateClass() {
2828
*/
2929
const BaseState = createStateClass();
3030
BaseState.prototype = {
31+
defaultTransition: false,
32+
3133
/**
3234
@method constructor
3335
@param {Class} tClass Pass in the kind of token to emit if there are
@@ -74,7 +76,7 @@ BaseState.prototype = {
7476
}
7577

7678
// Nowhere left to jump!
77-
return false;
79+
return this.defaultTransition;
7880
},
7981

8082
/**

test/spec/linkify-html-test.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ describe('linkify-html', function () {
4242
'The URL is <a href="http://google.com" class="linkified" target="_blank">google.com</a> and the email is <strong><a href="mailto:[email protected]" class="linkified">[email protected]</a></strong>',
4343
'The URL is <span href="https://google.com" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">google.com</span> and the email is <strong><span href="mailto:[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">[email protected]</span></strong>'
4444
], [
45-
'Super long maps URL https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en, a #hash-tag, and an email: test."wut"[email protected]!',
46-
'Super long maps URL <a href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="linkified" target="_blank">https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en</a>, a #hash-tag, and an email: <a href="mailto:test.&quot;wut&quot;[email protected]" class="linkified">test."wut"[email protected]</a>!',
47-
'Super long maps URL <span href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">https://www.google.ca/maps/@43.472082,-8…</span>, a #hash-tag, and an email: <span href="mailto:test.&quot;wut&quot;[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">test."wut"[email protected]</span>!',
45+
'Super long maps URL https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en, a #hash-tag, and an email: [email protected]!',
46+
'Super long maps URL <a href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="linkified" target="_blank">https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en</a>, a #hash-tag, and an email: <a href="mailto:[email protected]" class="linkified">[email protected]</a>!',
47+
'Super long maps URL <span href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">https://www.google.ca/maps/@43.472082,-8…</span>, a #hash-tag, and an email: <span href="mailto:[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="console.log(\'Hello World!\')">[email protected]</span>!',
4848
], [
4949
'This link is already in an anchor tag <a href="#bro">google.com</a> LOL and this one <h1>isnt http://github.com</h1>',
5050
'This link is already in an anchor tag <a href="#bro">google.com</a> LOL and this one <h1>isnt <a href="http://github.com" class="linkified" target="_blank">http://github.com</a></h1>',
@@ -87,8 +87,8 @@ describe('linkify-html', function () {
8787
'2.The URL is google.com and the email is <strong>[email protected]</strong>',
8888
'2.The URL is google.com and the email is <strong><a href="mailto:[email protected]" class="linkified">[email protected]</a></strong>'
8989
], [
90-
'3.Super long maps URL https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en, a #hash-tag, and an email: test."wut"[email protected]!',
91-
'3.Super long maps URL <a href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="linkified" target="_blank">https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en</a>, a #hash-tag, and an email: <a href="mailto:test.&quot;wut&quot;[email protected]" class="linkified">test."wut"[email protected]</a>!'
90+
'3.Super long maps URL https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en, a #hash-tag, and an email: [email protected]!',
91+
'3.Super long maps URL <a href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="linkified" target="_blank">https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en</a>, a #hash-tag, and an email: <a href="mailto:[email protected]" class="linkified">[email protected]</a>!'
9292
], [
9393
'4a.This link is already in an anchor tag <a href="#bro">google.com</a> LOL and this one <h1>isnt http://github.com</h1>',
9494
'4a.This link is already in an anchor tag <a href="#bro">google.com</a> LOL and this one <h1>isnt <a href="http://github.com" class="linkified" target="_blank">http://github.com</a></h1>'

test/spec/linkify-string-test.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ describe('linkify-string', function () {
3232
defaultProtocol: 'https',
3333
linkAttributes: {
3434
rel: 'nofollow',
35-
onclick: 'javascript:;'
35+
onclick: 'javascript:alert("Hello");'
3636
},
3737
format: function (val) {
3838
return val.truncate(40);
@@ -57,12 +57,12 @@ describe('linkify-string', function () {
5757
], [
5858
'The URL is google.com and the email is [email protected]',
5959
'The URL is <a href="http://google.com" class="linkified" target="_blank">google.com</a> and the email is <a href="mailto:[email protected]" class="linkified">[email protected]</a>',
60-
'The URL is <span href="https://google.com" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:;">google.com</span> and the email is <span href="mailto:[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:;">[email protected]</span>',
60+
'The URL is <span href="https://google.com" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:alert(&quot;Hello&quot;);">google.com</span> and the email is <span href="mailto:[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:alert(&quot;Hello&quot;);">[email protected]</span>',
6161
'The URL is google.com and the email is [email protected]',
6262
], [
63-
'Super long maps URL https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en, a #hash-tag, and an email: test."wut"[email protected]!\n',
64-
'Super long maps URL <a href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="linkified" target="_blank">https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en</a>, a #hash-tag, and an email: <a href="mailto:test.&quot;wut&quot;[email protected]" class="linkified">test."wut"[email protected]</a>!\n',
65-
'Super long maps URL <span href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:;">https://www.google.ca/maps/@43.472082,-8…</span>, a #hash-tag, and an email: <span href="mailto:test.&quot;wut&quot;[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:;">test."wut"[email protected]</span>!<br>\n',
63+
'Super long maps URL https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en, a #hash-tag, and an email: [email protected]!\n',
64+
'Super long maps URL <a href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="linkified" target="_blank">https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en</a>, a #hash-tag, and an email: <a href="mailto:[email protected]" class="linkified">[email protected]</a>!\n',
65+
'Super long maps URL <span href="https://www.google.ca/maps/@43.472082,-80.5426668,18z?hl=en" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:alert(&quot;Hello&quot;);">https://www.google.ca/maps/@43.472082,-8…</span>, a #hash-tag, and an email: <span href="mailto:[email protected]?subject=Hello%20from%20Linkify" class="my-linkify-class" target="_parent" rel="nofollow" onclick="javascript:alert(&quot;Hello&quot;);">[email protected]</span>!<br>\n',
6666
]
6767
];
6868

test/spec/linkify/core/parser-test.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,18 @@ var tests = [
138138
'HTTP Auth URLs should work: http://username:[email protected]',
139139
[TEXT, URL],
140140
['HTTP Auth URLs should work: ', 'http://username:[email protected]']
141+
], [
142+
'Trailing equal symbol should work: http://example.com/foo/bar?token=CtFOYuk0wjiqvHZF==',
143+
[TEXT, URL],
144+
['Trailing equal symbol should work: ', 'http://example.com/foo/bar?token=CtFOYuk0wjiqvHZF==']
145+
], [
146+
'"https://surrounded.by.quotes/"',
147+
[TEXT, URL, TEXT],
148+
['"', 'https://surrounded.by.quotes/', '"']
149+
], [
150+
'More weird character in http://facebook.com/#aZ?/:@-._~!$&\'()*+,;= that URL',
151+
[TEXT, URL, TEXT],
152+
['More weird character in ', 'http://facebook.com/#aZ?/:@-._~!$&\'()*+,;=', ' that URL']
141153
]
142154
];
143155

0 commit comments

Comments
 (0)