Skip to content

Commit 4d07072

Browse files
author
Nick Frasser
committed
Merge pull request #127 from SoapBox/protocol-parsing
Anything that begins with a protocol is recognized as a string
2 parents 3ae6099 + 46b8c82 commit 4d07072

File tree

2 files changed

+16
-24
lines changed

2 files changed

+16
-24
lines changed

src/linkify/core/parser.js

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,6 @@ let S_START = makeState();
5454

5555
// Intermediate states for URLs. Note that domains that begin with a protocol
5656
// are treated slightly differently from those that don't.
57-
// (PSS == "PROTOCOL SLASH SLASH")
58-
// S_DOMAIN* states can generally become prefixes for email addresses, while
59-
// S_PSS_DOMAIN* cannot
6057
let
6158
S_PROTOCOL = makeState(), // e.g., 'http:'
6259
S_PROTOCOL_SLASH = makeState(), // e.g., '/', 'http:/'
@@ -66,11 +63,6 @@ S_DOMAIN_DOT = makeState(), // (A) domain followed by DOT
6663
S_TLD = makeState(T_URL), // (A) Simplest possible URL with no query string
6764
S_TLD_COLON = makeState(), // (A) URL followed by colon (potential port number here)
6865
S_TLD_PORT = makeState(T_URL), // TLD followed by a port number
69-
S_PSS_DOMAIN = makeState(), // parsed string starts with protocol and ends with a potential domain name (B)
70-
S_PSS_DOMAIN_DOT = makeState(), // (B) domain followed by DOT
71-
S_PSS_TLD = makeState(T_URL), // (B) Simplest possible URL with no query string and a protocol
72-
S_PSS_TLD_COLON = makeState(), // (A) URL followed by colon (potential port number here)
73-
S_PSS_TLD_PORT = makeState(T_URL), // TLD followed by a port number
7466
S_URL = makeState(T_URL), // Long URL with optional port and maybe query string
7567
S_URL_SYMS = makeState(), // URL followed by some symbols (will not be part of the final URL)
7668
S_URL_OPENBRACE = makeState(), // URL followed by {
@@ -104,15 +96,16 @@ S_START.on(TT_TLD, S_DOMAIN);
10496
S_START.on(TT_DOMAIN, S_DOMAIN);
10597
S_START.on(TT_LOCALHOST, S_TLD);
10698
S_START.on(TT_NUM, S_DOMAIN);
107-
S_PROTOCOL_SLASH_SLASH.on(TT_TLD, S_PSS_DOMAIN);
108-
S_PROTOCOL_SLASH_SLASH.on(TT_DOMAIN, S_PSS_DOMAIN);
109-
S_PROTOCOL_SLASH_SLASH.on(TT_NUM, S_PSS_DOMAIN);
110-
S_PROTOCOL_SLASH_SLASH.on(TT_LOCALHOST, S_PSS_TLD);
99+
100+
// Force URL for anything sane that follows a protocol
101+
S_PROTOCOL_SLASH_SLASH.on(TT_TLD, S_URL);
102+
S_PROTOCOL_SLASH_SLASH.on(TT_DOMAIN, S_URL);
103+
S_PROTOCOL_SLASH_SLASH.on(TT_NUM, S_URL);
104+
S_PROTOCOL_SLASH_SLASH.on(TT_LOCALHOST, S_URL);
111105

112106
// Account for dots and hyphens
113107
// hyphens are usually parts of domain names
114108
S_DOMAIN.on(TT_DOT, S_DOMAIN_DOT);
115-
S_PSS_DOMAIN.on(TT_DOT, S_PSS_DOMAIN_DOT);
116109
S_EMAIL_DOMAIN.on(TT_DOT, S_EMAIL_DOMAIN_DOT);
117110

118111
// Hyphen can jump back to a domain name
@@ -122,10 +115,6 @@ S_DOMAIN_DOT.on(TT_TLD, S_TLD);
122115
S_DOMAIN_DOT.on(TT_DOMAIN, S_DOMAIN);
123116
S_DOMAIN_DOT.on(TT_NUM, S_DOMAIN);
124117
S_DOMAIN_DOT.on(TT_LOCALHOST, S_DOMAIN);
125-
S_PSS_DOMAIN_DOT.on(TT_TLD, S_PSS_TLD);
126-
S_PSS_DOMAIN_DOT.on(TT_DOMAIN, S_PSS_DOMAIN);
127-
S_PSS_DOMAIN_DOT.on(TT_NUM, S_PSS_DOMAIN);
128-
S_PSS_DOMAIN_DOT.on(TT_LOCALHOST, S_PSS_DOMAIN);
129118
S_EMAIL_DOMAIN_DOT.on(TT_TLD, S_EMAIL);
130119
S_EMAIL_DOMAIN_DOT.on(TT_DOMAIN, S_EMAIL_DOMAIN);
131120
S_EMAIL_DOMAIN_DOT.on(TT_NUM, S_EMAIL_DOMAIN);
@@ -134,7 +123,6 @@ S_EMAIL_DOMAIN_DOT.on(TT_LOCALHOST, S_EMAIL_DOMAIN);
134123
// S_TLD accepts! But the URL could be longer, try to find a match greedily
135124
// The `run` function should be able to "rollback" to the accepting state
136125
S_TLD.on(TT_DOT, S_DOMAIN_DOT);
137-
S_PSS_TLD.on(TT_DOT, S_PSS_DOMAIN_DOT);
138126
S_EMAIL.on(TT_DOT, S_EMAIL_DOMAIN_DOT);
139127

140128
// Become real URLs after `SLASH` or `COLON NUM SLASH`
@@ -143,10 +131,6 @@ S_TLD.on(TT_COLON, S_TLD_COLON);
143131
S_TLD.on(TT_SLASH, S_URL);
144132
S_TLD_COLON.on(TT_NUM, S_TLD_PORT);
145133
S_TLD_PORT.on(TT_SLASH, S_URL);
146-
S_PSS_TLD.on(TT_COLON, S_PSS_TLD_COLON);
147-
S_PSS_TLD.on(TT_SLASH, S_URL);
148-
S_PSS_TLD_COLON.on(TT_NUM, S_PSS_TLD_PORT);
149-
S_PSS_TLD_PORT.on(TT_SLASH, S_URL);
150134
S_EMAIL.on(TT_COLON, S_EMAIL_COLON);
151135
S_EMAIL_COLON.on(TT_NUM, S_EMAIL_PORT);
152136

test/spec/linkify/core/parser-test.js

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ var tests = [
4040
['This [', 'i.imgur.com/ckSj2Ba.jpg', ')] should also work']
4141
], [
4242
'A link is http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look',
43-
[TEXT, URL, TEXT],
44-
['A link is ', 'http://nick.is', '.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look']
43+
[TEXT, URL, TEXT, URL],
44+
['A link is ', 'http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp', ' another is ', 'http://nick.con/?q=look']
4545
], [
4646
'SOme URLS http://google.com https://google1.com google2.com google.com/search?q=potatoes+oven goo.gl/0192n1 google.com?q=asda test bit.ly/0912j www.bob.com indigo.dev.soapbox.co/mobile google.com/?q=.exe flickr.com/linktoimage.jpg',
4747
[TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL],
@@ -130,6 +130,14 @@ var tests = [
130130
'A really funky one (example.com/?id=asd2{hellow}and%20it%20continues(23&((@)) and it ends',
131131
[TEXT, URL, TEXT],
132132
['A really funky one (', 'example.com/?id=asd2{hellow}and%20it%20continues(23&((@)', ') and it ends']
133+
], [
134+
'Force http:/ and http:// are not but http://a and http://b.local?qeasd3qas=23 are all links',
135+
[TEXT, URL, TEXT, URL, TEXT],
136+
['Force http:/ and http:// are not but ', 'http://a', ' and ', 'http://b.local?qeasd3qas=23', ' are all links']
137+
], [
138+
'HTTP Auth URLs should work: http://username:password@example.com',
139+
[TEXT, URL],
140+
['HTTP Auth URLs should work: ', 'http://username:password@example.com']
133141
]
134142
];
135143

0 commit comments

Comments
 (0)