@@ -54,9 +54,6 @@ let S_START = makeState();
5454
5555// Intermediate states for URLs. Note that domains that begin with a protocol
5656// are treated slightly differently from those that don't.
57- // (PSS == "PROTOCOL SLASH SLASH")
58- // S_DOMAIN* states can generally become prefixes for email addresses, while
59- // S_PSS_DOMAIN* cannot
6057let
6158S_PROTOCOL = makeState ( ) , // e.g., 'http:'
6259S_PROTOCOL_SLASH = makeState ( ) , // e.g., '/', 'http:/'
@@ -66,11 +63,6 @@ S_DOMAIN_DOT = makeState(), // (A) domain followed by DOT
6663S_TLD = makeState ( T_URL ) , // (A) Simplest possible URL with no query string
6764S_TLD_COLON = makeState ( ) , // (A) URL followed by colon (potential port number here)
6865S_TLD_PORT = makeState ( T_URL ) , // TLD followed by a port number
69- S_PSS_DOMAIN = makeState ( ) , // parsed string starts with protocol and ends with a potential domain name (B)
70- S_PSS_DOMAIN_DOT = makeState ( ) , // (B) domain followed by DOT
71- S_PSS_TLD = makeState ( T_URL ) , // (B) Simplest possible URL with no query string and a protocol
72- S_PSS_TLD_COLON = makeState ( ) , // (A) URL followed by colon (potential port number here)
73- S_PSS_TLD_PORT = makeState ( T_URL ) , // TLD followed by a port number
7466S_URL = makeState ( T_URL ) , // Long URL with optional port and maybe query string
7567S_URL_SYMS = makeState ( ) , // URL followed by some symbols (will not be part of the final URL)
7668S_URL_OPENBRACE = makeState ( ) , // URL followed by {
@@ -104,15 +96,16 @@ S_START.on(TT_TLD, S_DOMAIN);
10496S_START . on ( TT_DOMAIN , S_DOMAIN ) ;
10597S_START . on ( TT_LOCALHOST , S_TLD ) ;
10698S_START . on ( TT_NUM , S_DOMAIN ) ;
107- S_PROTOCOL_SLASH_SLASH . on ( TT_TLD , S_PSS_DOMAIN ) ;
108- S_PROTOCOL_SLASH_SLASH . on ( TT_DOMAIN , S_PSS_DOMAIN ) ;
109- S_PROTOCOL_SLASH_SLASH . on ( TT_NUM , S_PSS_DOMAIN ) ;
110- S_PROTOCOL_SLASH_SLASH . on ( TT_LOCALHOST , S_PSS_TLD ) ;
99+
100+ // Force URL for anything sane that follows a protocol
101+ S_PROTOCOL_SLASH_SLASH . on ( TT_TLD , S_URL ) ;
102+ S_PROTOCOL_SLASH_SLASH . on ( TT_DOMAIN , S_URL ) ;
103+ S_PROTOCOL_SLASH_SLASH . on ( TT_NUM , S_URL ) ;
104+ S_PROTOCOL_SLASH_SLASH . on ( TT_LOCALHOST , S_URL ) ;
111105
112106// Account for dots and hyphens
113107// hyphens are usually parts of domain names
114108S_DOMAIN . on ( TT_DOT , S_DOMAIN_DOT ) ;
115- S_PSS_DOMAIN . on ( TT_DOT , S_PSS_DOMAIN_DOT ) ;
116109S_EMAIL_DOMAIN . on ( TT_DOT , S_EMAIL_DOMAIN_DOT ) ;
117110
118111// Hyphen can jump back to a domain name
@@ -122,10 +115,6 @@ S_DOMAIN_DOT.on(TT_TLD, S_TLD);
122115S_DOMAIN_DOT . on ( TT_DOMAIN , S_DOMAIN ) ;
123116S_DOMAIN_DOT . on ( TT_NUM , S_DOMAIN ) ;
124117S_DOMAIN_DOT . on ( TT_LOCALHOST , S_DOMAIN ) ;
125- S_PSS_DOMAIN_DOT . on ( TT_TLD , S_PSS_TLD ) ;
126- S_PSS_DOMAIN_DOT . on ( TT_DOMAIN , S_PSS_DOMAIN ) ;
127- S_PSS_DOMAIN_DOT . on ( TT_NUM , S_PSS_DOMAIN ) ;
128- S_PSS_DOMAIN_DOT . on ( TT_LOCALHOST , S_PSS_DOMAIN ) ;
129118S_EMAIL_DOMAIN_DOT . on ( TT_TLD , S_EMAIL ) ;
130119S_EMAIL_DOMAIN_DOT . on ( TT_DOMAIN , S_EMAIL_DOMAIN ) ;
131120S_EMAIL_DOMAIN_DOT . on ( TT_NUM , S_EMAIL_DOMAIN ) ;
@@ -134,7 +123,6 @@ S_EMAIL_DOMAIN_DOT.on(TT_LOCALHOST, S_EMAIL_DOMAIN);
134123// S_TLD accepts! But the URL could be longer, try to find a match greedily
135124// The `run` function should be able to "rollback" to the accepting state
136125S_TLD . on ( TT_DOT , S_DOMAIN_DOT ) ;
137- S_PSS_TLD . on ( TT_DOT , S_PSS_DOMAIN_DOT ) ;
138126S_EMAIL . on ( TT_DOT , S_EMAIL_DOMAIN_DOT ) ;
139127
140128// Become real URLs after `SLASH` or `COLON NUM SLASH`
@@ -143,10 +131,6 @@ S_TLD.on(TT_COLON, S_TLD_COLON);
143131S_TLD . on ( TT_SLASH , S_URL ) ;
144132S_TLD_COLON . on ( TT_NUM , S_TLD_PORT ) ;
145133S_TLD_PORT . on ( TT_SLASH , S_URL ) ;
146- S_PSS_TLD . on ( TT_COLON , S_PSS_TLD_COLON ) ;
147- S_PSS_TLD . on ( TT_SLASH , S_URL ) ;
148- S_PSS_TLD_COLON . on ( TT_NUM , S_PSS_TLD_PORT ) ;
149- S_PSS_TLD_PORT . on ( TT_SLASH , S_URL ) ;
150134S_EMAIL . on ( TT_COLON , S_EMAIL_COLON ) ;
151135S_EMAIL_COLON . on ( TT_NUM , S_EMAIL_PORT ) ;
152136
0 commit comments