@@ -43,31 +43,37 @@ export function init({ groups }) {
4343 tk . SLASH ,
4444 tk . SYM ,
4545 tk . TILDE ,
46- tk . UNDERSCORE
46+ tk . UNDERSCORE ,
4747 ] ) ;
4848
4949 // Types of tokens that can follow a URL and be part of the query string
5050 // but cannot be the very last characters
5151 // Characters that cannot appear in the URL at all should be excluded
5252 const qsNonAccepting = [
5353 tk . APOSTROPHE ,
54- tk . CLOSEANGLEBRACKET ,
55- tk . CLOSEBRACE ,
56- tk . CLOSEBRACKET ,
57- tk . CLOSEPAREN ,
58- tk . FULLWIDTH_CLOSEPAREN ,
5954 tk . COLON ,
6055 tk . COMMA ,
6156 tk . DOT ,
6257 tk . EXCLAMATION ,
58+ tk . QUERY ,
59+ tk . QUOTE ,
60+ tk . SEMI ,
6361 tk . OPENANGLEBRACKET ,
62+ tk . CLOSEANGLEBRACKET ,
6463 tk . OPENBRACE ,
64+ tk . CLOSEBRACE ,
65+ tk . CLOSEBRACKET ,
6566 tk . OPENBRACKET ,
6667 tk . OPENPAREN ,
67- tk . FULLWIDTH_OPENPAREN ,
68- tk . QUERY ,
69- tk . QUOTE ,
70- tk . SEMI
68+ tk . CLOSEPAREN ,
69+ tk . FULLWIDTHLEFTPAREN ,
70+ tk . FULLWIDTHRIGHTPAREN ,
71+ tk . LEFTCORNERBRACKET ,
72+ tk . RIGHTCORNERBRACKET ,
73+ tk . LEFTWHITECORNERBRACKET ,
74+ tk . RIGHTWHITECORNERBRACKET ,
75+ tk . FULLWIDTHLESSTHAN ,
76+ tk . FULLWIDTHGREATERTHAN ,
7177 ] ;
7278
7379 // For addresses without the mailto prefix
@@ -79,11 +85,11 @@ export function init({ groups }) {
7985 tk . BACKSLASH ,
8086 tk . BACKTICK ,
8187 tk . CARET ,
82- tk . CLOSEBRACE ,
8388 tk . DOLLAR ,
8489 tk . EQUALS ,
8590 tk . HYPHEN ,
8691 tk . OPENBRACE ,
92+ tk . CLOSEBRACE ,
8793 tk . PERCENT ,
8894 tk . PIPE ,
8995 tk . PLUS ,
@@ -92,7 +98,7 @@ export function init({ groups }) {
9298 tk . SLASH ,
9399 tk . SYM ,
94100 tk . TILDE ,
95- tk . UNDERSCORE
101+ tk . UNDERSCORE ,
96102 ] ;
97103
98104 // The universal starting state.
@@ -104,7 +110,9 @@ export function init({ groups }) {
104110 ta ( Localpart , localpartAccepting , Localpart ) ;
105111 ta ( Localpart , groups . domain , Localpart ) ;
106112
107- const Domain = makeState ( ) , Scheme = makeState ( ) , SlashScheme = makeState ( ) ;
113+ const Domain = makeState ( ) ,
114+ Scheme = makeState ( ) ,
115+ SlashScheme = makeState ( ) ;
108116 ta ( Start , groups . domain , Domain ) ; // parsed string ends with a potential domain name (A)
109117 ta ( Start , groups . scheme , Scheme ) ; // e.g., 'mailto'
110118 ta ( Start , groups . slashscheme , SlashScheme ) ; // e.g., 'http'
@@ -144,7 +152,7 @@ export function init({ groups }) {
144152
145153 // Final possible email states
146154 const EmailColon = tt ( Email , tk . COLON ) ; // URL followed by colon (potential port number here)
147- /*const EmailColonPort = */ ta ( EmailColon , groups . numeric , mtk . Email ) ; // URL followed by colon and port numner
155+ /*const EmailColonPort = */ ta ( EmailColon , groups . numeric , mtk . Email ) ; // URL followed by colon and port number
148156
149157 // Account for dots and hyphens. Hyphens are usually parts of domain names
150158 // (but not TLDs)
@@ -206,86 +214,46 @@ export function init({ groups }) {
206214 ta ( UriPrefix , qsAccepting , Url ) ;
207215 tt ( UriPrefix , tk . SLASH , Url ) ;
208216
209- // URL, followed by an opening bracket
210- const UrlOpenbrace = tt ( Url , tk . OPENBRACE ) ; // URL followed by {
211- const UrlOpenbracket = tt ( Url , tk . OPENBRACKET ) ; // URL followed by [
212- const UrlOpenanglebracket = tt ( Url , tk . OPENANGLEBRACKET ) ; // URL followed by <
213- const UrlOpenparen = tt ( Url , tk . OPENPAREN ) ; // URL followed by (
214- const UrlFullwidthOpenparen = tt ( Url , tk . FULLWIDTH_OPENPAREN ) ; // URL followed by (
215-
216- tt ( UrlNonaccept , tk . OPENBRACE , UrlOpenbrace ) ;
217- tt ( UrlNonaccept , tk . OPENBRACKET , UrlOpenbracket ) ;
218- tt ( UrlNonaccept , tk . OPENANGLEBRACKET , UrlOpenanglebracket ) ;
219- tt ( UrlNonaccept , tk . OPENPAREN , UrlOpenparen ) ;
220- tt ( UrlNonaccept , tk . FULLWIDTH_OPENPAREN , UrlFullwidthOpenparen ) ;
221-
222- // Closing bracket component. This character WILL be included in the URL
223- tt ( UrlOpenbrace , tk . CLOSEBRACE , Url ) ;
224- tt ( UrlOpenbracket , tk . CLOSEBRACKET , Url ) ;
225- tt ( UrlOpenanglebracket , tk . CLOSEANGLEBRACKET , Url ) ;
226- tt ( UrlOpenparen , tk . CLOSEPAREN , Url ) ;
227- tt ( UrlFullwidthOpenparen , tk . FULLWIDTH_CLOSEPAREN , Url ) ;
228- tt ( UrlOpenbrace , tk . CLOSEBRACE , Url ) ;
229-
230- // URL that beings with an opening bracket, followed by a symbols.
231- // Note that the final state can still be `UrlOpenbrace` (if the URL only
232- // has a single opening bracket for some reason).
233- const UrlOpenbraceQ = makeState ( mtk . Url ) ; // URL followed by { and some symbols that the URL can end it
234- const UrlOpenbracketQ = makeState ( mtk . Url ) ; // URL followed by [ and some symbols that the URL can end it
235- const UrlOpenanglebracketQ = makeState ( mtk . Url ) ; // URL followed by < and some symbols that the URL can end it
236- const UrlOpenparenQ = makeState ( mtk . Url ) ; // URL followed by ( and some symbols that the URL can end it
237- const UrlFullwidthOpenparenQ = makeState ( mtk . Url ) ; // URL followed by ( and some symbols that the URL can end it
238- ta ( UrlOpenbrace , qsAccepting , UrlOpenbraceQ ) ;
239- ta ( UrlOpenbracket , qsAccepting , UrlOpenbracketQ ) ;
240- ta ( UrlOpenanglebracket , qsAccepting , UrlOpenanglebracketQ ) ;
241- ta ( UrlOpenparen , qsAccepting , UrlOpenparenQ ) ;
242- ta ( UrlFullwidthOpenparen , qsAccepting , UrlFullwidthOpenparenQ ) ;
243-
244- const UrlOpenbraceSyms = makeState ( ) ; // UrlOpenbrace followed by some symbols it cannot end it
245- const UrlOpenbracketSyms = makeState ( ) ; // UrlOpenbracketQ followed by some symbols it cannot end it
246- const UrlOpenanglebracketSyms = makeState ( ) ; // UrlOpenanglebracketQ followed by some symbols it cannot end it
247- const UrlOpenparenSyms = makeState ( ) ; // UrlOpenparenQ followed by some symbols it cannot end it
248- const UrlFullwidthOpenparenSyms = makeState ( ) ; // UrlFullwidthOpenparenQ followed by some symbols it cannot end it
249- ta ( UrlOpenbrace , qsNonAccepting ) ;
250- ta ( UrlOpenbracket , qsNonAccepting ) ;
251- ta ( UrlOpenanglebracket , qsNonAccepting ) ;
252- ta ( UrlOpenparen , qsNonAccepting ) ;
253- ta ( UrlFullwidthOpenparen , qsNonAccepting ) ;
254-
255- // URL that begins with an opening bracket, followed by some symbols
256- ta ( UrlOpenbraceQ , qsAccepting , UrlOpenbraceQ ) ;
257- ta ( UrlOpenbracketQ , qsAccepting , UrlOpenbracketQ ) ;
258- ta ( UrlOpenanglebracketQ , qsAccepting , UrlOpenanglebracketQ ) ;
259- ta ( UrlOpenparenQ , qsAccepting , UrlOpenparenQ ) ;
260- ta ( UrlFullwidthOpenparenQ , qsAccepting , UrlFullwidthOpenparenQ ) ;
261- ta ( UrlOpenbraceQ , qsNonAccepting , UrlOpenbraceQ ) ;
262- ta ( UrlOpenbracketQ , qsNonAccepting , UrlOpenbracketQ ) ;
263- ta ( UrlOpenanglebracketQ , qsNonAccepting , UrlOpenanglebracketQ ) ;
264- ta ( UrlOpenparenQ , qsNonAccepting , UrlOpenparenQ ) ;
265- ta ( UrlFullwidthOpenparenQ , qsAccepting , UrlFullwidthOpenparenQ ) ;
266-
267- ta ( UrlOpenbraceSyms , qsAccepting , UrlOpenbraceSyms ) ;
268- ta ( UrlOpenbracketSyms , qsAccepting , UrlOpenbracketQ ) ;
269- ta ( UrlOpenanglebracketSyms , qsAccepting , UrlOpenanglebracketQ ) ;
270- ta ( UrlOpenparenSyms , qsAccepting , UrlOpenparenQ ) ;
271- ta ( UrlFullwidthOpenparenSyms , qsAccepting , UrlFullwidthOpenparenQ ) ;
272- ta ( UrlOpenbraceSyms , qsNonAccepting , UrlOpenbraceSyms ) ;
273- ta ( UrlOpenbracketSyms , qsNonAccepting , UrlOpenbracketSyms ) ;
274- ta ( UrlOpenanglebracketSyms , qsNonAccepting , UrlOpenanglebracketSyms ) ;
275- ta ( UrlOpenparenSyms , qsNonAccepting , UrlOpenparenSyms ) ;
276- ta ( UrlFullwidthOpenparenSyms , qsAccepting , UrlFullwidthOpenparenSyms ) ;
277-
278- // Close brace/bracket to become regular URL
279- tt ( UrlOpenbracketQ , tk . CLOSEBRACKET , Url ) ;
280- tt ( UrlOpenanglebracketQ , tk . CLOSEANGLEBRACKET , Url ) ;
281- tt ( UrlOpenparenQ , tk . CLOSEPAREN , Url ) ;
282- tt ( UrlFullwidthOpenparenQ , tk . FULLWIDTH_CLOSEPAREN , Url ) ;
283- tt ( UrlOpenbraceQ , tk . CLOSEBRACE , Url ) ;
284- tt ( UrlOpenbracketSyms , tk . CLOSEBRACKET , Url ) ;
285- tt ( UrlOpenanglebracketSyms , tk . CLOSEANGLEBRACKET , Url ) ;
286- tt ( UrlFullwidthOpenparenSyms , tk . FULLWIDTH_CLOSEPAREN , Url ) ;
287- tt ( UrlOpenbraceSyms , tk . CLOSEPAREN , Url ) ;
288- tt ( UrlOpenbraceSyms , tk . FULLWIDTH_CLOSEPAREN , Url ) ;
217+ const bracketPairs = [
218+ [ tk . OPENBRACE , tk . CLOSEBRACE ] , // {}
219+ [ tk . OPENBRACKET , tk . CLOSEBRACKET ] , // []
220+ [ tk . OPENPAREN , tk . CLOSEPAREN ] , // ()
221+ [ tk . OPENANGLEBRACKET , tk . CLOSEANGLEBRACKET ] , // <>
222+ [ tk . FULLWIDTHLEFTPAREN , tk . FULLWIDTHRIGHTPAREN ] , // ()
223+ [ tk . LEFTCORNERBRACKET , tk . RIGHTCORNERBRACKET ] , // 「」
224+ [ tk . LEFTWHITECORNERBRACKET , tk . RIGHTWHITECORNERBRACKET ] , // 『』
225+ [ tk . FULLWIDTHLESSTHAN , tk . FULLWIDTHGREATERTHAN ] , // <>
226+ ] ;
227+
228+ for ( let i = 0 ; i < bracketPairs . length ; i ++ ) {
229+ const [ OPEN , CLOSE ] = bracketPairs [ i ] ;
230+ const UrlOpen = tt ( Url , OPEN ) ; // URL followed by open bracket
231+
232+ // Continue not accepting for open brackets
233+ tt ( UrlNonaccept , OPEN , UrlOpen ) ;
234+
235+ // Closing bracket component. This character WILL be included in the URL
236+ tt ( UrlOpen , CLOSE , Url ) ;
237+
238+ // URL that begins with an opening bracket, followed by symbols.
239+ // Note that the final state can still be `UrlOpen` (if the URL has a
240+ // single opening bracket for some reason).
241+ const UrlOpenQ = makeState ( mtk . Url ) ;
242+ ta ( UrlOpen , qsAccepting , UrlOpenQ ) ;
243+
244+ const UrlOpenSyms = makeState ( ) ; // UrlOpen followed by some symbols it cannot end in
245+ ta ( UrlOpen , qsNonAccepting ) ;
246+
247+ // URL that begins with an opening bracket, followed by some symbols
248+ ta ( UrlOpenQ , qsAccepting , UrlOpenQ ) ;
249+ ta ( UrlOpenQ , qsNonAccepting , UrlOpenSyms ) ;
250+ ta ( UrlOpenSyms , qsAccepting , UrlOpenQ ) ;
251+ ta ( UrlOpenSyms , qsNonAccepting , UrlOpenSyms ) ;
252+
253+ // Close brace/bracket to become regular URL
254+ tt ( UrlOpenQ , CLOSE , Url ) ;
255+ tt ( UrlOpenSyms , CLOSE , Url ) ;
256+ }
289257
290258 tt ( Start , tk . LOCALHOST , DomainDotTld ) ; // localhost is a valid URL state
291259 tt ( Start , tk . NL , mtk . Nl ) ; // single new line
@@ -323,10 +291,7 @@ export function run(start, input, tokens) {
323291 textTokens . push ( tokens [ cursor ++ ] ) ;
324292 }
325293
326- while ( cursor < len && (
327- nextState = secondState || state . go ( tokens [ cursor ] . t ) )
328- ) {
329-
294+ while ( cursor < len && ( nextState = secondState || state . go ( tokens [ cursor ] . t ) ) ) {
330295 // Get the next state
331296 secondState = null ;
332297 state = nextState ;
0 commit comments