
Commit 92d61be

Author: Nick Frasser (committed)

Merge pull request #84 from SoapBox/html-string
New linkify-html interface

2 parents: 82b87e0 + cca115d · commit 92d61be

File tree: 11 files changed, +268 −31 lines


gulpfile.js

Lines changed: 24 additions & 21 deletions
@@ -21,7 +21,8 @@ wrap = require('gulp-wrap');
 
 var paths = {
 	src: 'src/**/*.js',
-	lib: 'lib/**/*.js',
+	lib: ['lib/**/*.js'],
+	libTest: ['lib/*.js', 'lib/linkify/**/*.js'],
 	libCore: [
 		'lib/linkify/core/*.js',
 		'lib/linkify/utils/*.js',
@@ -99,38 +100,40 @@ gulp.task('build-core', ['babel'], function () {
 // Depends on build-core
 gulp.task('build-interfaces', ['babel-amd'], function () {
 
-	var stream, streams = [];
-
 	// Core linkify functionality as plugins
 	var interface, interfaces = [
 		'string',
 		'element',
-		['element', 'jquery'] // jQuery interface requires both element and jquery
+		['linkify-element.js', 'jquery'], // jQuery interface requires both element and jquery
+		[
+			'simple-html-tokenizer/*.js',
+			'simple-html-tokenizer.js',
+			'html'
+		]
 	];
 
-	var files = {js: null, amd: null};
-
 	// Globals browser interface
-	for (var i = 0; i < interfaces.length; i++) {
-		interface = interfaces[i];
+	var streams = [];
+
+	interfaces.forEach(function (interface) {
+
+		var files = {js: [], amd: []};
 
 		if (interface instanceof Array) {
-			// Interface has dependencies
-			files.js = [];
-			files.amd = [];
-			for (var j = 0; j < interface.length; j++) {
-				files.js.push('src/linkify-' + interface[j] + '.js');
-				files.amd.push('build/amd/linkify-' + interface[j] + '.js');
-			}
+			// Interface has other interface dependencies within this package
+			interface.forEach(function (i, idx) {
+				if (idx == interface.length - 1) { return; } // ignore last index
+				files.js.push('src/' + i);
+				files.amd.push('build/amd/' + i);
+			});
 
 			// The last dependency is the name of the interface
 			interface = interface.pop();
-
-		} else {
-			files.js = 'src/linkify-' + interface + '.js';
-			files.amd = 'build/amd/linkify-' + interface + '.js';
 		}
 
+		files.js.push('src/linkify-' + interface + '.js');
+		files.amd.push('build/amd/linkify-' + interface + '.js');
+
 		// Browser interface
 		stream = gulp.src(files.js)
 			.pipe(babel({
@@ -150,7 +153,7 @@ gulp.task('build-interfaces', ['babel-amd'], function () {
 			.pipe(gulp.dest('build'));
 
 		streams.push(stream);
-	}
+	});
 
 	return merge.apply(this, streams);
 });
@@ -230,7 +233,7 @@ gulp.task('mocha', ['build'], function () {
 */
 gulp.task('coverage', ['build'], function (cb) {
 	// IMPORTANT: return not required here (and will actually cause bugs!)
-	gulp.src(paths.lib)
+	gulp.src(paths.libTest)
 		.pipe(istanbul()) // Covering files
 		.pipe(istanbul.hookRequire()) // Force `require` to return covered files
 		.on('finish', function () {
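The interfaces array in the hunk above now mixes plain interface names with arrays, where the last element names the interface and the earlier elements are additional in-package source files to bundle first. A minimal sketch of that resolution logic, pulled out into a hypothetical standalone helper (resolveInterface is not part of the gulpfile):

// Sketch only: mirrors how the forEach above turns one `interfaces` entry
// into source/AMD file lists. `resolveInterface` is a hypothetical name.
function resolveInterface(entry) {
	var files = { js: [], amd: [] };
	var name = entry;
	if (entry instanceof Array) {
		// Every element except the last is a dependency file within this package
		entry.slice(0, -1).forEach(function (dep) {
			files.js.push('src/' + dep);
			files.amd.push('build/amd/' + dep);
		});
		name = entry[entry.length - 1];
	}
	files.js.push('src/linkify-' + name + '.js');
	files.amd.push('build/amd/linkify-' + name + '.js');
	return files;
}

// resolveInterface(['linkify-element.js', 'jquery']).js
// => ['src/linkify-element.js', 'src/linkify-jquery.js']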

html.js

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+module.exports = require('./lib/linkify-html');
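With this root-level html.js, Node consumers can require the compiled interface straight from the package root. Assuming the package keeps its published name (linkifyjs — an assumption, the name field is not shown in this diff), usage would look roughly like:

// Assumption: the package is published as "linkifyjs"; only the ./html entry point is shown here.
var linkifyHtml = require('linkifyjs/html');
var out = linkifyHtml('Docs live at example.com/docs');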

package.json

Lines changed: 4 additions & 2 deletions
@@ -27,7 +27,8 @@
     "mention",
     "mentions"
   ],
-  "dependencies": {},
+  "dependencies": {
+  },
   "devDependencies": {
     "amd-optimize": "^0.6.1",
     "brfs": "^1.4.1",
@@ -58,7 +59,8 @@
     "lodash": "^3.10.1",
     "merge-stream": "^1.0.0",
     "mocha": "^2.3.3",
-    "phantomjs": "^1.9.18"
+    "phantomjs": "^1.9.18",
+    "simple-html-tokenizer": "https://github.com/nfrasser/simple-html-tokenizer.git#master"
   },
   "optionalDependencies": {
     "jquery": ">=1.9.0"

src/linkify-html.js

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
+import HTML5Tokenizer from './simple-html-tokenizer';
+import * as linkify from './linkify';
+
+const StartTag = 'StartTag';
+const EndTag = 'EndTag';
+const Chars = 'Chars';
+const Comment = 'Comment';
+
+/**
+	`tokens` and `token` in this section refer to tokens generated by the HTML
+	parser.
+*/
+export default function linkifyHtml(str, opts={}) {
+	let tokens = HTML5Tokenizer.tokenize(str);
+	let linkifiedTokens = [];
+	let linkified = [];
+	var i;
+
+	opts = linkify.options.normalize(opts);
+
+	// Linkify the tokens given by the parser
+	for (i = 0; i < tokens.length; i++) {
+		let token = tokens[i];
+
+		if (token.type === StartTag && token.tagName.toUpperCase() === 'A') {
+			// Ignore all the contents of an anchor tag
+			let preskipLen = linkifiedTokens.length;
+			skipTagTokens('A', tokens, ++i, linkifiedTokens);
+
+			i += linkifiedTokens.length - preskipLen;
+			continue;
+
+		} else if (token.type !== Chars) {
+			// Skip this token, it's not important
+			linkifiedTokens.push(token);
+			continue;
+		}
+
+		// Valid text token, linkify it!
+		let linkifiedChars = linkifyChars(token.chars, opts);
+		linkifiedTokens.push.apply(linkifiedTokens, linkifiedChars);
+	}
+
+	// Convert the tokens back into a string
+	for (i = 0; i < linkifiedTokens.length; i++) {
+		let token = linkifiedTokens[i];
+		switch (token.type) {
+			case StartTag:
+				let attrs = attrsToStrings(token.attributes);
+				let link = '<' + token.tagName;
+				if (attrs.length > 0) { link += ' ' + attrs.join(' '); }
+				link += '>';
+				linkified.push(link);
+				break;
+			case EndTag:
+				linkified.push(`</${token.tagName}>`);
+				break;
+			case Chars:
+				linkified.push(escapeText(token.chars));
+				break;
+			case Comment:
+				linkified.push(`<!--${escapeText(token.chars)}-->`);
+				break;
+		}
+	}
+
+	return linkified.join('');
+}
+
+/**
+	`tokens` and `token` in this section refer to tokens returned by
+	`linkify.tokenize`. `linkified` will contain HTML Parser-style tokens
+*/
+function linkifyChars(str, opts) {
+	let tokens = linkify.tokenize(str);
+	let result = [];
+
+	for (var i = 0; i < tokens.length; i++) {
+		let token = tokens[i];
+		if (token.type === 'nl' && opts.nl2br) {
+			result.push({
+				type: StartTag,
+				tagName: 'br',
+				attributes: [],
+				selfClosing: true
+			});
+			continue;
+		} else if (!token.isLink) {
+			result.push({type: Chars, chars: token.toString()});
+			continue;
+		}
+
+		let href = token.toHref(opts.defaultProtocol);
+		let formatted = linkify.options.resolve(opts.format, token.toString(), token.type);
+		let formattedHref = linkify.options.resolve(opts.formatHref, href, token.type);
+		let attributesHash = linkify.options.resolve(opts.attributes, href, token.type);
+		let tagName = linkify.options.resolve(opts.tagName, href, token.type);
+		let linkClass = linkify.options.resolve(opts.linkClass, href, token.type);
+		let target = linkify.options.resolve(opts.target, href, token.type);
+
+		// Build up attributes
+		let attributes = [
+			['href', formattedHref],
+			['class', linkClass]
+		];
+
+		if (target) {
+			attributes.push(['target', target]);
+		}
+
+		for (var attr in attributesHash) {
+			attributes.push([attr, attributesHash[attr]]);
+		}
+
+		// Add the required tokens
+		result.push({
+			type: StartTag,
+			tagName: tagName,
+			attributes: attributes,
+			selfClosing: false
+		});
+		result.push({type: Chars, chars: formatted});
+		result.push({type: EndTag, tagName: tagName});
+	}
+
+	return result;
+}
+
+/**
+	Returns a list of tokens skipped until the closing tag of tagName.
+
+	* `tagName` is the closing tag which will prompt us to stop skipping
+	* `tokens` is the array of tokens generated by HTML5Tokenizer
+	* `i` is the index immediately after the opening tag to skip
+	* `skippedTokens` is an array which skipped tokens are being pushed into
+
+	Caveats
+
+	* Assumes that i is the first token after the given opening tagName
+	* The closing tag will be skipped, but nothing after it
+	* Will track whether there is a nested tag of the same type
+*/
+function skipTagTokens(tagName, tokens, i, skippedTokens) {
+
+	// number of tokens of this type on the [fictional] stack
+	var stackCount = 1;
+
+	while (i < tokens.length && stackCount > 0) {
+		let token = tokens[i];
+		if (token.type === StartTag && token.tagName.toUpperCase() === tagName) {
+			// Nested tag of the same type, "add to stack"
+			stackCount++;
+		} else if (token.type === EndTag && token.tagName.toUpperCase() === tagName) {
+			// Closing tag
+			stackCount--;
+		}
+		skippedTokens.push(token);
+		i++;
+	}
+
+	// Note that if stackCount > 0 here, the HTML is probably invalid
+	return skippedTokens;
+}
+
+function escapeText(text) {
+	// Not required, HTML tokenizer ensures this occurs properly
+	return text;
+}
+
+function escapeAttr(attr) {
+	return attr.replace(/"/g, '&quot;');
+}
+
+function attrsToStrings(attrs) {
+	let attrStrs = [];
+	for (let i = 0; i < attrs.length; i++) {
+		let [name, value] = attrs[i];
+		attrStrs.push(`${name}="${escapeAttr(value)}"`);
+	}
+	return attrStrs;
+}

src/linkify-string.js

Lines changed: 8 additions & 8 deletions
@@ -4,14 +4,14 @@
 
 import {tokenize, options} from './linkify';
 
-function cleanText(text) {
+function escapeText(text) {
 	return text
 		.replace(/&/g, '&amp;')
 		.replace(/</g, '&lt;')
 		.replace(/>/g, '&gt;');
 }
 
-function cleanAttr(href) {
+function escapeAttr(href) {
 	return href.replace(/"/g, '&quot;');
 }
 
@@ -22,7 +22,7 @@ function attributesToString(attributes) {
 
 	for (let attr in attributes) {
 		let val = (attributes[attr] + '').replace(/"/g, '&quot;');
-		result.push(`${attr}="${cleanAttr(val)}"`);
+		result.push(`${attr}="${escapeAttr(val)}"`);
 	}
 	return result.join(' ');
 }
@@ -35,7 +35,7 @@ function linkifyStr(str, opts={}) {
 		tokens = tokenize(str),
 		result = [];
 
-	for (let i = 0; i < tokens.length; i++ ) {
+	for (let i = 0; i < tokens.length; i++) {
 		let token = tokens[i];
 		if (token.isLink) {
 
@@ -48,16 +48,16 @@
 				linkClass = options.resolve(opts.linkClass, href, token.type),
 				target = options.resolve(opts.target, href, token.type);
 
-			let link = `<${tagName} href="${cleanAttr(formattedHref)}" class="${cleanAttr(linkClass)}"`;
+			let link = `<${tagName} href="${escapeAttr(formattedHref)}" class="${escapeAttr(linkClass)}"`;
 			if (target) {
-				link += ` target="${cleanAttr(target)}"`;
+				link += ` target="${escapeAttr(target)}"`;
 			}
 
 			if (attributesHash) {
 				link += ` ${attributesToString(attributesHash)}`;
 			}
 
-			link += `>${cleanText(formatted)}</${tagName}>`;
+			link += `>${escapeText(formatted)}</${tagName}>`;
 			result.push(link);
 
 		} else if (token.type === 'nl' && opts.nl2br) {
@@ -67,7 +67,7 @@ function linkifyStr(str, opts={}) {
 				result.push('<br>\n');
 			}
 		} else {
-			result.push(cleanText(token.toString()));
+			result.push(escapeText(token.toString()));
 		}
 	}
 
src/simple-html-tokenizer

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+../node_modules/simple-html-tokenizer/lib/simple-html-tokenizer

src/simple-html-tokenizer.js

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+import HTML5NamedCharRefs from './simple-html-tokenizer/html5-named-char-refs';
+import EntityParser from './simple-html-tokenizer/entity-parser';
+import EventedTokenizer from './simple-html-tokenizer/evented-tokenizer';
+import Tokenizer from './simple-html-tokenizer/tokenizer';
+import tokenize from './simple-html-tokenizer/tokenize';
+
+var HTML5Tokenizer = {
+	HTML5NamedCharRefs,
+	EntityParser,
+	EventedTokenizer,
+	Tokenizer,
+	tokenize,
+};
+
+export default HTML5Tokenizer;

templates/linkify-html.amd.js

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+<%= contents %>

templates/linkify-html.js

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+;(function (linkify) {
+"use strict";
+<%= contents %>
+window.linkifyHtml = linkifyHtml;
+})(window.linkify);
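This template wraps the compiled module for browser globals, exposing window.linkifyHtml alongside window.linkify. A minimal in-page sketch (the built script filenames and load order are assumed, not shown in this diff):

// After loading the core linkify build and the linkify-html build via <script> tags:
document.body.innerHTML = window.linkifyHtml(document.body.innerHTML, { nl2br: true });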
