我强烈建议不要使用正则表达式，而是使用 DOM API 来解析并移除 HTML 元素，同时提供一份允许保留的元素白名单：
/**
 * Removes HTML elements from a string, keeping only whitelisted elements.
 *
 * The markup is parsed through the DOM rather than with a regular expression.
 * Every element that is not whitelisted is replaced by its own child nodes, so
 * its text and any whitelisted descendants are preserved.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - String of text/HTML from which the HTML
 *   elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names permitted
 *   to remain. 'br', '<br>' and 'BR' are treated alike; an empty array removes
 *   every element.
 * @returns {string|undefined} The stripped markup with leading and trailing
 *   white-space trimmed, or undefined when opts.html is not a non-empty string.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  // Scratch element used only to let the browser parse the supplied string.
  const temp = document.createElement('div');

  // Copy the caller's properties over the defaults; iterating over an empty
  // object-literal when no opts were supplied avoids an error being thrown.
  Object.keys(opts || {}).forEach((key) => {
    settings[key] = opts[key];
  });

  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  // NOTE(review): the string is parsed via innerHTML and attributes on
  // whitelisted elements are left untouched; if the input can be untrusted,
  // confirm that is acceptable or sanitise it separately.
  temp.innerHTML = settings.html;

  // Normalise the whitelist so that 'h1', '<h1>' and 'H1' all mean the same.
  const allowedHTML = settings.allowedHTML.map((el) => el.replace(/<|>/g, '').toLowerCase());

  // Snapshot the elements first: the tree is mutated while we iterate.
  Array.from(temp.querySelectorAll('*')).forEach((node) => {
    // The check must use the normalised list, not settings.allowedHTML,
    // otherwise entries written as '<br>' would never match.
    if (allowedHTML.indexOf(node.localName.toLowerCase()) !== -1) {
      return;
    }
    const parent = node.parentNode;
    // Hoist the children in front of the node, then drop the empty node.
    while (node.firstChild) {
      parent.insertBefore(node.firstChild, node);
    }
    parent.removeChild(node);
  });

  return temp.innerHTML.trim();
}
// Demo: only 'html' is supplied, so stripHTML uses its default allowedHTML
// whitelist; the <br> is kept.
console.log(stripHTML({
'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time)"
}));
// => jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time)
/**
 * Removes HTML elements from a string, keeping only whitelisted elements.
 *
 * The markup is parsed through the DOM rather than with a regular expression.
 * Every element that is not whitelisted is replaced by its own child nodes, so
 * its text and any whitelisted descendants are preserved.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - String of text/HTML from which the HTML
 *   elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names permitted
 *   to remain. 'br', '<br>' and 'BR' are treated alike; an empty array removes
 *   every element.
 * @returns {string|undefined} The stripped markup with leading and trailing
 *   white-space trimmed, or undefined when opts.html is not a non-empty string.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  // Scratch element used only to let the browser parse the supplied string.
  const temp = document.createElement('div');

  // Copy the caller's properties over the defaults; iterating over an empty
  // object-literal when no opts were supplied avoids an error being thrown.
  Object.keys(opts || {}).forEach((key) => {
    settings[key] = opts[key];
  });

  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  // NOTE(review): the string is parsed via innerHTML and attributes on
  // whitelisted elements are left untouched; if the input can be untrusted,
  // confirm that is acceptable or sanitise it separately.
  temp.innerHTML = settings.html;

  // Normalise the whitelist so that 'h1', '<h1>' and 'H1' all mean the same.
  const allowedHTML = settings.allowedHTML.map((el) => el.replace(/<|>/g, '').toLowerCase());

  // Snapshot the elements first: the tree is mutated while we iterate.
  Array.from(temp.querySelectorAll('*')).forEach((node) => {
    // The check must use the normalised list, not settings.allowedHTML,
    // otherwise entries written as '<br>' would never match.
    if (allowedHTML.indexOf(node.localName.toLowerCase()) !== -1) {
      return;
    }
    const parent = node.parentNode;
    // Hoist the children in front of the node, then drop the empty node.
    while (node.firstChild) {
      parent.insertBefore(node.firstChild, node);
    }
    parent.removeChild(node);
  });

  return temp.innerHTML.trim();
}
// Demo call passing only 'html' (no allowedHTML override); note the trailing
// space at the end of the input string.
console.log(stripHTML({
'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time). "
}));
JS Fiddle demo.
上面的函数也允许为 allowedHTML 传入空数组，这会使该函数删除所有 HTML 标记（结论来自一些有限的测试）：
// Demo: an empty allowedHTML array is passed, so no element is whitelisted
// and the <br> is removed as well.
console.log(stripHTML({
'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time). ",
'allowedHTML': []
}));
// => jQuery is a JavaScript library.And is the most widely-used such library (at this time).
/**
 * Removes HTML elements from a string, keeping only whitelisted elements.
 *
 * The markup is parsed through the DOM rather than with a regular expression.
 * Every element that is not whitelisted is replaced by its own child nodes, so
 * its text and any whitelisted descendants are preserved.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - String of text/HTML from which the HTML
 *   elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names permitted
 *   to remain. 'br', '<br>' and 'BR' are treated alike; an empty array removes
 *   every element.
 * @returns {string|undefined} The stripped markup with leading and trailing
 *   white-space trimmed, or undefined when opts.html is not a non-empty string.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  // Scratch element used only to let the browser parse the supplied string.
  const temp = document.createElement('div');

  // Copy the caller's properties over the defaults; iterating over an empty
  // object-literal when no opts were supplied avoids an error being thrown.
  Object.keys(opts || {}).forEach((key) => {
    settings[key] = opts[key];
  });

  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  // NOTE(review): the string is parsed via innerHTML and attributes on
  // whitelisted elements are left untouched; if the input can be untrusted,
  // confirm that is acceptable or sanitise it separately.
  temp.innerHTML = settings.html;

  // Normalise the whitelist so that 'h1', '<h1>' and 'H1' all mean the same.
  const allowedHTML = settings.allowedHTML.map((el) => el.replace(/<|>/g, '').toLowerCase());

  // Snapshot the elements first: the tree is mutated while we iterate.
  Array.from(temp.querySelectorAll('*')).forEach((node) => {
    // The check must use the normalised list, not settings.allowedHTML,
    // otherwise entries written as '<br>' would never match.
    if (allowedHTML.indexOf(node.localName.toLowerCase()) !== -1) {
      return;
    }
    const parent = node.parentNode;
    // Hoist the children in front of the node, then drop the empty node.
    while (node.firstChild) {
      parent.insertBefore(node.firstChild, node);
    }
    parent.removeChild(node);
  });

  return temp.innerHTML.trim();
}
// Demo call with an explicitly empty allowedHTML array (nothing whitelisted).
console.log(stripHTML({
'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time).",
'allowedHTML': []
}));
JS Fiddle demo.
它似乎也能可靠地处理无效的 HTML（前提是浏览器本身能够处理），例如未打开的元素或“重叠”的元素（先打开的元素的结束标记出现在后打开的元素的结束标记之前）：
// Demo: invalid, overlapping markup — </div> appears before the </h1> of the
// <h1> that was opened inside the <div>. No allowedHTML override is passed.
console.log(stripHTML({
'html': "<div><h1>jQuery</div> is a JavaScript library.</h1><br>And is the most widely-used such library (at this time). "
}));
// => jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time).
/**
 * Removes HTML elements from a string, keeping only whitelisted elements.
 *
 * The markup is parsed through the DOM rather than with a regular expression.
 * Every element that is not whitelisted is replaced by its own child nodes, so
 * its text and any whitelisted descendants are preserved.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - String of text/HTML from which the HTML
 *   elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names permitted
 *   to remain. 'br', '<br>' and 'BR' are treated alike; an empty array removes
 *   every element.
 * @returns {string|undefined} The stripped markup with leading and trailing
 *   white-space trimmed, or undefined when opts.html is not a non-empty string.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  // Scratch element used only to let the browser parse the supplied string.
  const temp = document.createElement('div');

  // Copy the caller's properties over the defaults; iterating over an empty
  // object-literal when no opts were supplied avoids an error being thrown.
  Object.keys(opts || {}).forEach((key) => {
    settings[key] = opts[key];
  });

  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  // NOTE(review): the string is parsed via innerHTML and attributes on
  // whitelisted elements are left untouched; if the input can be untrusted,
  // confirm that is acceptable or sanitise it separately.
  temp.innerHTML = settings.html;

  // Normalise the whitelist so that 'h1', '<h1>' and 'H1' all mean the same.
  const allowedHTML = settings.allowedHTML.map((el) => el.replace(/<|>/g, '').toLowerCase());

  // Snapshot the elements first: the tree is mutated while we iterate.
  Array.from(temp.querySelectorAll('*')).forEach((node) => {
    // The check must use the normalised list, not settings.allowedHTML,
    // otherwise entries written as '<br>' would never match.
    if (allowedHTML.indexOf(node.localName.toLowerCase()) !== -1) {
      return;
    }
    const parent = node.parentNode;
    // Hoist the children in front of the node, then drop the empty node.
    while (node.firstChild) {
      parent.insertBefore(node.firstChild, node);
    }
    parent.removeChild(node);
  });

  return temp.innerHTML.trim();
}
// Demo call with overlapping tags (</div> closes before </h1>) and no
// allowedHTML override.
console.log(stripHTML({
'html': "<div><h1>jQuery</div> is a JavaScript library.</h1><br>And is the most widely-used such library (at this time). "
}));
JS Fiddle demo.
它似乎也能处理（荒谬的）多层嵌套：
// Demo: deeply nested elements, including a stray </span> that has no
// matching opening tag. No allowedHTML override is passed.
console.log(stripHTML({
'html': "<div>jQuery <h1>is <br>a <span><strong><em><span>JavaScript</span></em> library</strong></span>.</span><br>And is the most widely-used such library (at this time).</h1></div> "
}));
/**
 * Strips HTML elements out of a string while leaving whitelisted ones in place.
 *
 * The string is parsed by assigning it to the innerHTML of a scratch <div>;
 * each element whose name is not whitelisted is replaced by its child nodes.
 *
 * @param {Object} [opts] - Overrides for the defaults.
 * @param {string} [opts.html=null] - The text/HTML to process.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names that may
 *   remain; '<' and '>' characters in an entry are ignored.
 * @returns {string|undefined} The trimmed result, or undefined when opts.html
 *   is not a non-empty string.
 */
function stripHTML(opts) {
  const settings = { 'html': null, 'allowedHTML': ['h2', 'br'] };
  const container = document.createElement('div');

  // Overlay the caller's options (if any) on top of the defaults.
  for (const key of Object.keys(opts || {})) {
    settings[key] = opts[key];
  }

  // Nothing to do unless a non-empty string was supplied.
  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  container.innerHTML = settings.html;

  // Take a static snapshot of every descendant element before mutating.
  const elements = Array.from(container.querySelectorAll('*'));
  // Whitelist entries may be written as 'h1' or '<h1>'; normalise to 'h1'.
  const permitted = settings.allowedHTML.map((name) => name.replace(/<|>/g, ''));

  for (const element of elements) {
    if (permitted.indexOf(element.localName) !== -1) {
      continue;
    }
    const host = element.parentNode;
    // Move each child in front of the element, then discard the element.
    let child = element.firstChild;
    while (child) {
      host.insertBefore(child, element);
      child = element.firstChild;
    }
    host.removeChild(element);
  }

  return container.innerHTML.trim();
}
// Demo call with heavily nested markup (and one unmatched </span>); no
// allowedHTML override is passed.
console.log(stripHTML({
'html': "<div>jQuery <h1>is <br>a <span><strong><em><span>JavaScript</span></em> library</strong></span>.</span><br>And is the most widely-used such library (at this time).</h1></div> "
}));
JS Fiddle demo.
但我不能保证，当有人在传给 stripHTML 函数的 html 字符串中插入 <script> 元素时，它现在有效、将来有效或者能够有效，例如：
// Demo: the input consists of a <script> element. The closing tag is built by
// concatenation ("</" + "script>") — presumably so this snippet can itself be
// placed inside an inline <script> block without ending it early.
console.log(stripHTML({
'html': "<script>alert('Will this work?'); console.log('Maybe not?');</" + "script>"
}));
// => alert('Will this work?'); console.log('Maybe not?');
// The script did not run in the author's (again: limited) testing, and
// there's no evaluation (eval()) of the inserted, or resulting
// string so it should be safe. This is not a guarantee, so
// please: test your edge cases
/**
 * Removes HTML elements from a string, keeping only whitelisted elements.
 *
 * The markup is parsed through the DOM rather than with a regular expression.
 * Every element that is not whitelisted is replaced by its own child nodes, so
 * its text and any whitelisted descendants are preserved.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - String of text/HTML from which the HTML
 *   elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names permitted
 *   to remain. 'br', '<br>' and 'BR' are treated alike; an empty array removes
 *   every element.
 * @returns {string|undefined} The stripped markup with leading and trailing
 *   white-space trimmed, or undefined when opts.html is not a non-empty string.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  // Scratch element used only to let the browser parse the supplied string.
  const temp = document.createElement('div');

  // Copy the caller's properties over the defaults; iterating over an empty
  // object-literal when no opts were supplied avoids an error being thrown.
  Object.keys(opts || {}).forEach((key) => {
    settings[key] = opts[key];
  });

  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  // NOTE(review): the string is parsed via innerHTML and attributes on
  // whitelisted elements are left untouched; if the input can be untrusted,
  // confirm that is acceptable or sanitise it separately.
  temp.innerHTML = settings.html;

  // Normalise the whitelist so that 'h1', '<h1>' and 'H1' all mean the same.
  const allowedHTML = settings.allowedHTML.map((el) => el.replace(/<|>/g, '').toLowerCase());

  // Snapshot the elements first: the tree is mutated while we iterate.
  Array.from(temp.querySelectorAll('*')).forEach((node) => {
    // The check must use the normalised list, not settings.allowedHTML,
    // otherwise entries written as '<br>' would never match.
    if (allowedHTML.indexOf(node.localName.toLowerCase()) !== -1) {
      return;
    }
    const parent = node.parentNode;
    // Hoist the children in front of the node, then drop the empty node.
    while (node.firstChild) {
      parent.insertBefore(node.firstChild, node);
    }
    parent.removeChild(node);
  });

  return temp.innerHTML.trim();
}
// Demo call whose input is a <script> element; the closing tag is assembled
// from two string pieces ("</" and "script>").
console.log(stripHTML({
'html': "<script>alert('Will this work?'); console.log('Maybe not?');</"+"script>"
}));
JS Fiddle demo.
参考资料: