Ry-*_*Ry- 3 html javascript html-parsing
在Google Chrome(Canary)上,似乎没有任何字符串能让DOM解析器失败.我正在尝试解析一些HTML,如果这段HTML不是100%完全有效,我希望它能报错.我先试了最直接的做法:
// Naive attempt: hand the markup to the browser's HTML parser by assigning it
// to innerHTML of a detached <div>. `someMarkup` is the HTML string under test;
// it is not defined in this snippet.
var newElement = document.createElement('div');
newElement.innerHTML = someMarkup; // Might fail on IE, never on Chrome.
Run Code Online (Sandbox Code Playgroud)
我也试过这个问题中提到的方法.但无效标记并不会导致失败,即使是我能构造出的最不合法的标记也是如此.
那么,有没有办法在Google Chrome中"严格"地解析HTML呢?我不想自己做词法分析(tokenize),也不想使用外部验证工具.如果没有其他选择,用严格的XML解析器也可以,但HTML中某些元素不需要结束标签,最好不要因此被判为失败.
使用DOMParser以两个步骤检查文档:
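1. 先将字符串作为XML解析,检查它是否是格式良好的XML(例如所有标签都已关闭).
2. 再将字符串作为HTML解析(这需要DOMParser支持text/html,见下面的补丁),遍历每个元素,检查它是否是HTMLUnknownElement的实例.为此,getElementsByTagName('*')很适合.(如果想更严格地验证,还必须递归检查每个元素是否允许出现在其所在位置,例如<area>只能出现在<map>中.)
/* DOM parser for text/html, see https://stackoverflow.com/a/9251106/938089 */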
/*
 * Patch DOMParser.prototype.parseFromString so that it accepts "text/html".
 *
 * If the existing parseFromString already returns a truthy result for
 * ("", "text/html"), nothing is changed. Otherwise the method is replaced by a
 * wrapper that builds an HTML document through
 * document.implementation.createHTMLDocument and fills it via innerHTML; every
 * other MIME type is forwarded to the original implementation.
 */
;(function (DOMParser) {
  "use strict";

  var proto = DOMParser.prototype;
  var nativeParseFromString = proto.parseFromString;

  // Feature test: a throwing or falsy result means text/html is unsupported,
  // so the exception is intentionally ignored and the patch is installed.
  try {
    if ((new DOMParser).parseFromString("", "text/html")) {
      return;
    }
  } catch (ignored) {}

  proto.parseFromString = function (markup, type) {
    // Anything that is not text/html (optionally followed by ";...") goes to
    // the original parser untouched.
    if (!/^\s*text\/html\s*(;|$)/i.test(type)) {
      return nativeParseFromString.apply(this, arguments);
    }

    var htmlDoc = document.implementation.createHTMLDocument("");
    var rootElement = htmlDoc.documentElement;
    rootElement.innerHTML = markup;

    // If the root ended up with a single <html> child, promote that child to
    // be the document element instead of leaving it nested.
    var onlyChild = rootElement.firstElementChild;
    if (rootElement.childElementCount === 1 &&
        onlyChild.localName.toLowerCase() === "html") {
      htmlDoc.replaceChild(onlyChild, rootElement);
    }

    return htmlDoc;
  };
}(DOMParser));
/**
 * Validate an HTML string in two passes.
 *
 * Pass 1 parses the string as XML (with an XML declaration prepended) and
 * looks for a <parsererror> element in the result. Pass 2 parses the string as
 * text/html and checks every element against HTMLUnknownElement.
 * Pass 2 needs a DOMParser that accepts 'text/html'; for browsers lacking it,
 * see https://stackoverflow.com/a/9251106/938089.
 *
 * @param {String} html The HTML string to be validated
 * @returns {null}  If the string is not well-formed XML
 *          {false} If the string contains an unknown element
 *          {true}  If the string satisfies both conditions
 */
function validateHTML(html) {
  var domParser = new DOMParser();

  // Pass 1: XML well-formedness (e.g. unclosed tags end up as a parsererror).
  var xmlDoc = domParser.parseFromString('<?xml version="1.0"?>' + html, 'text/xml');
  if (xmlDoc.querySelector('parsererror')) {
    console.log('Not welformed HTML (XML)!');
    return null;
  }

  // Pass 2: every element of the HTML parse must be a known HTML element.
  var htmlDoc = domParser.parseFromString(html, 'text/html');
  var elements = htmlDoc.getElementsByTagName('*');
  var hasUnknownElement = Array.prototype.some.call(elements, function (element) {
    return element instanceof HTMLUnknownElement;
  });

  // true: the document is syntactically correct and all tags are known.
  return !hasUnknownElement;
}
// Demo: log the validation result for three sample inputs, in order.
[
  '<div>',    // null, because the close tag is missing
  '<x></x>',  // false, because <x> is not an HTML element
  '<a></a>'   // true, because the tag is closed and <a> is an HTML element
].forEach(function (sample) {
  console.log(validateHTML(sample));
});
Run Code Online (Sandbox Code Playgroud)
有关没有DOMParser的XML验证的替代方法,请参阅本答案的修订版1.
注意:该函数对<input type="text">会返回null(因为标签未关闭),而它其实是有效的HTML5.