htmlタグをjsonに変換
6651 ワード
/**
 * Convert an HTML fragment that has a single root element into a
 * pretty-printed JSON string.
 *
 * Every paired element (<tag ...>...</tag>) becomes an object shaped like:
 *   {
 *     "tag": "tag name, exactly as written in the markup",
 *     "attrs": {"name": "value", ...},
 *     "content": "text" | element object | [element object, ...] | null
 *   }
 * "attrs" is the empty string "" when no attribute could be parsed, and
 * "content" is null when the element body is empty.
 *
 * Known limitations (the parser is regex based, not a real HTML parser):
 *  - only paired tags are recognised; void elements such as input / img have
 *    no closing tag and are left inside the surrounding text (original TODO);
 *  - when a body holds elements, any text sitting next to them is dropped;
 *  - an element nested in another element of the same name, followed by a
 *    sibling of that name, may be split at the wrong closing tag;
 *  - attributes are parsed only in the form name="value" or name='value',
 *    and a ">" inside an attribute value ends the tag early;
 *  - HTML entities are not decoded.
 *
 * Unlike the previous revision this function no longer installs a
 * String.prototype.trim fallback; trimming is done locally.
 *
 * @param {String} html             HTML fragment to convert.
 * @param {int}    indent_str_count Indent depth of the keys of the current
 *                                  object; falsy values mean 1.
 * @param {String} indent_str       One indentation unit; defaults to a tab.
 *                                  Only read when `indent` is not supplied.
 * @param {Object} indent           Shared indentation cache handed down by the
 *                                  recursion. Its presence marks the call as
 *                                  nested, so external callers should omit it.
 * @return {String|Object|null}
 *   - the JSON text on success;
 *   - null when `html` is empty, not a string, or (top-level call only)
 *     contains no paired element;
 *   - the object {"error": "html "} when a top-level call finds more than one
 *     root element.
 *
 * @test (nested)  html2json("<table><tr><td class=\"popupmenu_option\" onclick=\"Do('detail')\"> </td></tr><tr><td class=\"popupmenu_option\" onclick=\"Do('rawdata')\"> </td></tr></table>")
 * @test (sibling) html2json("<tr><td> </td></tr><tr><td> </td></tr>") returns the error object
 * @create 2012-6-7 16:30:48
 * @modify 2012-6-8  double quotes inside text content are escaped
 * @modify 2012-6-11 attributes are extracted with a regex instead of
 *                   splitting on whitespace
 * @modify output is now escaped with JSON.stringify; attribute values may
 *         contain "=" or be empty; attribute names may contain "-", ":" and
 *         "."; markup may span several lines; no implicit globals and no
 *         reliance on the deprecated RegExp.$n statics.
 * @TODO support void elements (input, img, ...)
 */
function html2json(html, indent_str_count, indent_str, indent) {
    // Keys used in every generated element object.
    var HTML_TAG_NAME = "tag";
    var HTML_TAG_ATTRIBUTES = "attrs";
    var HTML_TAG_CONTENT = "content";

    // [\s\S] instead of "." so that element bodies may contain line breaks.
    // All paired elements found left to right (body matched lazily).
    var ELEMENT_ALL = /<(\w+)([^>]*)>([\s\S]*?)<\/\1>/gi;
    // Same pattern without /g: yields the capture groups of the first element.
    var ELEMENT_FIRST = /<(\w+)([^>]*)>([\s\S]*?)<\/\1>/i;
    // The whole fragment is exactly one element (body matched greedily so the
    // outermost closing tag is used).
    var ELEMENT_WHOLE = /^\s*<(\w+)([^>]*)>([\s\S]*)<\/\1>\s*$/i;

    // Nested calls always receive the shared indent object.
    var isFirstRun = !indent;

    /** Strip leading/trailing whitespace without touching String.prototype. */
    function trimString(text) {
        return typeof text.trim === "function" ?
            text.trim() :
            text.replace(/^\s+|\s+$/g, "");
    }

    /**
     * Render the raw attribute text of an opening tag as a JSON object, or
     * as "" (two quote characters) when no attribute is recognised.
     */
    function renderAttributes(rawAttrs) {
        // name="value" | name='value'. A quoted HTML attribute value cannot
        // contain its own quote character, so [^"]* / [^']* is exact and also
        // accepts empty values.
        var attributeRe = /([\w:.-]+)=(?:"([^"]*)"|'([^']*)')/g;
        var keyIndent = indent.factory(indent_str_count + 1);
        var pairs = [];
        var found;
        var value;

        while ((found = attributeRe.exec(rawAttrs)) !== null) {
            value = found[2] !== undefined ? found[2] : found[3];
            pairs.push(keyIndent + JSON.stringify(found[1]) + ": " + JSON.stringify(value));
        }
        if (!pairs.length) {
            return "\"\"";
        }
        return "{\n" + pairs.join(",\n") + "\n" + indent.factory(indent_str_count) + "}";
    }

    if (!html || typeof html !== "string") {
        return null;
    }
    html = trimString(html);

    var htmlEls = html.match(ELEMENT_ALL);
    if (!htmlEls) {
        // No element at all: a top-level call has nothing to convert, a
        // nested call is looking at plain text content.
        return isFirstRun ? null : JSON.stringify(html);
    }

    indent_str_count = indent_str_count || 1;
    indent_str = indent_str || "\t";
    indent = indent || {
        // Cache of already built indentation strings; strings[n - 1] holds n units.
        "strings": [],
        "str": indent_str,
        /**
         * Return `count` indentation units, building and caching on demand.
         * @param {int} count number of units; values <= 0 give "".
         * @return {String}
         */
        "factory": function (count) {
            if (!(count > 0)) {
                return "";
            }
            var index = count - 1;
            if (this.strings[index] === undefined) {
                this.strings[index] = new Array(count + 1).join(this.str);
            }
            return this.strings[index];
        },
        "getIndentStr": function () {
            return this.str || "\t";
        }
    };

    if (htmlEls.length > 1) {
        if (isFirstRun) {
            // A document must have exactly one root element.
            // NOTE(review): the message text was truncated somewhere upstream;
            // kept byte-for-byte because callers may compare against it.
            return {"error": "html "};
        }
        // Sibling elements: render each one a level deeper, as a JSON array.
        var rendered = [];
        for (var i = 0; i < htmlEls.length; i++) {
            rendered.push(html2json(htmlEls[i], indent_str_count + 1, indent.getIndentStr(), indent));
        }
        return "[" + rendered.join(", ") + "\n" + indent.factory(indent_str_count - 1) + "]";
    }

    // Exactly one element. Prefer the anchored match; when text surrounds the
    // element fall back to the element itself (the surrounding text is
    // dropped, as it always has been).
    var parsed = ELEMENT_WHOLE.exec(html) || ELEMENT_FIRST.exec(html);
    var keyPad = indent.factory(indent_str_count);

    return "{\n" +
        keyPad + "\"" + HTML_TAG_NAME + "\": \"" + parsed[1] + "\",\n" +
        keyPad + "\"" + HTML_TAG_ATTRIBUTES + "\": " + renderAttributes(parsed[2]) + ",\n" +
        keyPad + "\"" + HTML_TAG_CONTENT + "\": " +
        html2json(parsed[3], indent_str_count + 1, indent.getIndentStr(), indent) + "\n" +
        indent.factory(indent_str_count - 1) + "}";
}