Index: openacs-4/packages/acs-templating/www/resources/xinha-nightly/modules/GetHtml/TransformInnerHTML.js =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-templating/www/resources/xinha-nightly/modules/GetHtml/TransformInnerHTML.js,v diff -u -r1.6 -r1.7 --- openacs-4/packages/acs-templating/www/resources/xinha-nightly/modules/GetHtml/TransformInnerHTML.js 27 Mar 2009 08:20:43 -0000 1.6 +++ openacs-4/packages/acs-templating/www/resources/xinha-nightly/modules/GetHtml/TransformInnerHTML.js 23 May 2010 11:58:33 -0000 1.7 @@ -1,3 +1,231 @@ -/* This compressed file is part of Xinha. For uncompressed sources, forum, and bug reports, go to xinha.org */ -/* This file is part of version 0.96beta2 released Fri, 20 Mar 2009 11:01:14 +0100 */ -function GetHtmlImplementation(a){this.editor=a}GetHtmlImplementation._pluginInfo={name:"GetHtmlImplementation TransformInnerHTML",version:"1.0",developer:"Nelson Bright",developer_url:"http://www.brightworkweb.com/",sponsor:"",sponsor_url:"",license:"htmlArea"};Xinha.RegExpCache=[/<\s*\/?([^\s\/>]+)[\s*\/>]/gi,/(\s+)_moz[^=>]*=[^\s>]*/gi,/\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g,/\/>/g,/<(br|hr|img|input|link|meta|param|embed|area)((\s*\S*="[^"]*")*)>/g,/(<\w+\s+(\w*="[^"]*"\s+)*)(checked|compact|declare|defer|disabled|ismap|multiple|no(href|resize|shade|wrap)|readonly|selected)([\s>])/gi,/(="[^']*)'([^'"]*")/,/&(?=(?!(#[0-9]{2,5};|[a-zA-Z0-9]{2,6};|#x[0-9a-fA-F]{2,4};))[^<]*>)/g,/<\s+/g,/\s+(\/)?>/g,/\s{2,}/g,/\s+([^=\s]+)((="[^"]+")|([\s>]))/g,/\s+contenteditable(=[^>\s\/]*)?/gi,/((href|src)=")([^\s]*)"/g,/<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|dl|dt|dd|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title|area|input|form|textarea|select|option)[^>]*>/g,/<\/(div|p|h[1-6]|table|tr|ul|ol|dl|blockquote|object|html|head|body|script|form|select)( [^>]*)?>/g,/<(div|p|h[1-6]|table|tr|ul|ol|dl|blockquote|object|html|head|body|script|form|select)( [^>]*)?>/g,/<(td|th|li|dt|dd|option|br|hr|embed|param|pre|meta|link|title|area|input|textarea)[^>]*>/g,/(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g,/(]*>)([\s\S])*?(<\/pre>)/g,/(^|)([\s\S]*?)(?=|$)/g,/\S*=""/g,/|<\?[\s\S]*?\?>|<\/?\w[^>]*>/g,/(^|<\/script>)[\s\S]*?(]*>|$)/g];if(typeof RegExp.prototype.compile=="function"){for(var i=0;i").replace(d[9],"$1>").replace(d[5],'$1$3="$3"$5').replace(d[4],"<$1$2 />").replace(d[6],"$1$2").replace(d[7],"&").replace(d[8],"<").replace(d[10]," ");if(Xinha.is_ie&&d[13].test(b)){b=b.replace(d[13],"$1"+Xinha._escapeDollars(stripBaseURL(RegExp.$3))+'"')}if(this.config.only7BitPrintablesInURLs){if(Xinha.is_ie){d[13].test(b)}if(d[13].test(b)){try{b=b.replace(d[13],"$1"+Xinha._escapeDollars(decodeURIComponent(RegExp.$3).replace(/([^!-~]+)/g,function(c){return escape(c)}))+'"')}catch(a){b=b.replace(d[13],Xinha._escapeDollars("$1"+RegExp.$3.replace(/([^!-~]+)/g,function(c){return escape(c)})+'"'))}}}return b};Xinha.indent=function(a,b){Xinha.__nindent=0;Xinha.__sindent="";Xinha.__sindentChar=(typeof b=="undefined")?" ":b;var d=Xinha.RegExpCache;if(Xinha.is_gecko){a=a.replace(d[19],function(c){return c.replace(/
/g,"\n")})}a=a.replace(d[18],function(c){c=c.replace(d[20],function(f,e,g){string=g.replace(/[\n\r]/gi," ").replace(/\s+/gi," ").replace(d[14],function(l){if(l.match(d[16])){var k="\n"+Xinha.__sindent+l;Xinha.__sindent+=Xinha.__sindentChar;++Xinha.__nindent;return k}else{if(l.match(d[15])){--Xinha.__nindent;Xinha.__sindent="";for(var h=Xinha.__nindent;h>0;--h){Xinha.__sindent+=Xinha.__sindentChar}return"\n"+Xinha.__sindent+l}else{if(l.match(d[17])){return"\n"+Xinha.__sindent+l}}}return l});return e+string});return c});a=a.replace(/^\s*/,"").replace(/ +\n/g,"\n").replace(/[\r\n]+(\s+)<\/script>/g,"\n$1<\/script>");return a};Xinha.getHTML=function(k,e,g){var f="";var h=Xinha.RegExpCache;if(k.nodeType==11){var d=document.createElement("div");var p=k.insertBefore(d,k.firstChild);for(j=p.nextSibling;j;j=j.nextSibling){p.appendChild(j.cloneNode(true))}f+=p.innerHTML.replace(h[23],function(a){a=a.replace(h[22],function(c){if(/^<[!\?]/.test(c)){return c}else{return g.cleanHTML(c)}});return a})}else{var n=(k.nodeType==1)?k.tagName.toLowerCase():"";if(e){f+="<"+n;var o=k.attributes;for(i=0;i\/]+)/,"$1")))){return g.cleanHTML(c)}else{return""}}});return a});if(Xinha.is_ie){f=f.replace(/<(li|dd|dt)( [^>]*)?>/g,"<$1$2>").replace(/(<[uod]l[^>]*>[\s\S]*?)<\/(li|dd|dt)>/g,"$1").replace(/\s*<\/(li|dd|dt)>(\s*<\/(li|dd|dt)>)+/g,"").replace(/(][\s\S]*?)(<\/d[dt]>)+/g,"$1")}if(Xinha.is_gecko){f=f.replace(/
\n$/,"")}f=f.replace(/\s*(<\/(li|dd|dt)>)/g,"$1");if(e){f+=""}f=Xinha.indent(f)}return f};Xinha._escapeDollars=function(a){return a.replace(/\$/g,"$$$$")}; \ No newline at end of file +/** + * Based on XML_Utility functions submitted by troels_kn. + * credit also to adios, who helped with reg exps: + * http://www.sitepoint.com/forums/showthread.php?t=201052 + * + * A replacement for Xinha.getHTML + * + * Features: + * - Generates XHTML code + * - Much faster than Xinha.getHTML + * - Eliminates the hacks to accomodate browser quirks + * - Returns correct code for Flash objects and scripts + * - Formats html in an indented, readable format in html mode + * - Preserves script and pre formatting + * - Preserves formatting in comments + * - Removes contenteditable from body tag in full-page mode + * - Supports only7BitPrintablesInURLs config option + * - Supports htmlRemoveTags config option + */ + +function GetHtmlImplementation(editor) { + this.editor = editor; +} + +GetHtmlImplementation._pluginInfo = { + name : "GetHtmlImplementation TransformInnerHTML", + version : "1.0", + developer : "Nelson Bright", + developer_url : "http://www.brightworkweb.com/", + sponsor : "", + sponsor_url : "", + license : "htmlArea" +}; + +Xinha.RegExpCache = [ +/*00*/ /<\s*\/?([^\s\/>]+)[\s*\/>]/gi,//lowercase tags +/*01*/ /(\s+)_moz[^=>]*=[^\s>]*/gi,//strip _moz attributes +/*02*/ /\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g,// find attributes +/*03*/ /\/>/g,//strip singlet terminators +/*04*/ /<(br|hr|img|input|link|meta|param|embed|area)((\s*\S*="[^"]*")*)>/g,//terminate singlet tags +/*05*/ /(<\w+\s+(\w*="[^"]*"\s+)*)(checked|compact|declare|defer|disabled|ismap|multiple|no(href|resize|shade|wrap)|readonly|selected)([\s>])/gi,//expand singlet attributes +/*06*/ /(="[^']*)'([^'"]*")/,//check quote nesting +/*07*/ /&(?=(?!(#[0-9]{2,5};|[a-zA-Z0-9]{2,6};|#x[0-9a-fA-F]{2,4};))[^<]*>)/g,//expand query ampersands not in html entities +/*08*/ /<\s+/g,//strip tagstart whitespace +/*09*/ /\s+(\/)?>/g,//trim whitespace +/*10*/ /\s{2,}/g,//trim extra whitespace +/*11*/ /\s+([^=\s]+)((="[^"]+")|([\s>]))/g,// lowercase attribute names +/*12*/ /\s+contenteditable(=[^>\s\/]*)?/gi,//strip contenteditable +/*13*/ /((href|src)=")([^\s]*)"/g, //find href and src for stripBaseHref() +/*14*/ /<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|dl|dt|dd|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title|area|input|form|textarea|select|option)[^>]*>/g, +/*15*/ /<\/(div|p|h[1-6]|table|tr|ul|ol|dl|blockquote|html|head|body|script|form|select)( [^>]*)?>/g,//blocklevel closing tag +/*16*/ /<(div|p|h[1-6]|table|tr|ul|ol|dl|blockquote|object|html|head|body|script|form|select)( [^>]*)?>/g,//blocklevel opening tag +/*17*/ /<(td|th|li|dt|dd|option|br|hr|embed|param|pre|meta|link|title|area|input|textarea)[^>]*>/g,//singlet tag or output on 1 line +/*18*/ /(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g,//find content NOT inside pre and script tags +/*19*/ /(]*>)([\s\S])*?(<\/pre>)/g,//find content inside pre tags +/*20*/ /(^|)([\s\S]*?)(?=|$)/g,//find content NOT inside comments +/*21*/ /\S*=""/g, //find empty attributes +/*22*/ /|<\?[\s\S]*?\?>|<\/?\w[^>]*>/g, //find all tags, including comments and php +/*23*/ /(^|<\/script>)[\s\S]*?(]*>|$)/g //find content NOT inside script tags +]; +// compile for performance; WebKit doesn't support this +var testRE = new RegExp().compile(Xinha.RegExpCache[3]); +if (typeof testRE != 'undefined') { + for (var i=0; i').//strip singlet terminators + replace(c[9], '$1>').//trim whitespace + replace(c[5], '$1$3="$3"$5').//expand singlet attributes + replace(c[4], '<$1$2 />').//terminate singlet tags + replace(c[6], '$1$2').//check quote nesting + replace(c[7], '&').//expand query ampersands + replace(c[8], '<').//strip tagstart whitespace + replace(c[10], ' ');//trim extra whitespace + if(Xinha.is_ie && c[13].test(sHtml)) { + sHtml = sHtml.replace(c[13],'$1'+Xinha._escapeDollars(this.stripBaseURL(RegExp.$3))+'"'); + } + + if(this.config.only7BitPrintablesInURLs) { + if (Xinha.is_ie) c[13].test(sHtml); // oddly the test below only triggers when we call this once before (IE6), in Moz it fails if tested twice + if ( c[13].test(sHtml)) { + try { //Mozilla returns an incorrectly encoded value with innerHTML + sHtml = sHtml.replace(c[13], '$1'+Xinha._escapeDollars(decodeURIComponent(RegExp.$3).replace(/([^!-~]+)/g, function(chr) + {return escape(chr);}))+'"'); + } catch (e) { // once the URL is escape()ed, you can't decodeURIComponent() it anymore + sHtml = sHtml.replace(c[13], Xinha._escapeDollars('$1'+RegExp.$3.replace(/([^!-~]+)/g,function(chr){return escape(chr);})+'"')); + } + } + } + return sHtml; +}; + +/** + * Prettyfies html by inserting linebreaks before tags, and indenting blocklevel tags + */ +Xinha.indent = function(s, sindentChar) { + Xinha.__nindent = 0; + Xinha.__sindent = ""; + Xinha.__sindentChar = (typeof sindentChar == "undefined") ? " " : sindentChar; + var c = Xinha.RegExpCache; + if(Xinha.is_gecko) { //moz changes returns into
inside
 tags
+		s = s.replace(c[19], function(str){return str.replace(/
/g,"\n")}); + } + s = s.replace(c[18], function(strn) { //skip pre and script tags + strn = strn.replace(c[20], function(st,$1,$2) { //exclude comments + string = $2.replace(/[\n\r]/gi, " ").replace(/\s+/gi," ").replace(c[14], function(str) { + if (str.match(c[16])) { + var s = "\n" + Xinha.__sindent + str; + // blocklevel openingtag - increase indent + Xinha.__sindent += Xinha.__sindentChar; + ++Xinha.__nindent; + return s; + } else if (str.match(c[15])) { + // blocklevel closingtag - decrease indent + --Xinha.__nindent; + Xinha.__sindent = ""; + for (var i=Xinha.__nindent;i>0;--i) { + Xinha.__sindent += Xinha.__sindentChar; + } + return "\n" + Xinha.__sindent + str; + } else if (str.match(c[17])) { + // singlet tag + return "\n" + Xinha.__sindent + str; + } + return str; // this won't actually happen + }); + return $1 + string; + });return strn; + }); + //final cleanup + s = s.replace(/^\s*/,'').//strip leading whitespace + replace(/ +\n/g,'\n').//strip spaces at end of lines + replace(/[\r\n]+(\s+)<\/script>/g,'\n$1');//strip returns added into scripts + return s; +}; + +Xinha.getHTML = function(root, outputRoot, editor) { + var html = ""; + var c = Xinha.RegExpCache; + + if(root.nodeType == 11) {//document fragment + //we can't get innerHTML from the root (type 11) node, so we + //copy all the child nodes into a new div and get innerHTML from the div + var div = document.createElement("div"); + var temp = root.insertBefore(div,root.firstChild); + for (j = temp.nextSibling; j; j = j.nextSibling) { + temp.appendChild(j.cloneNode(true)); + } + html += temp.innerHTML.replace(c[23], function(strn) { //skip content inside script tags + strn = strn.replace(c[22], function(tag){ + if(/^<[!\?]/.test(tag)) return tag; //skip comments and php tags + else return editor.cleanHTML(tag)}); + return strn; + }); + + } else { + + var root_tag = (root.nodeType == 1) ? root.tagName.toLowerCase() : ''; + if (outputRoot) { //only happens with tag in fullpage mode + html += "<" + root_tag; + var attrs = root.attributes; // strangely, this doesn't work in moz + for (i = 0; i < attrs.length; ++i) { + var a = attrs.item(i); + if (!a.specified) { + continue; + } + var name = a.nodeName.toLowerCase(); + var value = a.nodeValue; + html += " " + name + '="' + value + '"'; + } + html += ">"; + } + if(root_tag == "html") { + innerhtml = editor._doc.documentElement.innerHTML; + } else { + innerhtml = root.innerHTML; + } + //pass tags to cleanHTML() one at a time + //includes support for htmlRemoveTags config option + html += innerhtml.replace(c[23], function(strn) { //skip content inside script tags + strn = strn.replace(c[22], function(tag){ + if(/^<[!\?]/.test(tag)) return tag; //skip comments and php tags + else if(!(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(tag.replace(/<([^\s>\/]+)/,'$1')))) + return editor.cleanHTML(tag); + else return ''}); + return strn; + }); + //IE drops all ,, tags in a list except the last one + if(Xinha.is_ie) { + html = html.replace(/<(li|dd|dt)( [^>]*)?>/g,'<$1$2>'). + replace(/(<[uod]l[^>]*>[\s\S]*?)<\/(li|dd|dt)>/g, '$1'). + replace(/\s*<\/(li|dd|dt)>(\s*<\/(li|dd|dt)>)+/g, ''). + replace(/(][\s\S]*?)(<\/d[dt]>)+/g, '$1'); + } + if(Xinha.is_gecko) + html = html.replace(/
\n$/, ''); //strip trailing
added by moz + //Cleanup redundant whitespace before in IE and Mozilla + html = html.replace(/\s*(<\/(li|dd|dt)>)/g, '$1'); + if (outputRoot) { + html += ""; + } + html = Xinha.indent(html); + }; +// html = Xinha.htmlEncode(html); + + return html; +}; + +/** + * Escapes dollar signs ($) to make them safe to use in regex replacement functions by replacing each $ in the input with $$. + * + * This is advisable any time the replacement string for a call to replace() is a variable and could contain dollar signs that should not be interpreted as references to captured groups (e.g., when you want the text "$10" and not the first captured group followed by a 0). + * See http://trac.xinha.org/ticket/1337 + */ +Xinha._escapeDollars = function(str) { + return str.replace(/\$/g, "$$$$"); +};