Index: openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl,v
diff -u -r1.4 -r1.5
--- openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl	6 Nov 2002 22:35:30 -0000	1.4
+++ openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl	23 Sep 2003 13:07:12 -0000	1.5
@@ -16,12 +16,18 @@
 
 ad_proc -public ad_text_to_html {
     -no_links:boolean
+    -no_lines:boolean
+    -no_quote:boolean
+    -includes_html:boolean
     text 
 } {
     Converts plaintext to html. Also translates any recognized 
     email addresses or URLs into a hyperlink.
 
     @param no_links will prevent it from highlighting 
+    @param no_quote will prevent it from HTML-quoting output, so this can be run on 
+    semi-HTML input and preserve that formatting. This will also cause spaces/tabs to not be
+    replaced with nbsp's, because this can too easily mess up HTML tags.
 
     @author Branimir Dolicki (branimir@arsdigita.com)
     @author Lars Pind (lars@pinds.com)
@@ -39,6 +45,132 @@
 	# (bd) The only purpose of thiese sTaRtUrL and
 	# eNdUrL markers is to get rid of trailing dots,
 	# commas and things like that.  Note that there
+	# is a \x001 special char before and after each marker.
+	
+        regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+        regsub -nocase -all {([^a-zA-Z0-9]+)(https://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+        regsub -nocase -all {([^a-zA-Z0-9]+)(ftp://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+
+        # Don't dress URLs that are already links
+        regsub -nocase -all {(href\s*=\s*['"]?)\x001sTaRtUrL([^\x001]*)eNdUrL\x001} $text {\1\2} text
+	
+	# email links have the form xxx@xxx.xxx
+        # JCD: don't treat things like =xxx@xxx.xxx as email, since the most
+        # common occurrence seems to be in URLs (although VPATH bounce
+        # emails like bounce-user=domain.com@sourcehost.com will then not
+        # work correctly).  It's all quite ugly.
+ 
+        regsub -nocase -all {([^a-zA-Z0-9=]+)(mailto:)?([^=\(\)\s:;,@<>]+@[^\(\)\s.:;,@<>]+[.][^\(\)\s:;,@<>]+)} $text \
+                "\\1\x001sTaRtEmAiL\\3eNdEmAiL\x001" text
+    }    
+
+    # At this point, before inserting some of our own <, >, and "'s
+    # we quote the ones entered by the user:
+    if { !$no_quote_p } {
+        set text [ad_quotehtml $text]
+    }
+
+    # Convert line breaks
+    if { !$no_lines_p } {
+        set text [util_convert_line_breaks_to_html -includes_html=$includes_html_p -- $text]
+    }
+
+    if { !$no_quote_p } {
+        # Convert every two spaces to an nbsp
+        regsub -all {  } $text "\\\&nbsp; " text
+        
+        # Convert tabs to four nbsp's
+        regsub -all {\t} $text {\&nbsp;\&nbsp;\&nbsp;\&nbsp;} text
+    }
+
+    if { !$no_links_p } {
+        # Move the end of the link before any punctuation marks at the end of the URL
+	regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdUrL\x001)} $text {\2\1} text
+	regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdEmAiL\x001)} $text {\2\1} text
+
+	# Dress the links and emails with A HREF
+	regsub -all {\x001sTaRtUrL([^\x001]*)eNdUrL\x001} $text {<a href="\1">\1</a>} text
+	regsub -all {\x001sTaRtEmAiL([^\x001]*)eNdEmAiL\x001} $text {<a href="mailto:\1">\1</a>} text
+	set text [string trimleft $text]
+    }
+
+    # JCD: Remove all the eNd sTaRt stuff and warn if we do it, since it's bad
+    # to have these left (means something is broken in our regexps above)
+    if {[regsub -all {(\x001sTaRtUrL|eNdUrL\x001|\x001sTaRtEmAiL|eNdEmAiL\x001)} $text {} text]} {
+        ns_log warning "Replaced sTaRt/eNd magic tags in ad_text_to_html"
+    }
+
+    return $text
+}
+
+
+ad_proc -public util_convert_line_breaks_to_html {
+    {-includes_html:boolean}
+    text
+} {
+    Convert line breaks to HTML: blank lines become <p> breaks, single line breaks become <br />.
+
+    @param includes_html Drop (rather than convert) line breaks adjacent to ul, ol, li, blockquote, p, div, table, tr, td and th tags.
+    @param text The text to convert.
+    @return The text wrapped in <p>...</p>, with its line breaks converted.
+} {
+    # Trim leading/trailing whitespace, then make sure all line breaks are single \n's
+    regsub {^[\s]*} $text {} text
+    regsub {[\s]*$} $text {} text
+    regsub -all {\r\n} $text "\n" text
+    regsub -all {\r} $text "\n" text
+
+    # Remove whitespace before \n's
+    regsub -all {[ \t]*\n} $text "\n" text
+
+    # Wrap P's around paragraphs. The character after the blank line is only
+    # looked at (lookahead), not consumed, so it can still end the next match;
+    # otherwise one-character paragraphs ("a\n\nb\n\nc") lose every other break.
+    set text "<p>$text</p>"
+    regsub -all {([^\n\s])\n\n(?=[^\n\s])} $text {\1</p><p>} text
+
+    # Single line breaks are converted to <br>'s only after the P tags are made,
+    # because otherwise the paragraph breaks would already have been turned into BR's.
+    # With HTML input, first remove line breaks right before and after tags that
+    # will insert a paragraph break themselves.
+    if { $includes_html_p } {
+        foreach tag { ul ol li blockquote p div table tr td th } {
+            regsub -all -nocase "\\n\\s*(</?${tag}\\s*\[^>\]*>)" $text {\1} text
+            regsub -all -nocase "(</?${tag}\\s*\[^>\]*>)\\s*\\n" $text {\1} text
+        }
+    }
+    regsub -all {\n} $text "<br />\n" text
+    # Add line breaks to P tags
+    regsub -all {</p>} $text "</p>\n" text
+    return $text
+}
+
+
+ad_proc -public ad_text_to_html_old {
+    -no_links:boolean
+    text 
+} {
+    Converts plaintext to html. Also translates any recognized 
+    email addresses or URLs into a hyperlink.
+
+    @param no_links will prevent it from highlighting 
+
+    @author Branimir Dolicki (branimir@arsdigita.com)
+    @author Lars Pind (lars@pinds.com)
+    @creation-date 19 July 2000
+} {
+
+    if { !$no_links_p } {
+	# We start by putting a space in front so our URL/email highlighting will work
+	# for URLs/emails right in the beginning of the text.
+	set text " $text"
+	
+	# if something is " http://" or " https://"
+	# we assume it is a link to an outside source. 
+	
+	# (bd) The only purpose of thiese sTaRtUrL and
+	# eNdUrL markers is to get rid of trailing dots,
+	# commas and things like that.  Note that there
 	# is a TAB before and after each marker.
 	
 	regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text