Index: openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl,v
diff -u -r1.6 -r1.7
--- openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl	23 Sep 2003 13:15:27 -0000	1.6
+++ openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl	23 Sep 2003 13:19:45 -0000	1.7
@@ -33,7 +33,10 @@
     @author Lars Pind (lars@pinds.com)
     @creation-date 19 July 2000
 } {
-
+    if { [empty_string_p $text] } {
+        return {}
+    }
+    
     if { !$no_links_p } {
 	# We start by putting a space in front so our URL/email highlighting will work
 	# for URLs/emails right in the beginning of the text.
@@ -103,7 +106,6 @@
     return $text
 }
 
-
 ad_proc -public util_convert_line_breaks_to_html {
     {-includes_html:boolean}
     text
@@ -122,7 +124,7 @@
     regsub -all {[ \t]*\n} $text "\n" text
     
     # Wrap P's around paragraphs
-    set text "<p>$text</p>"
+    set text "<p style=\"margin-top: 0px;\">$text</p>"
     regsub -all {([^\n\s])\n\n([^\n\s])} $text {\1</p><p>\2} text
 
     # Convert _single_ CRLF's to <br>'s to preserve line breaks
@@ -146,77 +148,7 @@
 }
 
 
-ad_proc -public ad_text_to_html_old {
-    -no_links:boolean
-    text 
-} {
-    Converts plaintext to html. Also translates any recognized 
-    email addresses or URLs into a hyperlink.
 
-    @param no_links will prevent it from highlighting 
-
-    @author Branimir Dolicki (branimir@arsdigita.com)
-    @author Lars Pind (lars@pinds.com)
-    @creation-date 19 July 2000
-} {
-
-    if { !$no_links_p } {
-	# We start by putting a space in front so our URL/email highlighting will work
-	# for URLs/emails right in the beginning of the text.
-	set text " $text"
-	
-	# if something is " http://" or " https://"
-	# we assume it is a link to an outside source. 
-	
-	# (bd) The only purpose of thiese sTaRtUrL and
-	# eNdUrL markers is to get rid of trailing dots,
-	# commas and things like that.  Note that there
-	# is a TAB before and after each marker.
-	
-	regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
-	regsub -nocase -all {([^a-zA-Z0-9]+)(https://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
-	regsub -nocase -all {([^a-zA-Z0-9]+)(ftp://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
-	
-	# email links have the form xxx@xxx.xxx
-	regsub -nocase -all {([^a-zA-Z0-9]+)([^\(\)\s:;,@<>]+@[^\(\)\s.:;,@<>]+[.][^\(\)\s:;,@<>]+)} $text \
-		"\\1\tsTaRtEmAiL\\2eNdEmAiL\t" text
-
-    }    
-
-    # At this point, before inserting some of our own <, >, and "'s
-    # we quote the ones entered by the user:
-    set text [ad_quotehtml $text]
-
-    # Convert _single_ CRLF's to <br>'s to preserve line breaks
-    regsub -all {\r*\n} $text "<br>" text
-
-    # Convert every two spaces to an nbsp
-    regsub -all {  } $text "\\\&nbsp; " text
-
-    # turn CRLFCRLF into <P>
-    if { [regsub -all {\r\n\s*\r\n} $text "<p>" text] == 0 } {
-	# try LFLF
-	if { [regsub -all {\n\s*\n} $text "<p>" text] == 0 } {
-		# try CRCR
-	    regsub -all {\r\s*\r} $text "<p>" text
-	}
-    }
-    
-    if { !$no_links_p } {
-	# Dress the links and emails with A HREF
-	regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdUrL\t)} $text {\2\1} text
-	regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdEmAiL\t)} $text {\2\1} text
-	regsub -all {\tsTaRtUrL([^\t]*)eNdUrL\t} $text {<a href="\1">\1</a>} text
-	regsub -all {\tsTaRtEmAiL([^\t]*)eNdEmAiL\t} $text {<a href="mailto:\1">\1</a>} text
-	set text [string trimleft $text]
-    }
-
-    # Convert every tab to 4 nbsp's
-    regsub -all {\t} $text {\&nbsp;\&nbsp;\&nbsp;\&nbsp;} text
-    
-    return $text
-}
-
 ad_proc -public ad_quotehtml { arg } {
 
     Quotes ampersands, double-quotes, and angle brackets in $arg.
@@ -484,14 +416,17 @@
     # we should now walk the stack and close any open tags.
 
     for {set i $tagptr} { $i > -1 } {incr i -1} { 
-        # append out "<!-- autoclose --> </$tagstack($i)>"
-        append out "</$tagstack($i)>"
+        set tag $tagstack($i)
+
+        # LARS: Only close tags which we aren't supposed to remove
+        if { ![string equal $syn($tag) "discard"] && ![string equal $syn($tag) "remove"] } {
+            append out "</$tagstack($i)>"
+        }
     }
     
     return $out
 }
 
-
 ad_proc ad_parse_html_attributes {
     -attribute_array
     html
@@ -718,9 +653,9 @@
 
 	    if { ![info exists allowed_tag($tagname)] } {
 		# Nope, this was a naughty tag.
-		return "<p>For security reasons we only accept the submission of HTML
-		containing the following tags:<pre>[join $allowed_tags_list " "]</pre> 
-		You have a &lt;$tagname&gt; tag in there.</p>"
+		return "For security reasons we only accept the submission of HTML
+		containing the following tags: <code>[join $allowed_tags_list " "]</code>. 
+		You have a &lt;$tagname&gt; tag in there."
 	    } else {
 		# Legal tag.
 		
@@ -760,6 +695,7 @@
 # HTML -> Text
 #
 ####################
+
 ad_proc -public ad_html_to_text {
     {-maxlen 70}
     {-showtags:boolean}
@@ -798,6 +734,15 @@
     for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } {
 	# append everything up to and not including the tag-opening <
 	ad_html_to_text_put_text output [string range $html $last_tag_end [expr {$i - 1}]]
+
+        # Check that tag doesn't start with whitespace, in which case it's just a lone < that
+        # was errorneously left unquoted
+        if { $i >= [expr $length-1] || ![string is alpha [string index $html [expr $i + 1]]] } {
+            # Output the < and continue with next character
+            ad_html_to_text_put_text output "<"
+            set last_tag_end [incr i]
+            continue
+        }
     
 	# we're inside a tag now. Find the end of it
 
@@ -837,7 +782,7 @@
 	    }
 	    set string_delimiter [string index $html $string_delimiter_idx]
 
-	    # If the greeater than sign appears before any of the string delimters, we've found the tag end.
+	    # If the greater than sign appears before any of the string delimters, we've found the tag end.
 	    if { $gt_idx < $string_delimiter_idx || $string_delimiter_idx == -1 } {
 		# we found the tag end
 		set i $gt_idx
@@ -856,7 +801,7 @@
 	
 	set full_tag [string range $html $tag_start [expr { $i - 1 }]]
 
-	if { ![regexp {(/?)([^\s]+)[\s]*(\s.*)?} $full_tag match slash tagname attributes] } {
+	if { ![regexp {^(/?)([^\s]+)[\s]*(\s.*)?$} $full_tag match slash tagname attributes] } {
 	    # A malformed tag -- just delete it
 	} else {
 
@@ -946,7 +891,7 @@
 		    ad_html_to_text_put_text output \"
 		}
 		img {
-		    if { [empty_string_p $slash] } {
+		    if { [empty_string_p $slash] && !$no_format_p } {
 			set img_info {}
 			if { [info exists attribute_array(alt)] } {
 			    lappend img_info "'$attribute_array(alt)'"
@@ -979,8 +924,8 @@
     # Close any unclosed tags
     set output(pre) 0
     while { $output(blockquote) > 0 } {
-	incr $output(blockquote) -1
-	incr $output(maxlen) 4
+	incr output(blockquote) -1
+	incr output(maxlen) 4
     }
     
     # write out URLs, if necessary:
@@ -991,235 +936,6 @@
     return $output(text)
 }
 
-ad_proc -public ad_html_to_text_old {
-    {-maxlen 70}
-    {-showtags:boolean}
-    html 
-} {
-    Returns a best-guess plain text version of an HTML fragment.
-    Parses the HTML and does some simple formatting. The parser and
-    formatting
-    is pretty stupid, but it's better than nothing.
-    
-    @param maxlen the line length you want your output wrapped to.
-    @param showtags causes any unknown (and uninterpreted) tags to get shown in the output.
-    
-    @author Lars Pind (lars@pinds.com)
-    @author Aaron Swartz (aaron@swartzfam.com)
-    @creation-date 19 July 2000
-} {
-    set output(text) {}
-    set output(linelen) 0
-    set output(maxlen) $maxlen
-    set output(pre) 0
-    set output(p) 0
-    set output(br) 0
-    set output(space) 0
-    set output(blockquote) 0
-    
-    set length [string length $html]
-    set last_tag_end 0
-
-    # For showing the URL of links.
-    set href_urls [list]
-    set href_stack [list]
-
-    for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } {
-	# append everything up to and not including the tag-opening <
-	ad_html_to_text_put_text output [string range $html $last_tag_end [expr {$i - 1}]]
-    
-	# we're inside a tag now. Find the end of it
-
-	# make i point to the char after the <
-	incr i
-	set tag_start $i
-	
-	set count 0
-	while 1 {
-	    if { [incr count] > 100 } {
-		error "There appears to be a programming bug in ad_html_to_text: We've entered an infinite loop."
-	    }
-	    # Find the positions of the first quote, apostrophe and greater-than sign.
-	    set quote_idx [string first \" $html $i]
-	    set apostrophe_idx [string first ' $html $i]
-	    set gt_idx [string first > $html $i]
-
-	    # If there is no greater-than sign, then the tag isn't closed.
-	    if { $gt_idx == -1 } {
-		set i $length
-		break
-	    }
-
-	    # Find the first of the quote and the apostrophe 
-	    if { $apostrophe_idx == -1 } {
-		set string_delimiter_idx $quote_idx
-	    } else {
-		if { $quote_idx == -1 } {
-		    set string_delimiter_idx $apostrophe_idx
-		} else {
-		    if { $apostrophe_idx < $quote_idx } {
-			set string_delimiter_idx $apostrophe_idx
-		    } else {
-			set string_delimiter_idx $quote_idx
-		    }
-		}
-	    }
-	    set string_delimiter [string index $html $string_delimiter_idx]
-
-	    # If the greeater than sign appears before any of the string delimters, we've found the tag end.
-	    if { $gt_idx < $string_delimiter_idx || $string_delimiter_idx == -1 } {
-		# we found the tag end
-		set i $gt_idx
-		break
-	    }   
-
-	    # Otherwise, we'll have to skip past the ending string delimiter
-	    set i [string first $string_delimiter $html [incr string_delimiter_idx]]
-	    if { $i == -1 } {
-		# Missing string end delimiter
-		set i $length
-		break
-	    }
-	    incr i
-	}
-	
-	set full_tag [string range $html $tag_start [expr { $i - 1 }]]
-
-	if { ![regexp {(/?)([^\s]+)[\s]*(\s.*)?} $full_tag match slash tagname attributes] } {
-	    # A malformed tag -- just delete it
-	} else {
-
-	    # Reset/create attribute array
-            array unset attribute_array
-
-	    # Parse the attributes
-	    ad_parse_html_attributes -attribute_array attribute_array $attributes
-
-	    switch -- [string tolower $tagname] {
-		p - ul - ol - table {
-		    set output(p) 1
-		}
-		br {
-		    ad_html_to_text_put_newline output
-		}
-		tr - td - th {
-		    set output(br) 1
-		}
-		h1 - h2 - h3 - h4 - h5 - h6 {
-		    set output(p) 1
-		    if { [empty_string_p $slash] } {
-			ad_html_to_text_put_text output [string repeat "*" [string index $tagname 1]]
-		    }
-		}
-		li {
-		    set output(br) 1
-		    if { [empty_string_p $slash] } {
-			ad_html_to_text_put_text output "- "
-		    }
-		}
-		strong - b {
-		    ad_html_to_text_put_text output "*"
-		}
-		em - i - cite - u {
-		    ad_html_to_text_put_text output "_"
-		}
-		a {
-		    if { [empty_string_p $slash] } {
-			if { [info exists attribute_array(href)] } {
-			    if { [info exists attribute_array(title)] } {
-				set title ": '$attribute_array(title)'"
-			    } else {
-				set title ""
-			    }
-			    set href_no [expr [llength $href_urls] + 1]
-			    lappend href_urls "\[$href_no\] $attribute_array(href) "
-			    lappend href_stack "\[$href_no$title\]"
-			} elseif { [info exists attribute_array(title)] } {
-			    lappend href_stack "\[$attribute_array(title)\]"
-			} else {
-			    lappend href_stack {}
-			}
-		    } else {
-			if { [llength $href_stack] > 0 } {
-			    if { ![empty_string_p [lindex $href_stack end]] } {
-				ad_html_to_text_put_text output [lindex $href_stack end]
-			    }
-			    set href_stack [lreplace $href_stack end end]
-			}
-		    }
-		}
-		pre {
-		    set output(p) 1
-		    if { [empty_string_p $slash] } {
-			incr output(pre)
-		    } else {
-			incr output(pre) -1
-		    }
-		}
-		blockquote {
-		    set output(p) 1
-		    if { [empty_string_p $slash] } {
-			incr output(blockquote)
-			incr output(maxlen) -4
-		    } else {
-			incr output(blockquote) -1
-			incr output(maxlen) 4
-		    }
-		}
-		hr {
-		    set output(p) 1
-		    ad_html_to_text_put_text output [string repeat "-" $output(maxlen)]
-		    set output(p) 1
-		}
-		q {
-		    ad_html_to_text_put_text output \"
-		}
-		img {
-		    if { [empty_string_p $slash] } {
-			set img_info {}
-			if { [info exists attribute_array(alt)] } {
-			    lappend img_info "'$attribute_array(alt)'"
-			}   
-			if { [info exists attribute_array(src)] } {
-			    lappend img_info $attribute_array(src)
-			}
-			if { [llength $img_info] == 0 } {
-			    ad_html_to_text_put_text output {[IMAGE]}
-			} else {
-			    ad_html_to_text_put_text output "\[IMAGE: [join $img_info " "]\]"
-			}
-		    }
-		}
-		default {
-		    # Other tag
-		    if { $showtags_p } {
-			ad_html_to_text_put_text output "&lt;$slash$tagname$attributes&gt;"
-		    }
-		}
-	    }
-	}
-	
-	# set end of last tag to the character following the >
-	set last_tag_end [incr i]
-    }
-    # append everything after the last tag
-    ad_html_to_text_put_text output [string range $html $last_tag_end end]
-    
-    # Close any unclosed tags
-    set output(pre) 0
-    while { $output(blockquote) > 0 } {
-	incr $output(blockquote) -1
-	incr $output(maxlen) 4
-    }
-    
-    # write out URLs, if necessary:
-    if { [llength $href_urls] > 0 } {
-	append output(text) "\n\n[join $href_urls "\n"]"
-    }
-
-    return $output(text)
-}
-
 ad_proc -private ad_html_to_text_put_newline { output_var } {
     Helper proc for ad_html_to_text
 
@@ -1360,7 +1076,7 @@
 } {
     regsub -all {&lt;} $html {<} html
     regsub -all {&gt;} $html {>} html
-    regsub -all {&quot;} $html {"} html
+    regsub -all {&quot;} $html "\"" html
     regsub -all {&mdash;} $html {--} html
     regsub -all {&#151;} $html {--} html
     # Need to do the &amp; last, because otherwise it could interfere with the other expansions,
@@ -1504,8 +1220,9 @@
 ####################
 
 ad_proc -public ad_html_text_convert {
-    {-from text}
-    {-to html}
+    {-from text/plain}
+    {-to text/html}
+    {-maxlen 70}
     text
 } {
     Converts a chunk of text from text/html to text/html.
@@ -1522,46 +1239,91 @@
     @author Lars Pind (lars@pinds.com)
     @creation-date 19 July 2000
 } {
+    set valid_froms { text/enhanced text/plain text/fixed-width text/html }
+    set valid_tos { text/plain text/html }
+    
+    # Validate procedure input
+    set from [ad_decode $from "html" "text/html" "text" "text/plain" "plain" "text/plain" $from]
+    if { [lsearch $valid_froms $from] == -1 } {
+        error "Unknown text input format, '$from'. Valid formats are $valid_froms."
+    }
+    
+    set to [ad_decode $to "html" "text/html" "text" "text/plain" "plain" "text/plain" $to]
+    if { [lsearch $valid_tos $to] == -1 } {
+        error "Unknown text input format, '$to'. Valid formats are $valid_tos."
+    }
+    
+    # Do the conversion
     switch $from {
-        text/html -
-	html {
+        text/enhanced {
 	    switch $to {
-                text/html -
-		html {
-		    ad_html_security_check $text
-		    return [util_close_html_tags $text]
+                text/html {
+                    set text [ad_enhanced_text_to_html $text]
 		}
-                text/plain -
-		text {
-		    return [ad_html_to_text -- $text]
+                text/plain {
+		    set text [ad_enhanced_text_to_plain_text $text]
 		}
-		default {
-		    return -code error "Can only convert to text or html"
+	    }
+        }
+        text/plain {
+	    switch $to {
+                text/html {
+		    set text [ad_text_to_html -- $text]
 		}
+                text/plain {
+		    set text [wrap_string $text $maxlen]
+		}
 	    }
-	} 
-        text/plain -
-	text {
+        }
+        text/fixed-width {
 	    switch $to {
-                text/html -
-		html {
-		    return [ad_text_to_html -- $text]
+                text/html {
+		    set text "<pre>[ad_text_to_html -no_lines -- $text]</pre>"
 		}
-                text/plain -
-		text {
-		    return [wrap_string $text 70]
+                text/plain {
+		    set text [wrap_string $text $maxlen]
 		}
-		default {
-		    return -code error "Can only convert to text or html"
+	    }
+	} 
+        text/html {
+	    switch $to {
+                text/html {
+                    set text [util_close_html_tags $text]
 		}
+                text/plain {
+		    set text [ad_html_to_text -maxlen $maxlen -- $text]
+		}
 	    }
 	} 
-	default {
-	    return -code error "Can only convert from text or html"
-	}
     }
+
+    return $text
 }
 
+ad_proc -public ad_enhanced_text_to_html {
+    text
+} {
+    Converts enhanced text format to normal HTML.
+    @author Lars Pind (lars@pinds.com)
+    @creation-date 2003-01-27
+} {
+    return [ad_text_to_html -no_quote -includes_html -- [util_close_html_tags $text]]
+}
+
+ad_proc -public ad_enhanced_text_to_plain_text {
+    {-maxlen 70}
+    text
+} {
+    Converts enhanced text format to normal plaintext format.
+    @author Lars Pind (lars@pinds.com)
+    @creation-date 2003-01-27
+} {
+    # Convert the HTML version to plaintext.
+    return [ad_html_to_text -maxlen $maxlen -- [ad_enhanced_text_to_html $text]]
+}
+
+
+
 ad_proc -public ad_convert_to_html {
     {-html_p f}
     text
@@ -1640,6 +1402,8 @@
 ad_proc string_truncate { 
     {-len 200}
     {-format html}
+    {-no_format:boolean}
+    {-ellipsis "..."}
     string 
 } {
     Truncates a string to len characters (defaults to the
@@ -1650,6 +1414,7 @@
 
     @param len The lenght to truncate to. Defaults to parameter TruncateDescriptionLength.
     @param format html or text.
+    @param no_format causes hyperlink tags not to get listed at the end of the output.
     @param string The string to truncate.
     @return The truncated string, with HTML tags cloosed or
             converted to text, depending on format.
@@ -1658,13 +1423,17 @@
     @creation-date September 8, 2002
 } {
     if { [string length $string] > $len } {
-        set string "[string range $string 0 $len]..."
+        set string "[string range $string 0 [expr $len-[string length $ellipsis]-1]]$ellipsis"
     } 
     
-    if { [string equal $format "html"] } {
+    if { [string equal $format "html"] && !$no_format_p } {
         set string [util_close_html_tags $string]
     } else {
-        set string [ad_html_to_text -- $string]
+       if { $no_format_p } {
+           set string [ad_html_to_text -no_format $string]
+       } else {
+           set string [ad_html_to_text -- $string]
+       }
     }
     return $string
 }
@@ -1679,7 +1448,9 @@
 
 
 ad_proc -deprecated util_striphtml {html} {
-    Use <a href="/api-doc/proc-view?proc=ad_html_to_text"><code>ad_html_to_text</code></a> instead.
+    Deprecated. Use ad_html_to_text instead.
+
+    @see ad_html_to_text
 } {
     return [ad_html_to_text -- $html]
 }
@@ -1700,6 +1471,7 @@
     plaintext. We'd rather let the user change our
     opinion about the text, e.g. html_p = 't'.
 
+    @see ad_text_to_html
 } {
     if { [regexp -nocase {<p>} $raw_string] || [regexp -nocase {<br>} $raw_string] } {
 	# user was already trying to do this as HTML
@@ -1715,6 +1487,8 @@
     href="/api-doc/proc-view?proc=ad_convert_to_html"><code>ad_convert_to_html</code></a>
     instead.
 
+    @see ad_convert_to_html
+
 }  {
     if { $html_p == "t" } {
 	return $raw_string
@@ -1726,20 +1500,26 @@
 ad_proc -deprecated util_quotehtml { arg } {
     This proc does exactly the same as <a href="/api-doc/proc-view?proc=ad_quotehtml"><code>ad_quotehtml</code></a>. 
     Use that instead. This one will be deleted eventually.
+
+    @see ad_quotehtml
 } {
     return [ad_quotehtml $arg]
 }
 
 ad_proc -deprecated util_quote_double_quotes {arg} {
     This proc does exactly the same as <a href="/api-doc/proc-view?proc=ad_quotehtml"><code>ad_quotehtml</code></a>. 
     Use that instead. This one will be deleted eventually.
+
+    @see ad_quotehtml
 } {
     return [ad_quotehtml $arg]
 }
 
 ad_proc -deprecated philg_quote_double_quotes {arg} {
     This proc does exactly the same as <a href="/api-doc/proc-view?proc=ad_quotehtml"><code>ad_quotehtml</code></a>. 
     Use that instead. This one will be deleted eventually.
+
+    @see ad_quotehtml
 } {
     return [ad_quotehtml $arg]
 }