Index: openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl,v
diff -u -r1.4 -r1.5
--- openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl	6 Nov 2002 22:35:30 -0000	1.4
+++ openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl	23 Sep 2003 13:07:12 -0000	1.5
@@ -16,12 +16,18 @@
 
 ad_proc -public ad_text_to_html {
     -no_links:boolean
+    -no_lines:boolean
+    -no_quote:boolean
+    -includes_html:boolean
     text
 } {
     Converts plaintext to html. Also translates any recognized
     email addresses or URLs into a hyperlink.
 
     @param no_links will prevent it from highlighting
+    @param no_quote will prevent it from HTML-quoting output, so this can be run on
+    semi-HTML input and preserve that formatting. This will also cause spaces/tabs to not be
+    replaced with nbsp's, because this can too easily mess up HTML tags.
 
     @author Branimir Dolicki (branimir@arsdigita.com)
     @author Lars Pind (lars@pinds.com)
@@ -39,6 +45,132 @@
         # (bd) The only purpose of thiese sTaRtUrL and
         # eNdUrL markers is to get rid of trailing dots,
         # commas and things like that. Note that there
+        # is a \x001 special char before and after each marker.
+
+        regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+        regsub -nocase -all {([^a-zA-Z0-9]+)(https://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+        regsub -nocase -all {([^a-zA-Z0-9]+)(ftp://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+
+        # Don't dress URLs that are already links
+        regsub -nocase -all {(href\s*=\s*['"]?)\x001sTaRtUrL([^\x001]*)eNdUrL\x001} $text {\1\2} text
+
+        # email links have the form xxx@xxx.xxx
+        # JCD: don't treat things =xxx@xxx.xxx as email since most
+        # common occurance seems to be in urls (although VPATH bounce
+        # emails like bounce-user=domain.com@sourcehost.com will then not
+        # work correctly). It's all quite ugly.
+
+        regsub -nocase -all {([^a-zA-Z0-9=]+)(mailto:)?([^=\(\)\s:;,@<>]+@[^\(\)\s.:;,@<>]+[.][^\(\)\s:;,@<>]+)} $text \
+            "\\1\x001sTaRtEmAiL\\3eNdEmAiL\x001" text
+    }
+
+    # At this point, before inserting some of our own <, >, and "'s
+    # we quote the ones entered by the user:
+    if { !$no_quote_p } {
+        set text [ad_quotehtml $text]
+    }
+
+    # Convert line breaks
+    if { !$no_lines_p } {
+        set text [util_convert_line_breaks_to_html -includes_html=$includes_html_p -- $text]
+    }
+
+    if { !$no_quote_p } {
+        # Convert every two spaces to an nbsp
+        regsub -all {  } $text "\\\&nbsp; " text
+
+        # Convert tabs to four nbsp's
+        regsub -all {\t} $text {\&nbsp;\&nbsp;\&nbsp;\&nbsp;} text
+    }
+
+    if { !$no_links_p } {
+        # Move the end of the link before any punctuation marks at the end of the URL
+        regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdUrL\x001)} $text {\2\1} text
+        regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdEmAiL\x001)} $text {\2\1} text
+
+        # Dress the links and emails with A HREF
+        regsub -all {\x001sTaRtUrL([^\x001]*)eNdUrL\x001} $text {<a href="\1">\1</a>} text
+        regsub -all {\x001sTaRtEmAiL([^\x001]*)eNdEmAiL\x001} $text {<a href="mailto:\1">\1</a>} text
+        set text [string trimleft $text]
+    }
+
+    # JCD: Remove all the eNd sTaRt stuff and warn if we do it since its bad
+    # to have these left (means something is broken in our regexps above)
+    if {[regsub -all {(\x001sTaRtUrL|eNdUrL\x001|\x001sTaRtEmAiL|eNdEmAiL\x001)} $text {} text]} {
+        ns_log warning "Replaced sTaRt/eNd magic tags in ad_text_to_html"
+    }
+
+    return $text
+}
+
+
+ad_proc -public util_convert_line_breaks_to_html {
+    {-includes_html:boolean}
+    text
+} {
+    Convert line breaks to <p> and <br> tags, respectively.
+} {
+    # Remove any leading or trailing whitespace
+    regsub {^[\s]*} $text {} text
+    regsub {[\s]*$} $text {} text
+
+    # Make sure all line breaks are single \n's
+    regsub -all {\r\n} $text "\n" text
+    regsub -all {\r} $text "\n" text
+
+    # Remove whitespace before \n's
+    regsub -all {[ \t]*\n} $text "\n" text
+
+    # Wrap P's around paragraphs
+    set text "<p>$text</p>"
+    regsub -all {([^\n\s])\n\n([^\n\s])} $text {\1</p><p>\2} text
+
+    # Convert _single_ CRLF's to <br>'s to preserve line breaks
+    # Lars: This must be done after we've made P tags, because otherwise the line
+    # breaks will already have been converted into BR's.
+
+    # remove line breaks right before and after HTML tags that will insert a paragraph break themselves
+    if { $includes_html_p } {
+        foreach tag { ul ol li blockquote p div table tr td th } {
+            regsub -all -nocase "\\n\\s*(</?${tag}\\s*\[^>\]*>)" $text {\1} text
+            regsub -all -nocase "(</?${tag}\\s*\[^>\]*>)\\s*\\n" $text {\1} text
+        }
+    }
+
+    regsub -all {\n} $text "<br>\n" text
+
+    # Add line breaks to P tags
+    regsub -all {</p>} $text "</p>\n" text
+
+    return $text
+}
+
+
+ad_proc -public ad_text_to_html_old {
+    -no_links:boolean
+    text
+} {
+    Converts plaintext to html. Also translates any recognized
+    email addresses or URLs into a hyperlink.
+
+    @param no_links will prevent it from highlighting
+
+    @author Branimir Dolicki (branimir@arsdigita.com)
+    @author Lars Pind (lars@pinds.com)
+    @creation-date 19 July 2000
+} {
+
+    if { !$no_links_p } {
+        # We start by putting a space in front so our URL/email highlighting will work
+        # for URLs/emails right in the beginning of the text.
+        set text " $text"
+
+        # if something is " http://" or " https://"
+        # we assume it is a link to an outside source.
+
+        # (bd) The only purpose of thiese sTaRtUrL and
+        # eNdUrL markers is to get rid of trailing dots,
+        # commas and things like that. Note that there
         # is a TAB before and after each marker.
 
         regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text