Index: openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl,v diff -u -r1.6 -r1.7 --- openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl 23 Sep 2003 13:15:27 -0000 1.6 +++ openacs.org-dev/packages/acs-tcl/tcl/text-html-procs.tcl 23 Sep 2003 13:19:45 -0000 1.7 @@ -33,7 +33,10 @@ @author Lars Pind (lars@pinds.com) @creation-date 19 July 2000 } { - + if { [empty_string_p $text] } { + return {} + } + if { !$no_links_p } { # We start by putting a space in front so our URL/email highlighting will work # for URLs/emails right in the beginning of the text. @@ -103,7 +106,6 @@ return $text } - ad_proc -public util_convert_line_breaks_to_html { {-includes_html:boolean} text @@ -122,7 +124,7 @@ regsub -all {[ \t]*\n} $text "\n" text # Wrap P's around paragraphs - set text "
$text
" + set text "$text
" regsub -all {([^\n\s])\n\n([^\n\s])} $text {\1\2} text
# Convert _single_ CRLF's to
's to preserve line breaks
@@ -146,77 +148,7 @@
}
-ad_proc -public ad_text_to_html_old {
- -no_links:boolean
- text
-} {
- Converts plaintext to html. Also translates any recognized
- email addresses or URLs into a hyperlink.
- @param no_links will prevent it from highlighting
-
- @author Branimir Dolicki (branimir@arsdigita.com)
- @author Lars Pind (lars@pinds.com)
- @creation-date 19 July 2000
-} {
-
- if { !$no_links_p } {
- # We start by putting a space in front so our URL/email highlighting will work
- # for URLs/emails right in the beginning of the text.
- set text " $text"
-
- # if something is " http://" or " https://"
- # we assume it is a link to an outside source.
-
- # (bd) The only purpose of thiese sTaRtUrL and
- # eNdUrL markers is to get rid of trailing dots,
- # commas and things like that. Note that there
- # is a TAB before and after each marker.
-
- regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
- regsub -nocase -all {([^a-zA-Z0-9]+)(https://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
- regsub -nocase -all {([^a-zA-Z0-9]+)(ftp://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
-
- # email links have the form xxx@xxx.xxx
- regsub -nocase -all {([^a-zA-Z0-9]+)([^\(\)\s:;,@<>]+@[^\(\)\s.:;,@<>]+[.][^\(\)\s:;,@<>]+)} $text \
- "\\1\tsTaRtEmAiL\\2eNdEmAiL\t" text
-
- }
-
- # At this point, before inserting some of our own <, >, and "'s
- # we quote the ones entered by the user:
- set text [ad_quotehtml $text]
-
- # Convert _single_ CRLF's to
's to preserve line breaks
- regsub -all {\r*\n} $text "
" text
-
- # Convert every two spaces to an nbsp
- regsub -all { } $text "\\\ " text
-
- # turn CRLFCRLF into
- if { [regsub -all {\r\n\s*\r\n} $text "
" text] == 0 } { - # try LFLF - if { [regsub -all {\n\s*\n} $text "
" text] == 0 } { - # try CRCR - regsub -all {\r\s*\r} $text "
" text - } - } - - if { !$no_links_p } { - # Dress the links and emails with A HREF - regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdUrL\t)} $text {\2\1} text - regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdEmAiL\t)} $text {\2\1} text - regsub -all {\tsTaRtUrL([^\t]*)eNdUrL\t} $text {\1} text - regsub -all {\tsTaRtEmAiL([^\t]*)eNdEmAiL\t} $text {\1} text - set text [string trimleft $text] - } - - # Convert every tab to 4 nbsp's - regsub -all {\t} $text {\ \ \ \ } text - - return $text -} - ad_proc -public ad_quotehtml { arg } { Quotes ampersands, double-quotes, and angle brackets in $arg. @@ -484,14 +416,17 @@ # we should now walk the stack and close any open tags. for {set i $tagptr} { $i > -1 } {incr i -1} { - # append out " $tagstack($i)>" - append out "$tagstack($i)>" + set tag $tagstack($i) + + # LARS: Only close tags which we aren't supposed to remove + if { ![string equal $syn($tag) "discard"] && ![string equal $syn($tag) "remove"] } { + append out "$tagstack($i)>" + } } return $out } - ad_proc ad_parse_html_attributes { -attribute_array html @@ -718,9 +653,9 @@ if { ![info exists allowed_tag($tagname)] } { # Nope, this was a naughty tag. - return "
For security reasons we only accept the submission of HTML - containing the following tags:
[join $allowed_tags_list " "]- You have a <$tagname> tag in there." + return "For security reasons we only accept the submission of HTML + containing the following tags:
[join $allowed_tags_list " "]
.
+ You have a <$tagname> tag in there."
} else {
# Legal tag.
@@ -760,6 +695,7 @@
# HTML -> Text
#
####################
+
ad_proc -public ad_html_to_text {
{-maxlen 70}
{-showtags:boolean}
@@ -798,6 +734,15 @@
for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } {
# append everything up to and not including the tag-opening <
ad_html_to_text_put_text output [string range $html $last_tag_end [expr {$i - 1}]]
+
+ # Check that tag doesn't start with whitespace, in which case it's just a lone < that
+ # was errorneously left unquoted
+ if { $i >= [expr $length-1] || ![string is alpha [string index $html [expr $i + 1]]] } {
+ # Output the < and continue with next character
+ ad_html_to_text_put_text output "<"
+ set last_tag_end [incr i]
+ continue
+ }
# we're inside a tag now. Find the end of it
@@ -837,7 +782,7 @@
}
set string_delimiter [string index $html $string_delimiter_idx]
- # If the greeater than sign appears before any of the string delimters, we've found the tag end.
+ # If the greater than sign appears before any of the string delimters, we've found the tag end.
if { $gt_idx < $string_delimiter_idx || $string_delimiter_idx == -1 } {
# we found the tag end
set i $gt_idx
@@ -856,7 +801,7 @@
set full_tag [string range $html $tag_start [expr { $i - 1 }]]
- if { ![regexp {(/?)([^\s]+)[\s]*(\s.*)?} $full_tag match slash tagname attributes] } {
+ if { ![regexp {^(/?)([^\s]+)[\s]*(\s.*)?$} $full_tag match slash tagname attributes] } {
# A malformed tag -- just delete it
} else {
@@ -946,7 +891,7 @@
ad_html_to_text_put_text output \"
}
img {
- if { [empty_string_p $slash] } {
+ if { [empty_string_p $slash] && !$no_format_p } {
set img_info {}
if { [info exists attribute_array(alt)] } {
lappend img_info "'$attribute_array(alt)'"
@@ -979,8 +924,8 @@
# Close any unclosed tags
set output(pre) 0
while { $output(blockquote) > 0 } {
- incr $output(blockquote) -1
- incr $output(maxlen) 4
+ incr output(blockquote) -1
+ incr output(maxlen) 4
}
# write out URLs, if necessary:
@@ -991,235 +936,6 @@
return $output(text)
}
-ad_proc -public ad_html_to_text_old {
- {-maxlen 70}
- {-showtags:boolean}
- html
-} {
- Returns a best-guess plain text version of an HTML fragment.
- Parses the HTML and does some simple formatting. The parser and
- formatting
- is pretty stupid, but it's better than nothing.
-
- @param maxlen the line length you want your output wrapped to.
- @param showtags causes any unknown (and uninterpreted) tags to get shown in the output.
-
- @author Lars Pind (lars@pinds.com)
- @author Aaron Swartz (aaron@swartzfam.com)
- @creation-date 19 July 2000
-} {
- set output(text) {}
- set output(linelen) 0
- set output(maxlen) $maxlen
- set output(pre) 0
- set output(p) 0
- set output(br) 0
- set output(space) 0
- set output(blockquote) 0
-
- set length [string length $html]
- set last_tag_end 0
-
- # For showing the URL of links.
- set href_urls [list]
- set href_stack [list]
-
- for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } {
- # append everything up to and not including the tag-opening <
- ad_html_to_text_put_text output [string range $html $last_tag_end [expr {$i - 1}]]
-
- # we're inside a tag now. Find the end of it
-
- # make i point to the char after the <
- incr i
- set tag_start $i
-
- set count 0
- while 1 {
- if { [incr count] > 100 } {
- error "There appears to be a programming bug in ad_html_to_text: We've entered an infinite loop."
- }
- # Find the positions of the first quote, apostrophe and greater-than sign.
- set quote_idx [string first \" $html $i]
- set apostrophe_idx [string first ' $html $i]
- set gt_idx [string first > $html $i]
-
- # If there is no greater-than sign, then the tag isn't closed.
- if { $gt_idx == -1 } {
- set i $length
- break
- }
-
- # Find the first of the quote and the apostrophe
- if { $apostrophe_idx == -1 } {
- set string_delimiter_idx $quote_idx
- } else {
- if { $quote_idx == -1 } {
- set string_delimiter_idx $apostrophe_idx
- } else {
- if { $apostrophe_idx < $quote_idx } {
- set string_delimiter_idx $apostrophe_idx
- } else {
- set string_delimiter_idx $quote_idx
- }
- }
- }
- set string_delimiter [string index $html $string_delimiter_idx]
-
- # If the greeater than sign appears before any of the string delimters, we've found the tag end.
- if { $gt_idx < $string_delimiter_idx || $string_delimiter_idx == -1 } {
- # we found the tag end
- set i $gt_idx
- break
- }
-
- # Otherwise, we'll have to skip past the ending string delimiter
- set i [string first $string_delimiter $html [incr string_delimiter_idx]]
- if { $i == -1 } {
- # Missing string end delimiter
- set i $length
- break
- }
- incr i
- }
-
- set full_tag [string range $html $tag_start [expr { $i - 1 }]]
-
- if { ![regexp {(/?)([^\s]+)[\s]*(\s.*)?} $full_tag match slash tagname attributes] } {
- # A malformed tag -- just delete it
- } else {
-
- # Reset/create attribute array
- array unset attribute_array
-
- # Parse the attributes
- ad_parse_html_attributes -attribute_array attribute_array $attributes
-
- switch -- [string tolower $tagname] {
- p - ul - ol - table {
- set output(p) 1
- }
- br {
- ad_html_to_text_put_newline output
- }
- tr - td - th {
- set output(br) 1
- }
- h1 - h2 - h3 - h4 - h5 - h6 {
- set output(p) 1
- if { [empty_string_p $slash] } {
- ad_html_to_text_put_text output [string repeat "*" [string index $tagname 1]]
- }
- }
- li {
- set output(br) 1
- if { [empty_string_p $slash] } {
- ad_html_to_text_put_text output "- "
- }
- }
- strong - b {
- ad_html_to_text_put_text output "*"
- }
- em - i - cite - u {
- ad_html_to_text_put_text output "_"
- }
- a {
- if { [empty_string_p $slash] } {
- if { [info exists attribute_array(href)] } {
- if { [info exists attribute_array(title)] } {
- set title ": '$attribute_array(title)'"
- } else {
- set title ""
- }
- set href_no [expr [llength $href_urls] + 1]
- lappend href_urls "\[$href_no\] $attribute_array(href) "
- lappend href_stack "\[$href_no$title\]"
- } elseif { [info exists attribute_array(title)] } {
- lappend href_stack "\[$attribute_array(title)\]"
- } else {
- lappend href_stack {}
- }
- } else {
- if { [llength $href_stack] > 0 } {
- if { ![empty_string_p [lindex $href_stack end]] } {
- ad_html_to_text_put_text output [lindex $href_stack end]
- }
- set href_stack [lreplace $href_stack end end]
- }
- }
- }
- pre {
- set output(p) 1
- if { [empty_string_p $slash] } {
- incr output(pre)
- } else {
- incr output(pre) -1
- }
- }
- blockquote {
- set output(p) 1
- if { [empty_string_p $slash] } {
- incr output(blockquote)
- incr output(maxlen) -4
- } else {
- incr output(blockquote) -1
- incr output(maxlen) 4
- }
- }
- hr {
- set output(p) 1
- ad_html_to_text_put_text output [string repeat "-" $output(maxlen)]
- set output(p) 1
- }
- q {
- ad_html_to_text_put_text output \"
- }
- img {
- if { [empty_string_p $slash] } {
- set img_info {}
- if { [info exists attribute_array(alt)] } {
- lappend img_info "'$attribute_array(alt)'"
- }
- if { [info exists attribute_array(src)] } {
- lappend img_info $attribute_array(src)
- }
- if { [llength $img_info] == 0 } {
- ad_html_to_text_put_text output {[IMAGE]}
- } else {
- ad_html_to_text_put_text output "\[IMAGE: [join $img_info " "]\]"
- }
- }
- }
- default {
- # Other tag
- if { $showtags_p } {
- ad_html_to_text_put_text output "<$slash$tagname$attributes>"
- }
- }
- }
- }
-
- # set end of last tag to the character following the >
- set last_tag_end [incr i]
- }
- # append everything after the last tag
- ad_html_to_text_put_text output [string range $html $last_tag_end end]
-
- # Close any unclosed tags
- set output(pre) 0
- while { $output(blockquote) > 0 } {
- incr $output(blockquote) -1
- incr $output(maxlen) 4
- }
-
- # write out URLs, if necessary:
- if { [llength $href_urls] > 0 } {
- append output(text) "\n\n[join $href_urls "\n"]"
- }
-
- return $output(text)
-}
-
ad_proc -private ad_html_to_text_put_newline { output_var } {
Helper proc for ad_html_to_text
@@ -1360,7 +1076,7 @@
} {
regsub -all {<} $html {<} html
regsub -all {>} $html {>} html
- regsub -all {"} $html {"} html
+ regsub -all {"} $html "\"" html
regsub -all {—} $html {--} html
regsub -all {} $html {--} html
# Need to do the & last, because otherwise it could interfere with the other expansions,
@@ -1504,8 +1220,9 @@
####################
ad_proc -public ad_html_text_convert {
- {-from text}
- {-to html}
+ {-from text/plain}
+ {-to text/html}
+ {-maxlen 70}
text
} {
Converts a chunk of text from text/html to text/html.
@@ -1522,46 +1239,91 @@
@author Lars Pind (lars@pinds.com)
@creation-date 19 July 2000
} {
+ set valid_froms { text/enhanced text/plain text/fixed-width text/html }
+ set valid_tos { text/plain text/html }
+
+ # Validate procedure input
+ set from [ad_decode $from "html" "text/html" "text" "text/plain" "plain" "text/plain" $from]
+ if { [lsearch $valid_froms $from] == -1 } {
+ error "Unknown text input format, '$from'. Valid formats are $valid_froms."
+ }
+
+ set to [ad_decode $to "html" "text/html" "text" "text/plain" "plain" "text/plain" $to]
+ if { [lsearch $valid_tos $to] == -1 } {
+ error "Unknown text input format, '$to'. Valid formats are $valid_tos."
+ }
+
+ # Do the conversion
switch $from {
- text/html -
- html {
+ text/enhanced {
switch $to {
- text/html -
- html {
- ad_html_security_check $text
- return [util_close_html_tags $text]
+ text/html {
+ set text [ad_enhanced_text_to_html $text]
}
- text/plain -
- text {
- return [ad_html_to_text -- $text]
+ text/plain {
+ set text [ad_enhanced_text_to_plain_text $text]
}
- default {
- return -code error "Can only convert to text or html"
+ }
+ }
+ text/plain {
+ switch $to {
+ text/html {
+ set text [ad_text_to_html -- $text]
}
+ text/plain {
+ set text [wrap_string $text $maxlen]
+ }
}
- }
- text/plain -
- text {
+ }
+ text/fixed-width {
switch $to {
- text/html -
- html {
- return [ad_text_to_html -- $text]
+ text/html {
+ set text "[ad_text_to_html -no_lines -- $text]" } - text/plain - - text { - return [wrap_string $text 70] + text/plain { + set text [wrap_string $text $maxlen] } - default { - return -code error "Can only convert to text or html" + } + } + text/html { + switch $to { + text/html { + set text [util_close_html_tags $text] } + text/plain { + set text [ad_html_to_text -maxlen $maxlen -- $text] + } } } - default { - return -code error "Can only convert from text or html" - } } + + return $text } +ad_proc -public ad_enhanced_text_to_html { + text +} { + Converts enhanced text format to normal HTML. + @author Lars Pind (lars@pinds.com) + @creation-date 2003-01-27 +} { + return [ad_text_to_html -no_quote -includes_html -- [util_close_html_tags $text]] +} + +ad_proc -public ad_enhanced_text_to_plain_text { + {-maxlen 70} + text +} { + Converts enhanced text format to normal plaintext format. + @author Lars Pind (lars@pinds.com) + @creation-date 2003-01-27 +} { + # Convert the HTML version to plaintext. + return [ad_html_to_text -maxlen $maxlen -- [ad_enhanced_text_to_html $text]] +} + + + ad_proc -public ad_convert_to_html { {-html_p f} text @@ -1640,6 +1402,8 @@ ad_proc string_truncate { {-len 200} {-format html} + {-no_format:boolean} + {-ellipsis "..."} string } { Truncates a string to len characters (defaults to the @@ -1650,6 +1414,7 @@ @param len The lenght to truncate to. Defaults to parameter TruncateDescriptionLength. @param format html or text. + @param no_format causes hyperlink tags not to get listed at the end of the output. @param string The string to truncate. @return The truncated string, with HTML tags cloosed or converted to text, depending on format. @@ -1658,13 +1423,17 @@ @creation-date September 8, 2002 } { if { [string length $string] > $len } { - set string "[string range $string 0 $len]..." + set string "[string range $string 0 [expr $len-[string length $ellipsis]-1]]$ellipsis" } - if { [string equal $format "html"] } { + if { [string equal $format "html"] && !$no_format_p } { set string [util_close_html_tags $string] } else { - set string [ad_html_to_text -- $string] + if { $no_format_p } { + set string [ad_html_to_text -no_format $string] + } else { + set string [ad_html_to_text -- $string] + } } return $string } @@ -1679,7 +1448,9 @@ ad_proc -deprecated util_striphtml {html} { - Use
ad_html_to_text
instead.
+ Deprecated. Use ad_html_to_text instead.
+
+ @see ad_html_to_text
} {
return [ad_html_to_text -- $html]
}
@@ -1700,6 +1471,7 @@
plaintext. We'd rather let the user change our
opinion about the text, e.g. html_p = 't'.
+ @see ad_text_to_html
} {
if { [regexp -nocase {} $raw_string] || [regexp -nocase {
} $raw_string] } {
# user was already trying to do this as HTML
@@ -1715,6 +1487,8 @@
href="/api-doc/proc-view?proc=ad_convert_to_html">ad_convert_to_html
instead.
+ @see ad_convert_to_html
+
} {
if { $html_p == "t" } {
return $raw_string
@@ -1726,20 +1500,26 @@
ad_proc -deprecated util_quotehtml { arg } {
This proc does exactly the same as ad_quotehtml
.
Use that instead. This one will be deleted eventually.
+
+ @see ad_quotehtml
} {
return [ad_quotehtml $arg]
}
ad_proc -deprecated util_quote_double_quotes {arg} {
This proc does exactly the same as ad_quotehtml
.
Use that instead. This one will be deleted eventually.
+
+ @see ad_quotehtml
} {
return [ad_quotehtml $arg]
}
ad_proc -deprecated philg_quote_double_quotes {arg} {
This proc does exactly the same as ad_quotehtml
.
Use that instead. This one will be deleted eventually.
+
+ @see ad_quotehtml
} {
return [ad_quotehtml $arg]
}