Index: openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl,v
diff -u -r1.109.2.32 -r1.109.2.33
--- openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 29 Aug 2022 11:53:56 -0000 1.109.2.32
+++ openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 29 Aug 2022 13:02:43 -0000 1.109.2.33
@@ -318,7 +318,7 @@
if {$closeTags} {
if {[ns_conn isconnected]} {
append reason " called in [ns_conn url]?[ns_conn query]"
- }
+ }
ns_log notice "early call closeTags, reason: $reason"
set text [util_close_html_tags_ns_parsehtml $text]
}
@@ -422,7 +422,7 @@
regsub -all -nocase "\[\r\n\]*(?($tags)\[\r\n\]*\[^>\]*>)\[\r\n\]*" $text {\1} text
}
ns_log notice "... before pre handling <$text>"
-
+
if {[::acs::icanuse "ns_parsehtml"] && $contains_pre_p} {
#
# Convert _single_ CRLF's to
's to preserve line breaks
@@ -928,7 +928,9 @@
html
{pos 0}
} {
- This is a wrapper proc for ad_parse_html_attributes_upvar
, so you can parse attributes from a string without upvar'ing.
+ This is a wrapper proc for
+ ad_parse_html_attributes_upvar
, so you can parse
+ attributes from a string without upvar'ing.
See the documentation for the other proc.
@author Lars Pind (lars@pinds.com)
@@ -943,7 +945,7 @@
}
-ad_proc ad_parse_html_attributes_upvar {
+ad_proc -private ad_parse_html_attributes_upvar {
-attribute_array
html_varname
pos_varname
@@ -1144,69 +1146,167 @@
set allow_all_$var [expr {"*" in [set allowed_$var]}]
}
- # loop over all tags
- for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } {
- # move past the tag-opening <
- incr i
+ foreach var {attributes tags protocols} {
+ if {[set allow_all_$var]} {
+ set allowed_$var *
+ }
+ }
- if { ![regexp -indices -start $i {\A/?([-_a-zA-Z0-9:]+)\s*} $html match name_idx] } {
- # The tag-opener isn't followed by USASCII letters (with or without optional initial slash)
- # Not considered a tag. Shouldn't do any harm in browsers.
- # (Tested with digits, with A syntax, with whitespace)
- } else {
- #
- # The tag is potentially ok ... now let's see if it's
- # on the allowed list.
- #
- set tagname [string tolower [string range $html [lindex $name_idx 0] [lindex $name_idx 1]]]
+ return [ad_html_security_check_helper \
+ -allowed_tags $allowed_tags \
+ -allowed_attributes $allowed_attributes \
+ -allowed_protocols $allowed_protocols \
+ $html]
+ }
- if { !$allow_all_tags && $tagname ni $allowed_tags } {
+ ad_proc -private ad_html_security_check_protocol {
+ -attr_name
+ -attr_value
+ -allowed_protocols
+ } {
+ Check for allowed protocol in attribute value
+ } {
+ if { [regexp {^\s*(([^\s:]+):\/\/|(data|javascript|blob):)} $attr_value match . p1 p2] } {
+ set protocol [string tolower [expr {$p1 ne "" ? $p1 : $p2}]]
+ if { $protocol ni $allowed_protocols } {
+ return [subst {The allowed URLs can only use these protocols:
+ [join $allowed_protocols ", "].
+ You have a '$protocol' protocol in attribute '$attr_name' there.}]
+ }
+ }
+ return ""
+ }
+
+ if {[::acs::icanuse "ns_parsehtml"]} {
+ ad_proc -private ad_html_security_check_helper {
+ -allowed_tags:required
+ -allowed_attributes:required
+ -allowed_protocols:required
+ html
+ } {
+ Helper proc for ad_html_security_check doing the hard work
+ @see ad_html_security_check
+ } {
+ # loop over all tags
+ set parseListElements [ns_parsehtml -onlytags $html]
+ foreach parseListElement $parseListElements {
+ lassign [string tolower $parseListElement] tag dict
+
+ if {[string range $tag 0 0] eq "/"} {
#
+ # Ignore closing tags
+ #
+ continue
+ }
+ if {$allowed_tags ne "*" && $tag ni $allowed_tags} {
+ #
# This tag is not allowed.
#
return [subst {For security reasons we only accept the submission of HTML
containing the following tags: [join $allowed_tags " "].
- You have a '[string toupper $tagname]' tag in there.
+ You have a '[string toupper $tag]' tag in there.
}]
} else {
#
- # Valid and allowed tag. Make i point to the first
- # character inside the tag, after the tag name and
- # any whitespace.
+ # Valid and allowed tag. Check attributes.
#
- set i [expr { [lindex $match 1] + 1}]
+ if { $allowed_attributes ne "*"} {
+ foreach attr_name [dict keys $dict] {
+ if {$attr_name ni $allowed_attributes} {
+ return "The attribute '$attr_name' is not allowed for $tagname tags"
+ }
+ #
+ # Attribute is allowed. Check now protocols
+ #
+ if { $allowed_protocols ne "*" && $attr_name ne "style" } {
+ set r [ad_html_security_check_protocol \
+ -attr_name $attr_name \
+ -attr_value [dict get $dict $attr_name] \
+ -allowed_protocols $allowed_protocols]
+ if {$r ne ""} {
+ return $r
+ }
+ }
+ }
+ }
+ }
+ }
+ return ""
+ }
+ } else {
+ ad_proc -private ad_html_security_check {
+ -allowed_tags:required
+ -allowed_attributes:required
+ -allowed_protocols:required
+ html
+ } {
+ Helper proc for ad_html_security_check doing the hard work
+ @see ad_html_security_check
+ } {
+ # loop over all tags
- set attr_list [ad_parse_html_attributes_upvar html i]
+ for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } {
+ # move past the tag-opening <
+ incr i
- foreach attribute $attr_list {
+ if { ![regexp -indices -start $i {\A/?([-_a-zA-Z0-9:]+)\s*} $html match name_idx] } {
+ # The tag-opener isn't followed by USASCII letters (with or without optional initial slash)
+ # Not considered a tag. Shouldn't do any harm in browsers.
+ # (Tested with digits, with A syntax, with whitespace)
+ } else {
+ #
+ # The tag is potentially ok ... now let's see if it's
+ # on the allowed list.
+ #
+ set tagname [string tolower [string range $html [lindex $name_idx 0] [lindex $name_idx 1]]]
+
+ if {$allowed_tags ne "*" && $tagname ni $allowed_tags } {
#
- # All attribute names in $attr_list are
- # already lowercase.
+ # This tag is not allowed.
#
- lassign $attribute attr_name attr_value
+ return [subst {For security reasons we only accept the submission of HTML
+ containing the following tags: [join $allowed_tags " "].
+ You have a '[string toupper $tagname]' tag in there.
+ }]
+ } else {
+ #
+ # Valid and allowed tag. Make i point to the first
+ # character inside the tag, after the tag name and
+ # any whitespace.
+ #
+ set i [expr { [lindex $match 1] + 1}]
- if { !$allow_all_attributes
- && $attr_name ni $allowed_attributes} {
- return "The attribute '$attr_name' is not allowed for $tagname tags"
- }
+ set attr_list [ad_parse_html_attributes_upvar html i]
- if { !$allow_all_protocols && $attr_name ne "style" } {
- if { [regexp {^\s*(([^\s:]+):\/\/|(data|javascript|blob):)} $attr_value match . p1 p2] } {
- set protocol [string tolower [expr {$p1 ne "" ? $p1 : $p2}]]
- if { $protocol ni $allowed_protocols } {
- return [subst {The allowed URLs can only use these protocols:
- [join $allowed_protocols ", "].
- You have a '$protocol' protocol in there.}]
+ foreach attribute $attr_list {
+ #
+ # All attribute names in $attr_list are
+ # already lowercase.
+ #
+ lassign $attribute attr_name attr_value
+
+ if { $allowed_attributes ne "*"
+ && $attr_name ni $allowed_attributes
+ } {
+ return "The attribute '$attr_name' is not allowed for $tagname tags"
+ }
+
+ if { $allowed_protocols ne "*" && $attr_name ne "style" } {
+ set r [ad_html_security_check_protocol \
+ -attr_name $attr_name \
+ -attr_value $attr_value \
+ -allowed_protocols $allowed_protocols]
+ if {$r ne ""} {
+ return $r
}
}
}
}
}
}
+ return ""
}
- return ""
}
-
# This was created in order to pre-process some content to be fed
# to tDOM in ad_sanitize_html. In fact, even with its least picky
# behavior, tDOM cannot swallow whatever markup you give it. This
@@ -2653,7 +2753,7 @@
if {[info exists tags_are_closed]} {
ns_log notice "No need to call util_close_html_tags"
} else {
- ns_log notice "regular call closeTags (from $from to $to)"
+ ns_log notice "regular call closeTags (from $from to $to)"
set text [util_close_html_tags $text $truncate_len $truncate_len $ellipsis $more]
}
}