Index: openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl,v diff -u -r1.109.2.25 -r1.109.2.26 --- openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 25 Jan 2022 23:12:04 -0000 1.109.2.25 +++ openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 2 Mar 2022 17:01:22 -0000 1.109.2.26 @@ -856,15 +856,30 @@ - ad_proc ad_html_security_check { html } { + ad_proc ad_html_security_check { + -allowed_tags + -allowed_attributes + -allowed_protocols + html + } { Returns a human-readable explanation if the user has used any - HTML tag other than the ones marked allowed in antispam - section of the kernel parameters. Otherwise returns an empty - string. + HTML tag other than the allowed ones. - @return a human-readable, plaintext explanation of what's wrong with the user's input. + It uses for checking the provided values. If these values are + not provided the function takes the union of the per-package + instance value and the values from the "antispam" section of + the kernel parameters. + @param allowed_tags + @param allowed_attributes + @param allowed_protocols + @param html The HTML text being validated. + + @return a human-readable, plaintext explanation of what's + wrong with the user's input. If everthing is ok, + return an empty string. + @author Lars Pind (lars@pinds.com) @creation-date 20 July 2000 @@ -880,33 +895,37 @@ return "For security reasons, you're not allowed to have the less-than-percent combination in your input." } - array set allowed_attribute [list] - array set allowed_tag [list] - array set allowed_protocol [list] + if {[info exists allowed_tags]} { + lappend allowed_tags {*}$extra_tags + } else { + set allowed_tags [concat \ + $extra_tags \ + [ad_parameter_all_values_as_list \ + -package_id $::acs::kernel_id \ + AllowedTag antispam] \ + [ad_parameter_all_values_as_list AllowedTag antispam]] + } - # Use the antispam tags for this package instance and whatever is on the kernel. - set allowed_tags_list [concat \ - $extra_tags \ - [ad_parameter_all_values_as_list -package_id $::acs::kernel_id AllowedTag antispam] \ - [ad_parameter_all_values_as_list AllowedTag antispam]] + if {![info exists allowed_attributes]} { + set allowed_attributes [concat \ + [ad_parameter_all_values_as_list \ + -package_id $::acs::kernel_id \ + AllowedAttribute antispam] \ + [ad_parameter_all_values_as_list AllowedAttribute antispam]] + } - set allowed_attributes_list [concat \ - [ad_parameter_all_values_as_list -package_id $::acs::kernel_id AllowedAttribute antispam] \ - [ad_parameter_all_values_as_list AllowedAttribute antispam]] + if {![info exists allowed_protocols]} { + set allowed_protocols [concat \ + [ad_parameter_all_values_as_list \ + -package_id $::acs::kernel_id \ + AllowedProtocol antispam] \ + [ad_parameter_all_values_as_list AllowedProtocol antispam]] + } - set allowed_protocols_list [concat \ - [ad_parameter_all_values_as_list -package_id $::acs::kernel_id AllowedProtocol antispam] \ - [ad_parameter_all_values_as_list AllowedProtocol antispam]] - - foreach attribute $allowed_attributes_list { - set allowed_attribute([string tolower $attribute]) 1 + foreach var {attributes tags protocols} { + set allowed_$var [string tolower [set allowed_$var]] + set allow_all_$var [expr {"*" in [set allowed_$var]}] } - foreach tagname $allowed_tags_list { - set allowed_tag([string tolower $tagname]) 1 - } - foreach protocol $allowed_protocols_list { - set allowed_protocol([string tolower $protocol]) 1 - } # loop over all tags for { set i [string first < $html] } { $i != -1 } { set i [string first < $html $i] } { @@ -918,45 +937,57 @@ # Not considered a tag. Shouldn't do any harm in browsers. # (Tested with digits, with A syntax, with whitespace) } else { - # The tag was valid ... now let's see if it's on the allowed list. + # + # The tag is potentially ok ... now let's see if it's + # on the allowed list. + # set tagname [string tolower [string range $html [lindex $name_idx 0] [lindex $name_idx 1]]] - if { ![info exists allowed_tag($tagname)] && ![info exists allowed_tag(*)] } { - # Nope, this was a naughty tag. - return "For security reasons we only accept the submission of HTML - containing the following tags: [join $allowed_tags_list " "]. - You have a [string toupper $tagname] tag in there." + if { !$allow_all_tags && $tagname ni $allowed_tags } { + # + # This tag is not allowed. + # + return [subst {For security reasons we only accept the submission of HTML + containing the following tags: [join $allowed_tags " "]. + You have a '[string toupper $tagname]' tag in there. + }] } else { - # Legal tag. - - # Make i point to the first character inside the tag, after the tag name and any whitespace + # + # Valid and allowed tag. Make i point to the first + # character inside the tag, after the tag name and + # any whitespace. + # set i [expr { [lindex $match 1] + 1}] set attr_list [ad_parse_html_attributes_upvar html i] foreach attribute $attr_list { + # + # All attribute names in $attr_list are + # already lower case. + # lassign $attribute attr_name attr_value - if { ![info exists allowed_attribute($attr_name)] - && ![info exists allowed_attribute(*)] } { + if { !$allow_all_attributes + && $attr_name ni $allowed_attributes} { return "The attribute '$attr_name' is not allowed for $tagname tags" } - if { [string tolower $attr_name] ne "style" } { + if { !$allow_all_protocols && $attr_name ne "style" } { if { [regexp {^\s*(([^\s:]+):\/\/|(data|javascript|blob):)} $attr_value match . p1 p2] } { - set protocol [expr {$p1 ne "" ? $p1 : $p2}] - if { ![info exists allowed_protocol([string tolower $protocol])] - && ![info exists allowed_protocol(*)] } { - return "Your URLs can only use these protocols: [join $allowed_protocols_list ", "]. - You have a '$protocol' protocol in there." + set protocol [string tolower [expr {$p1 ne "" ? $p1 : $p2}]] + if { $protocol ni $allowed_protocols } { + return [subst {The allowed URLs can only use these protocols: + [join $allowed_protocols ", "]. + You have a '$protocol' protocol in there.}] } } } } } } } - return {} + return "" } # This was created in order to pre-process some content to be fed Index: openacs-4/packages/acs-tcl/tcl/test/html-conversion-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/test/html-conversion-procs.tcl,v diff -u -r1.20.2.8 -r1.20.2.9 --- openacs-4/packages/acs-tcl/tcl/test/html-conversion-procs.tcl 16 Feb 2021 20:59:03 -0000 1.20.2.8 +++ openacs-4/packages/acs-tcl/tcl/test/html-conversion-procs.tcl 2 Mar 2022 17:01:22 -0000 1.20.2.9 @@ -115,11 +115,11 @@ ad_parse_html_attributes_upvar } \ ad_html_security_check_href_allowed { - tests is href attribute is allowed of A tags + Tests is href attribute is allowed } { - set html "An Link" + set html "An Link" aa_equals "href with http:// is allowed for 'a' tags" [ad_html_security_check $html] "" - set html "An Link" + set html "An Link" aa_equals "href with https:// is allowed for 'a' tags" [ad_html_security_check $html] "" } @@ -132,20 +132,39 @@ ad_parse_html_attributes_upvar } \ ad_html_security_check_forbidden_protolcols { - tests is href attribute is forbidden for certain tags + Tests is href contains allowed protocols } { - set html "An Link" + set html "An Link" aa_true "protocol 'foo' is not allowed" {[ad_html_security_check $html] ne ""} - set html "An Link" + set html "An Link" aa_true "protocol 'javascript' is not allowed" {[ad_html_security_check $html] ne ""} - set html "An Link" + set html "An Link" aa_true "protocol 'data' is not allowed" {[ad_html_security_check $html] ne ""} - set html "An Link" + set html "An Link" aa_true "protocol 'blob' is not allowed" {[ad_html_security_check $html] ne ""} } aa_register_case \ -cats {api smoke} \ + -procs { + ad_html_security_check + } \ + ad_html_security_check_forbidden_tags { + tests is text contains allowed tags +} { + set html "hello An Link world." + aa_true "Tag a is not allowed - empty tag list" {[ad_html_security_check -allowed_tags "" $html] ne {}} + + set html "hello An Link world." + aa_true "Tag a is not allowed - non-empty tag list" {[ad_html_security_check -allowed_tags "b h1" $html] ne {}} + + set html "hello An Link world." + aa_equals "Tag 'a' is allowed" [ad_html_security_check -allowed_tags "a b h1" $html] "" +} + + +aa_register_case \ + -cats {api smoke} \ -procs {util_close_html_tags} \ util_close_html_tags { Tests closing HTML tags. @@ -291,27 +310,27 @@ } { #Convert leading and trailing spaces or tabs set html "\tinter spaces " - aa_log "html= \"$html\" - Contains tabs and spaces" + aa_log "html= '$html' - Contains tabs and spaces" set result [util_convert_line_breaks_to_html $html] - aa_false "Now html=\"$result\"" [regexp {\sinter spaces\s} $result] + aa_false "Now html='$result'" [regexp {\sinter spaces\s} $result] #convert single break set html "\r\n inter\r\nbreaks \r\n" - aa_log "html= \"$html\" - Contains a single break" + aa_log "html= '$html' - Contains a single break" set result [util_convert_line_breaks_to_html $html] - aa_false "Now html=\"$result\"" [regexp {inter\nspaces} $result] + aa_false "Now html='$result'" [regexp {inter\nspaces} $result] #convert paragraph break set html "\r\n inter\r\n\r\nbreaks \r\n" - aa_log "html= \"$html\" - Contains a double break" + aa_log "html= '$html' - Contains a double break" set result [util_convert_line_breaks_to_html $html] - aa_false "Now html=\"$result\"" [regexp {inter
spaces} $result] + aa_false "Now html='$result'" [regexp {inter
spaces} $result] #convert more than 2 breaks set html "\r\n inter\r\n\r\n\r\nbreaks \r\n" - aa_log "html= \"$html\" - Contains more than 2 breaks" + aa_log "html= '$html' - Contains more than 2 breaks" set result [util_convert_line_breaks_to_html $html] - aa_false "Now html=\"$result\"" [regexp {inter\n\n\nspaces} $result] + aa_false "Now html='$result'" [regexp {inter\n\n\nspaces} $result] } @@ -331,7 +350,7 @@ set html $result aa_log "Quote html=$html" set result [ad_unquotehtml $html] - aa_equals "Unquote html=$result" "\"<&text>\"" $result + aa_equals "Unquote html=$result" "'<&text>'" $result } aa_register_case \ @@ -359,10 +378,10 @@ util_remove_html_tags { Test if it remove all between tags } { - set html "
some text to probe if it