| |
237 |
237 |
# an explicitly stated, yet optional "charset" parameter is |
| |
238 |
238 |
# permitted for all text/* media subtypes (RFC 2616) and selected |
| |
239 |
239 |
# XML media type classes listed by RFC 3023 (beyond the text/* |
| |
240 |
240 |
# media type; e.g. "application/xml*", "*/*+xml", etc.). |
| |
241 |
241 |
# |
| |
242 |
242 |
# (B) If the "charset" is omitted, certain default values apply (!): |
| |
243 |
243 |
# |
| |
244 |
244 |
# (B.1) RFC 3023 text/* registrations default to us-ascii (!), |
| |
245 |
245 |
# and not iso-8859-1 (overruling RFC 2616). |
| |
246 |
246 |
# |
| |
247 |
247 |
# (B.2) RFC 3023 application/* and non-text "+xml" registrations |
| |
248 |
248 |
# are to be left untreated (in our context, no encoding |
| |
249 |
249 |
# filtering is to be applied -> "binary") |
| |
250 |
250 |
# |
| |
251 |
251 |
# (B.3) RFC 2616 text/* registrations (if not covered by B.1) |
| |
252 |
252 |
# default to iso-8859-1 |
| |
253 |
253 |
# |
| |
254 |
254 |
# (C) If neither A nor B applies (e.g., because an invalid charset |
| |
255 |
255 |
# name was given to the charset parameter), we default to |
| |
256 |
256 |
# "binary". This corresponds to the behavior of |
| |
257 |
|
# [ns_encodingfortype]. Also note, that the RFCs 3023 and 2616 do |
| |
|
257 |
# [ns_encodingfortype]. Also note that the RFCs 3023 and 2616 do |
| |
258 |
258 |
# not state any procedure when "invalid" charsets etc. are |
| |
259 |
259 |
# identified. I assume RFC-compliant clients have to ignore them |
| |
260 |
260 |
# which means keeping the channel input and output unfiltered (encoding |
| |
261 |
261 |
# = "binary"). This requires the client of the *HttpRequest* to |
| |
262 |
262 |
# treat the data accordingly. |
| |
263 |
263 |
# |
| |
264 |
264 |
|
| |
265 |
265 |
set enc "" |
| |
266 |
266 |
if {[regexp {^text/.*$|^.*/xml.*$|^.*\+xml.*$} $content_type]} { |
| |
267 |
267 |
# Case (A): Check for an explicitly provided charset parameter |
| |
268 |
268 |
if {[regexp {;\s*charset\s*=([^;]*)} $content_type _ charset]} { |
| |
269 |
269 |
set enc [ns_encodingforcharset [string trim $charset]] |
| |
270 |
270 |
} |
| |
271 |
271 |
# Case (B.1) |
| |
272 |
272 |
if {$enc eq "" && [regexp {^text/xml.*$|text/.*\+xml.*$} $content_type]} { |
| |
273 |
273 |
set enc [ns_encodingforcharset us-ascii] |
| |
274 |
274 |
} |
| |
275 |
275 |
|
| |
276 |
276 |
# Case (B.3) |
| |
277 |
277 |
if {$enc eq "" && [string match "text/*" $content_type]} { |