@@ -71,23 +93,48 @@
database this question automatically).
-
+
+
+
+
-
-
-
Index: openacs-4/packages/acs-lang/www/admin/set-system-timezone.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-lang/www/admin/set-system-timezone.tcl,v
diff -u -r1.2 -r1.3
--- openacs-4/packages/acs-lang/www/admin/set-system-timezone.tcl 13 Feb 2003 14:26:50 -0000 1.2
+++ openacs-4/packages/acs-lang/www/admin/set-system-timezone.tcl 8 Aug 2003 12:21:28 -0000 1.3
@@ -35,22 +35,24 @@
# Append one row to the "timezones" multirow per entry returned by
# lc_list_all_timezones: the entry itself, its first element (tz), and a
# 0/1 flag telling whether tz equals $system_timezone.
foreach entry [lc_list_all_timezones] {
set tz [lindex $entry 0]
- multirow append timezones $entry $tz [string equal $tz $system_timezone]
+ multirow append timezones $entry $tz [string equal $tz $system_timezone]
}
# Try to get the correct UTC time from www.timeanddate.com
+
if { [catch {
+
set time_and_date_page [util_httpget "http://www.timeanddate.com/worldclock/"]
regexp {Current UTC \(or GMT\)-time used: ([^<]*)} $time_and_date_page match utc_from_page
# UTC in format:
# Wednesday, November 20, 2002, at 2:49:07 PM
+ # Wednesday, August 6, 2003, at 12:11:48
+ regexp {^([^,]*), *([^ ]*) *([0-9]*), *([0-9]*), at (.*)$} $utc_from_page match weekday month day year time
- regexp {^([^,]*), ([^ ]*) ([0-9]*), ([0-9]*), at (.*)$} $utc_from_page match weekday month day year time
-
set utc_epoch [clock scan "${month} ${day}, ${year} ${time}"]
-
+
set utc_ansi [clock format $utc_epoch -format "%Y-%m-%d %T"]
} errmsg] } {
@@ -60,3 +62,43 @@
set utc_ansi {Couldn't get time from timeanddate.com, sorry.}
}
+# correct_p stays {} when no comparison could be made; otherwise it is
+# 1 when the computed time difference is zero and 0 when it is not.
+set correct_p {}
+
+# utc_epoch is set inside the catch block above; when it is missing there is
+# no reference time to compare against.
+if { [info exists utc_epoch] } {
+    with_catch errmsg {
+        # sysdate_utc is set outside this hunk -- presumably the database's
+        # current time expressed in UTC (TODO confirm).
+        set sysdate_utc_epoch [clock scan $sysdate_utc]
+
+        # Difference from the reference time in hours, rounded to the
+        # nearest quarter hour. expr arguments are braced so values are
+        # substituted only once.
+        set delta_hours [expr {round(($sysdate_utc_epoch - $utc_epoch)*4.0 / (60*60)) / 4.0}]
+        set recommended_offset [expr {$system_utc_offset + $delta_hours}]
+
+        set correct_p [expr {$delta_hours == 0}]
+
+        # Candidate offsets in whole seconds: the recommended offset and
+        # the same offset shifted back by 24 hours.
+        set try_offsets [list]
+        foreach offset [list $recommended_offset [expr {$recommended_offset - 24}]] {
+            lappend try_offsets [expr {int($offset*60*60)}]
+        }
+
+        # NOTE(review): utc_ansi is interpolated directly into the SQL text;
+        # it is expected to hold the clock-format output produced above.
+        # Consider a bind variable if that can ever not be the case.
+ set query "
+ select tz.tz, tz.gmt_offset
+ from timezones tz,
+ timezone_rules tzr
+ where tzr.gmt_offset in ([join $try_offsets ", "])
+ and tzr.tz_id = tz.tz_id
+ and to_date('$utc_ansi', 'YYYY-MM-DD HH24:MI:SS') between tzr.utc_start and tzr.utc_end
+ order by tz
+ "
+
+        # Build the suggested_timezones multirow, adding value/label/selected_p
+        # columns to each row returned by the query.
+        db_multirow -extend { value label selected_p } suggested_timezones select_suggested_timezones $query {
+            set selected_p [string equal $tz $system_timezone]
+            set value $tz
+            set label "$tz $gmt_offset"
+        }
+    } {
+        # Re-raise the failure together with its original stack trace.
+        global errorInfo
+        error $errmsg $errorInfo
+    }
+}
Index: openacs-4/packages/acs-lang/www/doc/i18n-requirements.html
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-lang/www/doc/i18n-requirements.html,v
diff -u -r1.1 -r1.2
--- openacs-4/packages/acs-lang/www/doc/i18n-requirements.html 20 Apr 2001 20:51:09 -0000 1.1
+++ openacs-4/packages/acs-lang/www/doc/i18n-requirements.html 8 Aug 2003 12:21:28 -0000 1.2
@@ -1,688 +1,466 @@
-
-
-
-
-
-
-ACS 4 Globalization Requirements
-
-
-
-
-
ACS 4 Globalization Requirements
-
-
by Henry Minsky, Yon Feldman, Lars Pind, others
-
-
-
-
I. Introduction
-
-
-This document describes the requirements for functionality in the ACS
-platform to support globalization of the core and optional
-modules. The goal is to make it possible to support delivery of
-applications which work properly in multiple locales with the lowest
-development and maintenance cost.
-
-
-
Definitions
-
-
-internationalization (i18n)
-
-
-The provision within a computer program of the capability of making
-itself adaptable to the requirements of different native languages,
-local customs and coded character sets.
-
-
-
-locale
-
-
-The definition of the subset of a user's environment that depends on
-language and cultural conventions.
-
-
-localization (L10n)
-
-The process of establishing information within a computer system
-specific to the operation of particular native languages, local
-customs and coded character sets.
-
-
-globalization
-
-A product development approach which ensures that software products
-are usable in the worldwide markets through a combination of
-internationalization and localization.
-
-
-
-
-
II. Vision Statement
-
-The Mozilla project suggests keeping two catchy phrases in mind when thinking
-about globalization:
-
-
-
One code base for the world
-
English is just another language
-
-
-
-Building an application often involves making a number of assumptions
-on the part of the developers which depend on their own culture. These
-include constant strings in the user interface and system error
-messages, names of countries, cities, order of given and family names
-for people, syntax of numeric and date strings and collation order of
-strings.
-
-
-
-The ACS should be able to operate in languages and regions beyond US
-English. The goal of ACS Globalization is to provide a clean and
-efficient way to factor out the locale dependent functionality from
-our applications, in order to be able to easily swap in alternate
-localizations.
-
-This in turn will reduce redundant, costly, and error prone rework
-when targeting the toolkit or applications built with the toolkit to
-another locale.
-
-
-The cost of porting the ACS to another locale without some kind of
-globalization support would be large and ongoing, since without a
-mechanism to incorporate the locale-specific changes cleanly back into
-the code base, it would require making a new fork of the source code
-for each locale.
-
-
-
III. System/Application Overview
-
-
-A globalized application will perform some or all of the following
-steps to handle a page request for a specific locale:
-
-
-
Decide what the target locale is for an incoming page request
-
-
Decide which character set encoding the output should be delivered in
-
-
If a script file to handle the request needs to be loaded from
-disk, determine if a character set conversion needs to be performed
-when loading the script
-
-
-
If needed, locale-specific resources are fetched. These can include text, graphics,
-or other resources that would vary with the target locale.
-
-
-If content data is fetched from the database, check for
-locale-specific versions of the data (e.g. country names).
-
-
-
-
Source code should use a message catalog API to translate constant strings in
-the code to the target locale
-
-
Perform locale-specific linguistic sorting on data if needed
-
-
If the user submitted form input data, decide what character set
-encoding conversion if any is needed. Parse locale-specific quantities
-if needed (number formats, date formats).
-
-
If templating is being used, select correct locale-specific template to merge with content
-
-
-
Format output data quantities in locale-specific manner (date, time, numeric, currency).
-If templating is being used, this may be done either before and/or after merging the data with a template.
-
-
-
-
-Since the internationalization APIs may potentially be used on every
-page in an application, the overhead for adding internationalization to a
-module or application must not cause a significant time delay in
-handling page requests.
-
-In many cases there are facilities in Oracle to perform various
-localization functions, and also there are facilities in Java which we
-will want to move to. So the design to meet the requirements will tend
-to rely on these capabilities, or close approximations to them where
-possible, in order to make it easier to maintain Tcl and Java ACS
-versions.
-
-
-
-
IV. Use-cases and User-scenarios
-
-Here are the cases that we need to be able to handle efficiently:
-
-
-
-
-
-A developer needs to author a web site/application in a language
-besides English, and possibly a character set besides ISO-8859-1. This
-includes the operation of the ACS itself, i.e., navigation, admin
-pages for modules, error messages, as well as additional modules or
-content supplied by the web site developer.
-
-What do they need to modify to make this work? Can their localization work
-be easily folded in to future releases of ACS?
-
-
-
-
-A developer needs to author a web site which operates in multiple
-languages simultaneously. For example, arsDigita.com with content and
-navigation in English, German, and Japanese.
-
-The site would have an end-user visible UI to support these languages,
-and the content management system must allow articles to be posted in
-these languages. In some cases it may be necessary to make the
-modules' admin UI's operate in more than one supported language, while in other
-cases the backend admin interface can operate in a single language.
-
-
-
-A developer is writing a new module, and wants to make it easy for
-someone to localize it. There should be a clear path to author the
-module so that future developers can easily add support for other
-locales. This would include support for creating resources such as
-message catalogs, non-text assets such as graphics, and use of
-templates which help to separate application logic from presentation.
-
-
-
-
-
-
Competitive Analysis
-
-
-
-Other application servers: ATG Dyanmo, Broadvision, Vignette, ... ? Anyone
-know how they deal with i18n ?
-
-
-
-
V. Related Links
-
-
-
System/Package "coversheet" - where all
- documentation for this software is linked off of
-
-Because the requirements for globalization affect many areas of the
-system, we will break up the requirements into phases, with a base
-required set of features, and then stages of increasing functionality.
-
-
-
-
VI.A Locales
-10.0 A standard representation of locale will be used throughout the
-system. A locale refers to a language and territory, and is uniquely
-identified by a combination of ISO language and ISO country abbreviations.
-
-10.10 Provide a consistent representation and API for creating and referencing a locale
-
-
-10.20 There will be a Tcl library of locale-aware formatting
-and parsing functions for numbers, dates and times. Note that Java
-has builtin support for these already.
-
-
-10.30 For each locale there will be default date, number and currency formats.
-
-
-
-
-
VI.B Associating a Locale with a Request
-
-20.0 The request processor must have a mechanism for associating a
-locale with each request. This locale is then used to select the
-appropriate template for a request, and will also be passed as the
-locale argument to the message catalog or locale-specific formatting
-functions.
-
-
-
-20.10 The locale for a request should be computed by the following method, in descending
-order of priority:
-
-
get locale associated with subsite or package id
-
get locale from user preference
-
get locale from site wide default
-
-
-20.20 An API will be provided for getting the current request locale from
-the ad_conn structure.
-
-
-
VI.C Resource Bundles / Content Repository
-
-30.0 A mechanism must be provided for a developer to group a set of
-arbitrary content resources together, keyed by a unique identifier and
-a locale.
-
-
-For example, what approaches could be used to implement a localizable
-nav-bar mechanism for a site? A navigation bar might be made up of a
-set of text strings and graphics, where the graphics themselves are
-locale-specific, such as images of English or Japanese text (as on
-www.arsdigita.com). It should be easy to specify alternate
-configurations of text and graphics to lay out the page for different
-locales.
-
-Design note: Alternative mechanisms to implement this functionality
-might include using templates, Java ResourceBundles, content-item
-containers in the Content Repository, or some convention assigning a
-common prefix to key strings in the message catalog.
-
-
-
-
VI.D Message Catalog for String Translation
-
-40.0 A message catalog facility will provide a database of translations for
-constant strings for multilingual applications. It must support the
-following:
-
-
-
-
-40.10 Each message will referenced via unique a key.
-
-
-
-40.20 The key for a message will have some hierarchical structure to it,
-so that sets of messages can be grouped with respect to a module name
-or package path.
-
-
-
-40.30 The API for lookup of a message will take a locale and message key as
-arguments, and return the appropriate translation of that message for
-the specifed locale.
-
-
-40.40 The API for lookup of a message will accept an optional default string
-which can be used if the message key is not found in the catalog. This
-lets the developer get code working and tested in a single
-language before having to initialize or update a message catalog.
-
-
-
-
-40.50 For use within templates, custom tags which invoke the message lookup
-API will be provided.
-
-
-
-40.60 Provide a method for importing and exporting a flat file of
-translation strings, in order to make it as easy as possible to create
-and modify message translations in bulk without having to use a web
-interface.
-
-
-40.70 Since translations may be in different character sets, there must
-be provision for writing and reading catalog files in different
-character sets. A mechanism must exist for identifying the character
-set of a catalog file before reading it.
-
-
-40.80 There should be a mechanism for tracking dependencies in the message
-catalog, so that if a string is modified, the other translations of
-that string can be flagged as needing update.
-
-
-40.90 The message lookup must be as efficient as possible so as not to slow
-down the delivery of pages.
-
-
-
-
-Design question: Is there any reason to implement the message catalog on top of the content repository as
-the underlying storage and retrieval service, with a layer of caching for
-performance? Would we get a nice user interface and version control
-almost for free?
-
-
-
-
-
VI.E Character Set Encoding
-
-Character Sets
-
-50.0 A locale will have a primary associated character set
-which is used to encode text in the language. When given a locale, we
-can query the system for the associated character set to use.
-
-The assumption is that we are going to use Unicode in our database to
-hold all text data. Our current programming environments (Tcl/Oracle
-or Java/Oracle) operate on Unicode data internally. However, since
-Unicode is not yet commonly used in browsers and authoring tools, the
-system must be able to read and write other character sets. In
-particular, conversions to and from Unicode will need to be explicitly
-performed at the following times:
-
-
-
Loading source files (.tcl or .adp) or content files from the
-filesystem
-
-
Accepting form input data from users
-
Delivering
-text output to a browser
-
-
Composing an email message
-
-
Writing
-data to the filesystem
-
-
-
- Design question: Do we want to mandate that all template files
-be stored in UTF8? I don't think so, because most people don't have Unicode
-editors, or don't want to be bothered with an extra step to convert
-files to UTF8 and back when editing them in their favorite editor.
-
-Same question for script and template files, how do we know what
-language and character set they are authored in? Should we overload
-the filename suffix (e.g., '.shiftjis.adp', '.ja_JP.euc.adp')?
-
-The simplest design is probably just to assign a default mapping from
-each locale to character a set: e.g. ja_JP -> ShiftJIS, fr_FR ->
-ISO-8859-1. +++ (see new ACS/Java notes) +++
-
-
-
-
-
-
Tcl Source File Character Set
-
-There are two classes of Tcl files loaded by the system; library files
-loaded at server startup, and page script files, which are run on
-each page request.
-
-
- Should we require all Tcl files be stored as UTF8? That
-seems too much of a burden on developers.
-
-
-50.10 Tcl library files can be authored in any character set. The system
-must have a way to determine the character set before loading the files, probably from the filename.
-
-50.20 Tcl page script files can be authored in any character set. The system
-must have a way to determine the character set before loading the files, probably from the filename.
-
-
-
Submitted Form Data Character Set
-
-50.30 Data which is submitted with a HTTP request using a GET or POST
-method may be in any character set. The system must be able
-to determine the encoding of the form data and convert it
-to Unicode on demand.
-
-
-50.35 The developer must be able to override the default system
-choice of character set when parsing and validating user form data.
-
-
-50.30.10 Extra hair: In Japan and some other Asian languages where there are multiple
-character set encodings in common use, the server may need to attempt to
-do an auto-detection of the character set, because buggy browsers may submit
-form data in an unexpected alternate encoding.
-
-
-
-
Output Character Set
-
-50.40 The output character set for a page request will be determined by default by the
-locale associated with the request (see requirement 20.0).
-
-
-50.50 It must be possible for a developer to manually override the output
-character set encoding for a request using an API function.
-
-
-
-
-
VI.F ACS Kernel Issues
-
-
-
-60.10 All ACS error messages must use the message catalog and the request locale
-to generate error message for the appropriate locale.
-
-60.20 Web server error messages such as 404, 500, etc must also be delivered
-in the appropriate locale.
-
-60.30 Where files are written or read from disk, their filenames must use a
-character set and character values which are safe for the underlying
-operating system.
-
-
-
-
-
VI.G Templates
-
-
-
-
-70.0 For a given abstract URL, the designer may create multiple locale-specific template files may be created (one per locale or language)
-
-70.10 For a given page request, the system must be able to select
-an approprate locale-specific template file to use.
-The request locale is computed as per (see requirement 20.0).
-
-
Design note: this would probably be implemented
-by suffixing the locale or a locale abbreviation to the template filename, such as foo.ja.adp or foo.en_GB.adp.
-
-
-
-
-70.20A template file may be created for a partial locale (language only, without
-a territory), and the request processor should be able to find the closest match for
-the current request locale.
-
-
-
-70.30 A template file may be created in any character set. The system must have a
-way to know which character set a template file contains, so it can
-properly process it.
-
-
-
-
Formatting Datasource Output in Templates
-
-70.50 The properties of a datasource column may include a datatype so that
-the templating system can format the output for the current
-locale. The datatype is defined by a standard ACS datatype plus a
-format token or format string, for example: a date column might be
-specified as 'current_date:date LONG,' or 'current_date:date
-"YYYY-Mon-DD"'
-
-
Forms
-
-70.60 The forms API must support construction of locale-specific HTML form
-widgets, such as date entry widgets, and form validation of user input
-data for locale-specific data, such as dates or numbers.
-
-
-
-70.70 For forms which allow users to upload files, a standard
-method for a user to indicate the charset of a text file being
-uploaded must be provided.
-
-
Design note:
-this presumably applies to uploading data to the content repository as
-well
-
-
-
-
-
VI.H Sorting and Searching
-
-
-
-80.10 Support API for correct collation (sorting order) on lists of strings in locale-dependent way.
-
-
-
-80.20 For the Tcl API, we will say that locale-dependent sorting will use Oracle SQL
-operations (i.e., we won't provide a Tcl API for this). We require
-a Tcl API function to return the correct incantation of NLS_SORT to use
-for a given locale with ORDER BY clauses in queries.
-
-
-80.40 The system must handle full-text search in any supported language.
-
-
-
-
VI.G Time Zones
-
-
-90.10 Provide API support for specifying a time zone
-
-
-90.20 Provide an API for computing time and date operations which are aware
-of timezones. So for example a calendar module can properly
-synchronize items inserted into a calendar from users in different
-time zones using their own local times.
-
-
-90.30 Store all dates and times in universal time zone, UTC.
-
-
-
-90.40 For a registered users, a time zone preference should be stored.
-
-
-90.50 For a non-registered user a time zone preference should
-be attached via a session or else UTC should be used to display
-every date and time.
-
-
-90.60 The default if we can't determine a time zone is to display
-
- all dates and times in some universal time zone such as GMT.
-
-
-
-
-
-
VI.H Database
-
-
-
-
-100.10 Since UTF8 strings can use up to three (UCS2) or six (UCS4) bytes
-per character, make sure that column size declarations in the schema
-are large enough to accomodate required data (such as email addresses
-in Japanese).
-
-
-
-
-
VI.I Email and Messaging
-
-When sending an email message, just as when delivering the content in
-web page over an HTTP connection, it is necessary to be able to
-specify what character set encoding to use.
-
-
-
-
-
-110.10 The email message sending API will allow for a character set encoding to be specified.
-
-110.20 The email accepting API will allow for character set to be parsed correctly (hopefully
-a well formatted message will have a MIME character set content type header)
-
-
-
-
-
Implementation Notes
-
-Because globalization touches many different parts of the system,
-we want to reduce the implementation risk by breaking the implementation
-into phases.
-
-
+This document describes the requirements for functionality in the
+ACS platform to support globalization of the core and optional
+modules. The goal is to make it possible to support delivery of
+applications which work properly in multiple locales with the
+lowest development and maintenance cost.
+
Definitions
+
internationalization (i18n)
+
The provision within a computer program of the capability of
+making itself adaptable to the requirements of different native
+languages, local customs and coded character sets.
+
locale
+
The definition of the subset of a user's environment that
+depends on language and cultural conventions.
+
localization (L10n)
+
The process of establishing information within a computer system
+specific to the operation of particular native languages, local
+customs and coded character sets.
+
globalization
+
A product development approach which ensures that software
+products are usable in the worldwide markets through a combination
+of internationalization and localization.
+
+
II. Vision Statement
+The Mozilla project suggests keeping two catchy phrases in mind
+when thinking about globalization:
+
+
One code base for the world
+
English is just another language
+
+
Building an application often involves making a number of
+assumptions on the part of the developers which depend on their own
+culture. These include constant strings in the user interface and
+system error messages, names of countries, cities, order of given
+and family names for people, syntax of numeric and date strings and
+collation order of strings.
+
The ACS should be able to operate in languages and regions
+beyond US English. The goal of ACS Globalization is to provide a
+clean and efficient way to factor out the locale dependent
+functionality from our applications, in order to be able to easily
+swap in alternate localizations.
+
This in turn will reduce redundant, costly, and error prone
+rework when targeting the toolkit or applications built with the
+toolkit to another locale.
+
The cost of porting the ACS to another locale without some kind
+of globalization support would be large and ongoing, since without
+a mechanism to incorporate the locale-specific changes cleanly back
+into the code base, it would require making a new fork of the
+source code for each locale.
+
III. System/Application Overview
+A globalized application will perform some or all of the following
+steps to handle a page request for a specific locale:
+
+
Decide what the target locale is for an incoming page
+request
+
Decide which character set encoding the output should be
+delivered in
+
If a script file to handle the request needs to be loaded from
+disk, determine if a character set conversion needs to be performed
+when loading the script
+
+
If needed, locale-specific resources are fetched. These can
+include text, graphics, or other resources that would vary with the
+target locale.
+
If content data is fetched from the database, check for
+locale-specific versions of the data (e.g. country names).
+
Source code should use a message catalog API to translate
+constant strings in the code to the target locale
+
Perform locale-specific linguistic sorting on data if
+needed
+
If the user submitted form input data, decide what character
+set encoding conversion if any is needed. Parse locale-specific
+quantities if needed (number formats, date formats).
+
If templating is being used, select correct locale-specific
+template to merge with content
+
Format output data quantities in locale-specific manner (date,
+time, numeric, currency). If templating is being used, this may be
+done either before and/or after merging the data with a
+template.
+
+
Since the internationalization APIs may potentially be used on
+every page in an application, the overhead for adding
+internationalization to a module or application must not cause a
+significant time delay in handling page requests.
+
In many cases there are facilities in Oracle to perform various
+localization functions, and also there are facilities in Java which
+we will want to move to. So the design to meet the requirements
+will tend to rely on these capabilities, or close approximations to
+them where possible, in order to make it easier to maintain Tcl and
+Java ACS versions.
+
IV. Use-cases and User-scenarios
+Here are the cases that we need to be able to handle efficiently:
+
+
A developer needs to author a web site/application in a
+language besides English, and possibly a character set besides
+ISO-8859-1. This includes the operation of the ACS itself, i.e.,
+navigation, admin pages for modules, error messages, as well as
+additional modules or content supplied by the web site developer.
+
What do they need to modify to make this work? Can their
+localization work be easily folded in to future releases of
+ACS?
+
+
A developer needs to author a web site which operates in
+multiple languages simultaneously. For example, arsDigita.com with
+content and navigation in English, German, and Japanese.
+
The site would have an end-user visible UI to support these
+languages, and the content management system must allow articles to
+be posted in these languages. In some cases it may be necessary to
+make the modules' admin UI's operate in more than one supported
+language, while in other cases the backend admin interface can
+operate in a single language.
+
+
A developer is writing a new module, and wants to make it easy
+for someone to localize it. There should be a clear path to author
+the module so that future developers can easily add support for
+other locales. This would include support for creating resources
+such as message catalogs, non-text assets such as graphics, and use
+of templates which help to separate application logic from
+presentation.
+
+
Competitive Analysis
+
Other application servers: ATG Dynamo, Broadvision, Vignette,
+... ? Anyone know how they deal with i18n ?
+
V. Related Links
+
+
System/Package "coversheet" - where all documentation for
+this software is linked off of
+Because the requirements for globalization affect many areas of the
+system, we will break up the requirements into phases, with a base
+required set of features, and then stages of increasing
+functionality.
+
VI.A Locales
+10.0 A standard representation of locale will be used
+throughout the system. A locale refers to a language and territory,
+and is uniquely identified by a combination of ISO language and ISO
+country abbreviations.
+
10.10 Provide a consistent representation and API for
+creating and referencing a locale
+
10.20 There will be a Tcl library of locale-aware
+formatting and parsing functions for numbers, dates and times.
+Note that Java has builtin support for these already.
+
10.30 For each locale there will be default date, number
+and currency formats.
+
+
VI.B Associating a Locale with a Request
+20.0 The request processor must have a mechanism for
+associating a locale with each request. This locale is then used to
+select the appropriate template for a request, and will also be
+passed as the locale argument to the message catalog or
+locale-specific formatting functions.
+
+
20.10 The locale for a request should be computed by the
+following method, in descending order of priority:
+
+
get locale associated with subsite or package id
+
get locale from user preference
+
get locale from site wide default
+
20.20 An API will be provided for getting the current
+request locale from the ad_conn structure.
+
+
+
+
VI.C Resource Bundles / Content Repository
+30.0 A mechanism must be provided for a developer to group a
+set of arbitrary content resources together, keyed by a unique
+identifier and a locale.
+
For example, what approaches could be used to implement a
+localizable nav-bar mechanism for a site? A navigation bar might be
+made up of a set of text strings and graphics, where the graphics
+themselves are locale-specific, such as images of English or
+Japanese text (as on www.arsdigita.com). It should be easy to
+specify alternate configurations of text and graphics to lay out
+the page for different locales.
+
Design note: Alternative mechanisms to implement this
+functionality might include using templates, Java ResourceBundles,
+content-item containers in the Content Repository, or some
+convention assigning a common prefix to key strings in the message
+catalog.
+
VI.D Message Catalog for String Translation
+40.0 A message catalog facility will provide a database of
+translations for constant strings for multilingual applications. It
+must support the following:
+
+
40.10 Each message will be referenced via a unique key.
+
40.20 The key for a message will have some hierarchical
+structure to it, so that sets of messages can be grouped with
+respect to a module name or package path.
+
40.30 The API for lookup of a message will take a locale
+and message key as arguments, and return the appropriate
+translation of that message for the specified locale.
+
40.40 The API for lookup of a message will accept an
+optional default string which can be used if the message key is not
+found in the catalog. This lets the developer get code working and
+tested in a single language before having to initialize or update a
+message catalog.
+
40.50 For use within templates, custom tags which invoke
+the message lookup API will be provided.
+
40.60 Provide a method for importing and exporting a flat
+file of translation strings, in order to make it as easy as
+possible to create and modify message translations in bulk without
+having to use a web interface.
+
40.70 Since translations may be in different character
+sets, there must be provision for writing and reading catalog files
+in different character sets. A mechanism must exist for identifying
+the character set of a catalog file before reading it.
+
40.80 There should be a mechanism for tracking
+dependencies in the message catalog, so that if a string is
+modified, the other translations of that string can be flagged as
+needing update.
+
40.90 The message lookup must be as efficient as possible
+so as not to slow down the delivery of pages.
+
+Design question: Is there any reason to
+implement the message catalog on top of the content repository as
+the underlying storage and retrieval service, with a layer of
+caching for performance? Would we get a nice user interface and
+version control almost for free?
+
+
VI.E Character Set Encoding
+Character Sets
+
50.0 A locale will have a primary associated character
+set which is used to encode text in the language. When given a
+locale, we can query the system for the associated character set to
+use.
+
The assumption is that we are going to use Unicode in our
+database to hold all text data. Our current programming
+environments (Tcl/Oracle or Java/Oracle) operate on Unicode data
+internally. However, since Unicode is not yet commonly used in
+browsers and authoring tools, the system must be able to read and
+write other character sets. In particular, conversions to and from
+Unicode will need to be explicitly performed at the following
+times:
+
+
Loading source files (.tcl or .adp) or content files from the
+filesystem
+
Accepting form input data from users
+
Delivering text output to a browser
+
Composing an email message
+
Writing data to the filesystem
+
+
+Design question: Do we want to mandate
+that all template files be stored in UTF8? I don't think so,
+because most people don't have Unicode editors, or don't want to be
+bothered with an extra step to convert files to UTF8 and back when
+editing them in their favorite editor.
+
Same question for script and template
+files, how do we know what language and character set they are
+authored in? Should we overload the filename suffix (e.g.,
+'.shiftjis.adp', '.ja_JP.euc.adp')?
+
The simplest design is probably just to
+assign a default mapping from each locale to a character set: e.g.
+ja_JP -> ShiftJIS, fr_FR -> ISO-8859-1. +++ (see new ACS/Java
+notes) +++
+
+
Tcl Source File Character Set
+There are two classes of Tcl files loaded by the system; library
+files loaded at server startup, and page script files, which are
+run on each page request.
+
+Should we require all Tcl files be stored
+as UTF8? That seems too much of a burden on
+developers.
+
50.10 Tcl library files can be authored in any character
+set. The system must have a way to determine the character set
+before loading the files, probably from the filename.
+
50.20 Tcl page script files can be authored in any
+character set. The system must have a way to determine the
+character set before loading the files, probably from the
+filename.
+
Submitted Form Data Character Set
+50.30 Data which is submitted with an HTTP request using a
+GET or POST method may be in any character set. The system must be
+able to determine the encoding of the form data and convert it to
+Unicode on demand.
+
50.35 The developer must be able to override the default
+system choice of character set when parsing and validating user
+form data.
+
50.30.10 Extra hair: For Japanese and some other Asian
+languages where there are multiple character set encodings in
+common use, the server may need to attempt to do an auto-detection
+of the character set, because buggy browsers may submit form data
+in an unexpected alternate encoding.
+
Output Character Set
+50.40 The output character set for a page request will be
+determined by default by the locale associated with the request
+(see requirement 20.0).
+
50.50 It must be possible for a developer to manually
+override the output character set encoding for a request using an
+API function.
+
+
VI.F ACS Kernel Issues
+
60.10 All ACS error messages must use the
+message catalog and the request locale to generate error messages
+for the appropriate locale.
+
60.20 Web server error messages such as 404, 500, etc
+must also be delivered in the appropriate locale.
+
60.30 Where files are written or read from disk, their
+filenames must use a character set and character values which are
+safe for the underlying operating system.
+
+
VI.G Templates
+
70.0 For a given abstract URL, the designer may
+create multiple locale-specific template files (one
+per locale or language).
+
70.10 For a given page request, the system must be able
+to select an appropriate locale-specific template file to use. The
+request locale is computed as per requirement 20.0.
+
Design note: this would probably be
+implemented by suffixing the locale or a locale abbreviation to the
+template filename, such as foo.ja.adp or
+foo.en_GB.adp.
+
70.20 A template file may be created for a partial locale
+(language only, without a territory), and the request processor
+should be able to find the closest match for the current request
+locale.
+
70.30 A template file may be created in any character
+set. The system must have a way to know which character set a
+template file contains, so it can properly process it.
+
Formatting Datasource Output in Templates
+70.50 The properties of a datasource column may include a
+datatype so that the templating system can format the output for
+the current locale. The datatype is defined by a standard ACS
+datatype plus a format token or format string, for example: a date
+column might be specified as 'current_date:date LONG,' or
+'current_date:date "YYYY-Mon-DD"'
+
Forms
+70.60 The forms API must support construction of
+locale-specific HTML form widgets, such as date entry widgets, and
+form validation of user input data for locale-specific data, such
+as dates or numbers.
+
70.70 For forms which allow users to upload files, a
+standard method for a user to indicate the charset of a text file
+being uploaded must be provided.
+
Design note: this presumably applies to
+uploading data to the content repository as well
+
+
VI.H Sorting and Searching
+
80.10 Support API for correct collation (sorting
+order) on lists of strings in a locale-dependent way.
+
80.20 For the Tcl API, we will say that locale-dependent
+sorting will use Oracle SQL operations (i.e., we won't provide a
+Tcl API for this). We require a Tcl API function to return the
+correct incantation of NLS_SORT to use for a given locale with
+ORDER BY clauses in queries.
+
80.40 The system must handle full-text search in any
+supported language.
+
+
VI.G Time Zones
+
90.10 Provide API support for specifying a time
+zone
+
90.20 Provide an API for computing time and date
+operations which are aware of timezones. So for example a calendar
+module can properly synchronize items inserted into a calendar from
+users in different time zones using their own local times.
+
90.30 Store all dates and times in universal time zone,
+UTC.
+
90.40 For a registered user, a time zone preference
+should be stored.
+
90.50 For a non-registered user a time zone preference
+should be attached via a session or else UTC should be used to
+display every date and time.
+
90.60 The default if we can't determine a time zone is to
+display all dates and times in some universal time zone such as
+GMT.
+
+
VI.H Database
+
+
100.10 Since UTF8 strings can use up to three (UCS2) or
+six (UCS4) bytes per character, make sure that column size
+declarations in the schema are large enough to accommodate required
+data (such as email addresses in Japanese).
+
+
VI.I Email and Messaging
+When sending an email message, just as when delivering the content
+in a web page over an HTTP connection, it is necessary to be able to
+specify what character set encoding to use.
+
+
110.10 The email message sending API will allow for a
+character set encoding to be specified.
+
110.20 The email accepting API will allow for the character
+set to be parsed correctly (hopefully a well formatted message will
+have a MIME character set content type header)
+
+
Implementation Notes
+Because globalization touches many different parts of the system,
+we want to reduce the implementation risk by breaking the
+implementation into phases.
+