Index: Makefile.in =================================================================== diff -u -r01876262f15dc573d11e9f654417ad31ae604a98 -rb689afd2df2077ab7d033a0a411808fef36149b1 --- Makefile.in (.../Makefile.in) (revision 01876262f15dc573d11e9f654417ad31ae604a98) +++ Makefile.in (.../Makefile.in) (revision b689afd2df2077ab7d033a0a411808fef36149b1) @@ -257,6 +257,7 @@ $(src_doc_dir)/example-scripts/tk-spread.html \ $(src_doc_dir)/example-scripts/traits-composite.html \ $(src_doc_dir)/example-scripts/traits-simple.html \ + $(src_doc_dir)/example-scripts/rosetta-tokenizer.html %.html : %.tcl Index: TODO =================================================================== diff -u -r9422ecb735d83f418fe31e9537252981297aada5 -rb689afd2df2077ab7d033a0a411808fef36149b1 --- TODO (.../TODO) (revision 9422ecb735d83f418fe31e9537252981297aada5) +++ TODO (.../TODO) (revision b689afd2df2077ab7d033a0a411808fef36149b1) @@ -5835,6 +5835,8 @@ (which has been effectively replaced by performed by ::nsf::parameter::filter at the script level). +- Added Rosetta example: https://rosettacode.org/wiki/Tokenize_a_string_with_escaping + ======================================================================== TODO: @@ -5848,7 +5850,6 @@ https://rosettacode.org/wiki/Polymorphic_copy#Tcl (more substantial) - https://rosettacode.org/wiki/Tokenize_a_string_with_escaping https://rosettacode.org/wiki/Tree_traversal https://rosettacode.org/wiki/Active_object Index: doc/example-scripts/rosetta-tokenizer.html =================================================================== diff -u --- doc/example-scripts/rosetta-tokenizer.html (revision 0) +++ doc/example-scripts/rosetta-tokenizer.html (revision b689afd2df2077ab7d033a0a411808fef36149b1) @@ -0,0 +1,850 @@ + + + + + +Listing of doc/example-scripts/rosetta-tokenizer.tcl + + + + + +
+
+
+

Assumes Tcl 8.6 (coroutine support)

+
+
+
if {[catch {package req Tcl 8.6}]} return
+
+
+
+

Rosetta example: Tokenize a string with escaping

+
+

Write a class which allows for splitting a string at each non-escaped +occurrence of a separator character.

+ +
+
+
package req nx
+
+nx::Class create Tokenizer {
+    # Splits a string at each non-escaped occurrence of a separator
+    # character. Every instance walks through the characters of its
+    # property "s" by means of a coroutine.
+    :property s:required
+    :method init {} {
+        # The coroutine command lives in the per-object namespace,
+        # so that namespace has to exist first.
+        :require namespace
+        # Run "iter" as a coroutine; its first yield hands back the
+        # name of the coroutine command.
+        set coro [coroutine [current]::nextCoro [current] iter ${:s}]
+        # Calling "next" on the instance resumes the coroutine.
+        :public object forward next $coro
+    }
+    # Coroutine body: yields the characters of s one at a time.
+    :public method iter {s} {
+        yield [info coroutine]
+        set length [string length $s]
+        for {set i 0} {$i < $length} {incr i} {
+            yield [string index $s $i]
+        }
+        # Input exhausted: the break code ends the consumer's loop.
+        return -code break
+    }
+    # Returns the list of fields of s, split at every separator that
+    # is not preceded by the escape character; the character after
+    # an escape character is taken literally.
+    :public object method tokenize {{-sep |} {-escape ^} s} {
+        set t [[current] new -s $s]
+        set part ""
+        set parts [list]
+        try {
+            # The loop ends through the break code that "next"
+            # returns once all characters are consumed.
+            while {1} {
+                set c [$t next]
+                if {$c eq $escape} {
+                    append part [$t next]
+                } elseif {$c eq $sep} {
+                    lappend parts $part
+                    set part ""
+                } else {
+                    append part $c
+                }
+            }
+        } finally {
+            # Dispose of the helper instance in any case; otherwise
+            # every call leaves one Tokenizer object behind.
+            $t destroy
+        }
+        lappend parts $part
+        return $parts
+    }
+}
+

Run some tests incl. the escape character:

+
+
+
% Tokenizer tokenize -sep | -escape ^ ^|
+|
+% Tokenizer tokenize -sep | -escape ^ ^|^|
+||
+% Tokenizer tokenize -sep | -escape ^ ^^^|
+^|
+% Tokenizer tokenize -sep | -escape ^ |
+{} {}
+

Test for the output required by the Rosetta example:

+
+
+
% Tokenizer tokenize -sep | -escape ^ one^|uno||three^^^^|four^^^|^cuatro|
+one|uno {} three^^ four^|cuatro {}
+
+
+
+

+
+
+
Index: doc/example-scripts/rosetta-tokenizer.tcl
===================================================================
diff -u
--- doc/example-scripts/rosetta-tokenizer.tcl (revision 0)
+++ doc/example-scripts/rosetta-tokenizer.tcl (revision b689afd2df2077ab7d033a0a411808fef36149b1)
@@ -0,0 +1,81 @@
+# Assumes Tcl 8.6 (coroutine support)
+if {[catch {package req Tcl 8.6}]} return
+
+#
+# == Rosetta example: Tokenize a string with escaping
+#
+#
+# Write a class which allows for splitting a string at each non-escaped
+# occurrence of a separator character.
+#
+# See https://rosettacode.org/wiki/Tokenize_a_string_with_escaping
+#
+
+package req nx
+package req nx::test
+
+nx::Class create Tokenizer {
+    # Splits a string at each non-escaped occurrence of a separator
+    # character. Every instance walks through the characters of its
+    # property "s" by means of a coroutine.
+    :property s:required
+    :method init {} {
+        # The coroutine command lives in the per-object namespace,
+        # so that namespace has to exist first.
+        :require namespace
+        # Run "iter" as a coroutine; its first yield hands back the
+        # name of the coroutine command.
+        set coro [coroutine [current]::nextCoro [current] iter ${:s}]
+        # Calling "next" on the instance resumes the coroutine.
+        :public object forward next $coro
+    }
+    # Coroutine body: yields the characters of s one at a time.
+    :public method iter {s} {
+        yield [info coroutine]
+        set length [string length $s]
+        for {set i 0} {$i < $length} {incr i} {
+            yield [string index $s $i]
+        }
+        # Input exhausted: the break code ends the consumer's loop.
+        return -code break
+    }
+    # Returns the list of fields of s, split at every separator that
+    # is not preceded by the escape character; the character after
+    # an escape character is taken literally.
+    :public object method tokenize {{-sep |} {-escape ^} s} {
+        set t [[current] new -s $s]
+        set part ""
+        set parts [list]
+        try {
+            # The loop ends through the break code that "next"
+            # returns once all characters are consumed.
+            while {1} {
+                set c [$t next]
+                if {$c eq $escape} {
+                    append part [$t next]
+                } elseif {$c eq $sep} {
+                    lappend parts $part
+                    set part ""
+                } else {
+                    append part $c
+                }
+            }
+        } finally {
+            # Dispose of the helper instance in any case; otherwise
+            # every call leaves one Tokenizer object behind.
+            $t destroy
+        }
+        lappend parts $part
+        return $parts
+    }
+}
+
+# Run some tests incl. the escape character:
+
+? {Tokenizer tokenize -sep | -escape ^ ^|} {|}
+? {Tokenizer tokenize -sep | -escape ^ ^|^|} {||}
+? {Tokenizer tokenize -sep | -escape ^ ^^^|} {^|}
+? {Tokenizer tokenize -sep | -escape ^ |} {{} {}}
+
+# Test for the output required by the Rosetta example:
+? {Tokenizer tokenize -sep | -escape ^ one^|uno||three^^^^|four^^^|^cuatro|} {one|uno {} three^^ four^|cuatro {}}