Index: openacs-4/etc/install/tcl/twt-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/etc/install/tcl/twt-procs.tcl,v
diff -u -N -r1.10 -r1.11
--- openacs-4/etc/install/tcl/twt-procs.tcl	4 Nov 2003 14:39:05 -0000	1.10
+++ openacs-4/etc/install/tcl/twt-procs.tcl	18 Nov 2003 11:07:23 -0000	1.11
@@ -175,46 +175,77 @@
     puts $file_id "[response body]"
 }
 
-ad_proc ::twt::crawl_links {} {
-    TODO: This proc currently doesn't crawl nearly as many links as we would like
+ad_proc ::twt::crawl_links { 
+    {-max_requests 2000}
+    start_url 
 } {
+    Crawl links recursively under the given
+    url. Will only visit links Does not visit links with external URLs (outside the server).
 
-    global __url_history
+    @param max_requests The maximum number of links that the proc will crawl
+    @param start_url    The url to start crawling from
 
-    set start_url [lindex $__url_history end]
-
+    @author Peter Marklund
+} {
     # Return if given start URL is external
-    global __server_url
-    set absolute_url [tclwebtest::absolute_link $start_url]
-    if { [string first $__server_url $absolute_url] == -1 } {
+    set server_url [::twt::config::server_url]
+    set start_url_absolute [tclwebtest::absolute_link $start_url]
+    if { [string first $server_url $start_url_absolute] == -1 } {
+        #::twt::log "pm debug returning because link $start_url_absolute is external"
         return
     }
 
     # Also return if this is the logout link
     if { [regexp {/register/logout} $start_url match] } {
+        #::twt::log "pm debug returning because link $start_url_absolute is logout"
         return
     }
 
-    ::twt::do_request $start_url
+    global __url_history
 
-    set errno 0
-    while { $errno == "0" } {
-	set errno [catch {
-            array set link_array [link find -next]} error]
+    # Return if maximum number of links is exceeded
+    if { [llength $__url_history] > $max_requests } {
+        ::twt::log "[::twt::config::alert_keyword] - maximum number of links exceeded, not following link to $start_url_absolute"
+        return
+    }
 
-         if { [string equal $errno "0"] } {
-            set url $link_array(url)
+    lappend __url_history $start_url_absolute
 
-            # Don't revisit URL:s we have already tested
-            # Don't follow relative anchors on pages - can't get them to work with TclWebtest
-            if { [lsearch -exact $__url_history $url] == -1 && [string range $url 0 0] != "#" } {
-                lappend __url_history $url
+    # Request the page
+    if { [catch {::twt::do_request $start_url_absolute} errmsg] } {
+        ::twt::log "[::twt::config::alert_keyword] - requesting url $start_url_absolute failed. Response status is [response status] and error is $errmsg"
+        return
+    }
 
-                crawl_links
-            } else {
-            }
-         }
-   }
+    # Get all links on the page
+    if { [catch {set all_links [link all]} errmsg] } {
+        #::twt::log "pm debug could not get links for url $start_url_absolute : $errmsg"
+        return
+    }
+
+    # Loop over and recurse on each appropriate link
+    foreach link_list $all_links {
+        array set link $link_list
+        set url $link(url)
+        set absolute_url [tclwebtest::absolute_link $url]
+        #::twt::log "pm debug looping with url $absolute_url"
+
+        # Don't revisit URL:s we have already tested
+        # Don't follow relative anchors on pages - can't get them to work with TclWebtest
+        set new_url_p [expr [lsearch -exact $__url_history $absolute_url] == -1]
+        if { [string range $url 0 0] == "#" } {
+            set anchor_link_p 1
+        } else {
+            set anchor_link_p 0
+        }
+        set under_start_url_p [expr [string first $start_url_absolute $absolute_url] != -1]
+        if { $new_url_p && !$anchor_link_p && $under_start_url_p } {
+            #::twt::log "pm debug appending to url history: $url and invoking crawl_links again" 
+            crawl_links $url
+        } else {
+            #::twt::log "pm debug skipping url $url $new_url_p $anchor_link_p $under_start_url_p"
+        }
+    }
 }
 
 ad_proc ::twt::multiple_select_value { name value } {