Index: openacs-4/etc/keepalive/keepalive-config.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/etc/keepalive/keepalive-config.tcl,v diff -u -N -r1.1 -r1.2 --- openacs-4/etc/keepalive/keepalive-config.tcl 12 Feb 2004 12:28:55 -0000 1.1 +++ openacs-4/etc/keepalive/keepalive-config.tcl 28 Apr 2004 15:33:47 -0000 1.2 @@ -3,7 +3,10 @@ # @author Peter Marklund # The servers_to_monitor variable should be a flat list with URLs to monitor -# on even indices and the commands to execute if the servers don't respond +# on even indices and the commands to execute if the server doesn't respond # on odd indices, like this: # {server_url1 restart_command1 server_url2 restart_command2 ...} set servers_to_monitor {} + +# How long the keepalive script waits until it attempts another restart +set seconds_between_restarts [expr 10*60] Index: openacs-4/etc/keepalive/keepalive.sh =================================================================== RCS file: /usr/local/cvsroot/openacs-4/etc/keepalive/keepalive.sh,v diff -u -N -r1.1 -r1.2 --- openacs-4/etc/keepalive/keepalive.sh 12 Feb 2004 12:28:55 -0000 1.1 +++ openacs-4/etc/keepalive/keepalive.sh 28 Apr 2004 15:33:47 -0000 1.2 @@ -16,6 +16,17 @@ source $script_dir/keepalive-config.tcl +global restart_time_file +set restart_time_file $script_dir/last-restart-time + +proc read_file { file_path } { + set file_id [open $file_path r] + set file_contents [read $file_id] + close $file_id + + return $file_contents +} + proc server_responds_p { server_url } { set script_dir [file dirname [info script]] @@ -24,34 +35,71 @@ file delete -force $wget_file } - catch {exec wget --tries=5 ${server_url}/SYSTEM/dbtest} + if { [catch {exec /usr/local/bin/wget --timeout 6 --output-document $wget_file --tries=3 ${server_url}/SYSTEM/dbtest} errmsg] } { + #puts "wget threw error $errmsg" + } if { [file exists $wget_file] } { - set wget_file_id [open $wget_file r] - set wget_file_contents [read $wget_file_id] - close $wget_file_id - if { [regexp {success} $wget_file_contents] } { - set responds_p 1 - } else { - set responds_p 0 - } - } else { - set responds_p 0 - } + set wget_file_contents [read_file $wget_file] + + if { [regexp -nocase {^\s*success\s*$} $wget_file_contents] } { + set responds_p 1 + } else { + set responds_p 0 + } + } else { + set responds_p 0 + } - return $responds_p + return $responds_p } +proc waiting_for_restart_p { seconds_between_restarts } { + + global restart_time_file + + if { [file exists $restart_time_file] } { + set last_restart_time [string trim [read_file $restart_time_file]] + set current_time [clock seconds] + set time_since_restart [expr $current_time - $last_restart_time] + + if { [expr $time_since_restart > $seconds_between_restarts] } { + return 0 + } else { + return 1 + } + } else { + # This is the first restart + return 0 + } +} + +proc record_current_time { file } { + set fd [open $file w] + puts $fd [clock seconds] + close $fd +} + foreach {server_url restart_command} $servers_to_monitor { - puts -nonewline "Checking server at $server_url - " + #puts -nonewline "Checking server at $server_url - " if { [server_responds_p $server_url] } { - puts "server responds." + #puts "server responds." } else { - puts -nonewline "no response. " - puts "Executing command \"$restart_command\" to restart server." - if { [catch {eval exec $restart_command} errmsg] } { - puts "Error executing restart_command: $errmsg" - } + #puts -nonewline "no response. " + + # Only restart server if we didn't recently restart it + if { ![waiting_for_restart_p $seconds_between_restarts] } { + puts "Executing command \"$restart_command\" to restart server at $server_url." + if { [catch {eval exec $restart_command} errmsg] } { + puts "Error executing restart_command: $errmsg" + } + + # Record new restart time + global restart_time_file + record_current_time $restart_time_file + } else { + puts "Server at $server_url has been restarted within last $seconds_between_restarts seconds so not restarting yet" + } } }