Mike Gerwitz

Activist for User Freedom

about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Mike Gerwitz <mike.gerwitz@rtspecialty.com> 2018-10-16 08:53:04 -0400
committer: Mike Gerwitz <mike.gerwitz@rtspecialty.com> 2018-10-16 08:53:04 -0400
commit: db1c03dfd9c26150ad1faa0392c482c2ac682856 (patch)
tree: 56e456d19f56c2c93d1042ac29f4ad12d8a4b2ad
parent: 5679be281ae7fcf1abbbcab10ca5c9b235e353e3 (diff)
download: tame-db1c03dfd9c26150ad1faa0392c482c2ac682856.tar.gz
          tame-db1c03dfd9c26150ad1faa0392c482c2ac682856.tar.bz2
          tame-db1c03dfd9c26150ad1faa0392c482c2ac682856.zip
tame{,d}: Reload runner when unresponsive (tag: v3.3.3)
This tries to be a bit more resilient in case a runner becomes unresponsive,
rather than waiting for tamed to kill itself.

* bin/tame (RUNNER_CMD_WAITTIME): New variable.
  (command-runner): Tell runner to reload if it does not respond in
  RUNNER_CMD_WAITTIME seconds.
  (verify-runner-ack): New function.
* bin/tamed (mkfifos): Only keep stdin open.  stdout isn't necessary, and
  may have actually been causing subtle issues.
  (spawn-runner): Support restarting dslc on SIGHUP.
-rwxr-xr-x  bin/tame   34
-rwxr-xr-x  bin/tamed  15
2 files changed, 43 insertions, 6 deletions
diff --git a/bin/tame b/bin/tame
index ecb2bed..e7a5e99 100755
--- a/bin/tame
+++ b/bin/tame
@@ -24,6 +24,10 @@ declare -r mypath=$( dirname "$( readlink -f "$0" )" )
declare -ri EX_NOTAMED=1 # tried to start tamed but failed
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
+# maximum amount of time in seconds to wait for runner to ack
+# before forcibly restarting it
+declare -ri RUNNER_CMD_WAITTIME=3
+
# Send a single command to a runner and observe the result
#
@@ -48,7 +52,18 @@ command-runner()
trap 'kill -TERM $pid &>/dev/null' INT TERM
# all remaining arguments are passed to the runner
- echo "$@" > "$base/0"
+ echo "$*" > "$base/0"
+
+ # we should immediately get a response from the runner;
+ # if not, then it may have stalled for some reason
+ verify-runner-ack "$*" < "$base/1" || {
+ echo "warning: failed runner $id ack; requesting reload" >&2
+ kill -HUP "$pid"
+ sleep "$RUNNER_CMD_WAITTIME"
+
+ # try once more
+ verify-runner-ack "$*" < "$base/1" || exit
+ }
# output lines from runner until we reach a line stating "DONE"
while read line; do
@@ -85,6 +100,23 @@ verify-runner()
}
+# Wait for command acknowledgment from runner
+#
+# The runner must respond within RUNNER_CMD_WAITTIME seconds
+# and must echo back the command that was given. Otherwise,
+# this function returns with a non-zero status.
+verify-runner-ack()
+{
+ local -r cmd="${1?Missing command}"
+
+ read -t"$RUNNER_CMD_WAITTIME" -r ack || return
+ test "COMMAND $cmd" == "$ack" || {
+ # TODO check for ack mismatch once output race condition is fixed
+ :
+ }
+}
+
+
# Wait somewhat impatiently for tamed
#
# Assumes that tamed's runner 0 is running once the pidfile becomes
diff --git a/bin/tamed b/bin/tamed
index 25b2077..3b8b48b 100755
--- a/bin/tamed
+++ b/bin/tamed
@@ -53,10 +53,10 @@ mkfifos()
echo "fatal: failed to create FIFO at $in"
exit $EX_CANTCREAT
}
-
- # keep FIFOs open so we don't get EOF from writers
- tail -f >"$root/$n" &
done
+
+ # keep FIFOs open so we don't get EOF from writers
+ tail -f >"$root/0" &
}
@@ -80,8 +80,13 @@ spawn-runner()
# loop to restart runner in case of crash
while true; do
- "$mypath/dslc" < "$base/0" &> "$base/1"
- echo "warning: runner $id exited with code ${PIPESTATUS[0]}; restarting" >&2
+ declare -i job=0
+ trap 'kill -INT $job' HUP
+ "$mypath/dslc" < "$base/0" &> "$base/1" & job=$!
+
+ declare -i status=0
+ wait -n 2>/dev/null || status=$?
+ echo "warning: runner $id exited with code $status; restarting" >&2
done &
echo "$!" > "$base/pid"