Run processes without screen

This introduces new run_process() and screen_service() functions and sets the
groundwork to change how DevStack starts services.  screen_service() is simply a
direct call to the screen portion of the old screen_it() function and is intended
to run commands that only need to run under screen, such as log file watchers.

run_process() is a replacement for screen_it() (which remains until all of the
services are updated).  The usage is similar but requires updates to every current
screen_it() call to remove everything that requires the command to be interpreted
by a shell.

The old run_process() and _run_process() functions are still present as
old_run_process() and _old_run_process() to support the deprecated screen_it()
function.  These will all go away in the future once all services have been
confirmed to have been changed over.

There is a similar new set of stop process functions stop_process() and
screen_stop_service().  The old screen_stop() will also remain for the deprecation
period.

As an initial test/demonstration this review also includes the changes for
lib/cinder to demonstrate what is required for every service.

I included the scripts I used to test this; tests/fake-service.sh and
tests/run-process.sh are quite rough around the edges and may bite.  They should
mature into productive members of the testing ecosystem someday.

Change-Id: I03322bf0208353ebd267811735c66f13a516637b
diff --git a/functions-common b/functions-common
index 3d66a0b..bf9447c 100644
--- a/functions-common
+++ b/functions-common
@@ -1135,8 +1135,8 @@
 # fork.  It includes the dirty work of closing extra filehandles and preparing log
 # files to produce the same logs as screen_it().  The log filename is derived
 # from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
-# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``
-# _run_process service "command-line"
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
+# _run_process service "command-line"
 function _run_process {
     local service=$1
     local command="$2"
@@ -1155,8 +1155,12 @@
         export PYTHONUNBUFFERED=1
     fi
 
-    exec /bin/bash -c "$command"
-    die "$service exec failure: $command"
+    # Run under ``setsid`` to force the process to become a session and group leader.
+    # The pid saved can be used with pkill -g to get the entire process group.
+    setsid $command & echo $! >$SERVICE_DIR/$SCREEN_NAME/$1.pid
+
+    # Just silently exit this process
+    exit 0
 }
 
 # Helper to remove the ``*.failure`` files under ``$SERVICE_DIR/$SCREEN_NAME``.
@@ -1184,61 +1188,63 @@
     return $exitcode
 }
 
-# run_process() launches a child process that closes all file descriptors and
-# then exec's the passed in command.  This is meant to duplicate the semantics
-# of screen_it() without screen.  PIDs are written to
-# ``$SERVICE_DIR/$SCREEN_NAME/$service.pid``
+# Run a single service under screen or directly
+# If the command includes shell metacharacters (;<>*) it must be run using a shell
 # run_process service "command-line"
 function run_process {
     local service=$1
     local command="$2"
 
-    # Spawn the child process
-    _run_process "$service" "$command" &
-    echo $!
+    if is_service_enabled $service; then
+        if [[ "$USE_SCREEN" = "True" ]]; then
+            screen_service "$service" "$command"
+        else
+            # Spawn directly without screen
+            _run_process "$service" "$command" &
+        fi
+    fi
 }
 
 # Helper to launch a service in a named screen
 # Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_NAME``, ``SCREEN_LOGDIR``,
 # ``SERVICE_DIR``, ``USE_SCREEN``
-# screen_it service "command-line"
-function screen_it {
+# screen_service service "command-line"
+# Run a command in a shell in a screen window
+function screen_service {
+    local service=$1
+    local command="$2"
+
     SCREEN_NAME=${SCREEN_NAME:-stack}
     SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
     USE_SCREEN=$(trueorfalse True $USE_SCREEN)
 
-    if is_service_enabled $1; then
+    if is_service_enabled $service; then
         # Append the service to the screen rc file
-        screen_rc "$1" "$2"
+        screen_rc "$service" "$command"
 
-        if [[ "$USE_SCREEN" = "True" ]]; then
-            screen -S $SCREEN_NAME -X screen -t $1
+        screen -S $SCREEN_NAME -X screen -t $service
 
-            if [[ -n ${SCREEN_LOGDIR} ]]; then
-                screen -S $SCREEN_NAME -p $1 -X logfile ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log
-                screen -S $SCREEN_NAME -p $1 -X log on
-                ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
-            fi
-
-            # sleep to allow bash to be ready to be send the command - we are
-            # creating a new window in screen and then sends characters, so if
-            # bash isn't running by the time we send the command, nothing happens
-            sleep 3
-
-            NL=`echo -ne '\015'`
-            # This fun command does the following:
-            # - the passed server command is backgrounded
-            # - the pid of the background process is saved in the usual place
-            # - the server process is brought back to the foreground
-            # - if the server process exits prematurely the fg command errors
-            #   and a message is written to stdout and the service failure file
-            # The pid saved can be used in screen_stop() as a process group
-            # id to kill off all child processes
-            screen -S $SCREEN_NAME -p $1 -X stuff "$2 & echo \$! >$SERVICE_DIR/$SCREEN_NAME/$1.pid; fg || echo \"$1 failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/$1.failure\"$NL"
-        else
-            # Spawn directly without screen
-            run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
+        if [[ -n ${SCREEN_LOGDIR} ]]; then
+            screen -S $SCREEN_NAME -p $service -X logfile ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
+            screen -S $SCREEN_NAME -p $service -X log on
+            ln -sf ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${service}.log
         fi
+
+        # sleep to allow bash to be ready to receive the command - we are
+        # creating a new window in screen and then sending characters, so if
+        # bash isn't running by the time we send the command, nothing happens
+        sleep 3
+
+        NL=`echo -ne '\015'`
+        # This fun command does the following:
+        # - the passed server command is backgrounded
+        # - the pid of the background process is saved in the usual place
+        # - the server process is brought back to the foreground
+        # - if the server process exits prematurely the fg command errors
+        #   and a message is written to stdout and the service failure file
+        # The pid saved can be used in screen_stop() as a process group
+        # id to kill off all child processes
+        screen -S $SCREEN_NAME -p $service -X stuff "$command & echo \$! >$SERVICE_DIR/$SCREEN_NAME/${service}.pid; fg || echo \"$service failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/${service}.failure\"$NL"
     fi
 }
 
@@ -1276,20 +1282,40 @@
 # that did not leave a PID behind
 # Uses globals ``SCREEN_NAME``, ``SERVICE_DIR``, ``USE_SCREEN``
 # screen_stop service
-function screen_stop {
+function screen_stop_service {
+    local service=$1
+
     SCREEN_NAME=${SCREEN_NAME:-stack}
     SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
     USE_SCREEN=$(trueorfalse True $USE_SCREEN)
 
-    if is_service_enabled $1; then
+    if is_service_enabled $service; then
+        # Clean up the screen window
+        screen -S $SCREEN_NAME -p $service -X kill
+    fi
+}
+
+# Stop a service process
+# If a PID is available use it, kill the whole process group via TERM
+# If screen is being used kill the screen window; this will catch processes
+# that did not leave a PID behind
+# Uses globals ``SCREEN_NAME``, ``SERVICE_DIR``, ``USE_SCREEN``
+# stop_process service
+function stop_process {
+    local service=$1
+
+    SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
+    USE_SCREEN=$(trueorfalse True $USE_SCREEN)
+
+    if is_service_enabled $service; then
         # Kill via pid if we have one available
-        if [[ -r $SERVICE_DIR/$SCREEN_NAME/$1.pid ]]; then
-            pkill -TERM -P -$(cat $SERVICE_DIR/$SCREEN_NAME/$1.pid)
-            rm $SERVICE_DIR/$SCREEN_NAME/$1.pid
+        if [[ -r $SERVICE_DIR/$SCREEN_NAME/$service.pid ]]; then
+            pkill -g $(cat $SERVICE_DIR/$SCREEN_NAME/$service.pid)
+            rm $SERVICE_DIR/$SCREEN_NAME/$service.pid
         fi
         if [[ "$USE_SCREEN" = "True" ]]; then
             # Clean up the screen window
-            screen -S $SCREEN_NAME -p $1 -X kill
+            screen_stop_service $service
         fi
     fi
 }
@@ -1325,6 +1351,80 @@
 }
 
 
+# Deprecated Functions
+# --------------------
+
+# _old_run_process() is designed to be backgrounded by old_run_process() to simulate a
+# fork.  It includes the dirty work of closing extra filehandles and preparing log
+# files to produce the same logs as screen_it().  The log filename is derived
+# from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
+# _old_run_process service "command-line"
+function _old_run_process {
+    local service=$1
+    local command="$2"
+
+    # Undo logging redirections and close the extra descriptors
+    exec 1>&3
+    exec 2>&3
+    exec 3>&-
+    exec 6>&-
+
+    if [[ -n ${SCREEN_LOGDIR} ]]; then
+        exec 1>&${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log 2>&1
+        ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
+
+        # TODO(dtroyer): Hack to get stdout from the Python interpreter for the logs.
+        export PYTHONUNBUFFERED=1
+    fi
+
+    exec /bin/bash -c "$command"
+    die "$service exec failure: $command"
+}
+
+# old_run_process() launches a child process that closes all file descriptors and
+# then exec's the passed in command.  This is meant to duplicate the semantics
+# of screen_it() without screen.  The child's PID is written to stdout; screen_it()
+# redirects it into ``$SERVICE_DIR/$SCREEN_NAME/$service.pid``.
+# old_run_process service "command-line"
+function old_run_process {
+    local service=$1
+    local command="$2"
+
+    # Spawn the child process
+    _old_run_process "$service" "$command" &
+    echo $!
+}
+
+# Compatibility for existing start_XXXX() functions
+# Uses globals ``SCREEN_NAME``, ``SERVICE_DIR``, ``USE_SCREEN``
+# screen_it service "command-line"
+function screen_it {
+    if is_service_enabled $1; then
+        # Append the service to the screen rc file
+        screen_rc "$1" "$2"
+
+        if [[ "$USE_SCREEN" = "True" ]]; then
+            screen_service "$1" "$2"
+        else
+            # Spawn directly without screen
+            old_run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
+        fi
+    fi
+}
+
+# Compatibility for existing stop_XXXX() functions
+# Stop a service in screen
+# If a PID is available use it, kill the whole process group via TERM
+# If screen is being used kill the screen window; this will catch processes
+# that did not leave a PID behind
+# screen_stop service
+function screen_stop {
+    # Clean up the screen window
+    stop_process $1
+}
+
+
 # Python Functions
 # ================
 
diff --git a/lib/cinder b/lib/cinder
index ce13b86..e54bd2a 100644
--- a/lib/cinder
+++ b/lib/cinder
@@ -431,15 +431,15 @@
         sudo tgtadm --mode system --op update --name debug --value on
     fi
 
-    screen_it c-api "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
+    run_process c-api "$CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
     echo "Waiting for Cinder API to start..."
     if ! wait_for_service $SERVICE_TIMEOUT $CINDER_SERVICE_PROTOCOL://$CINDER_SERVICE_HOST:$CINDER_SERVICE_PORT; then
         die $LINENO "c-api did not start"
     fi
 
-    screen_it c-sch "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
-    screen_it c-bak "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
-    screen_it c-vol "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
+    run_process c-sch "$CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
+    run_process c-bak "$CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
+    run_process c-vol "$CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
 
     # NOTE(jdg): For cinder, startup order matters.  To ensure that repor_capabilities is received
     # by the scheduler start the cinder-volume service last (or restart it) after the scheduler
diff --git a/tests/fake-service.sh b/tests/fake-service.sh
new file mode 100755
index 0000000..d4b9b56
--- /dev/null
+++ b/tests/fake-service.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# fake-service.sh - a fake service for start/stop testing
+# $1 - sleep time
+
+SLEEP_TIME=${1:-3}
+
+LOG=/tmp/fake-service.log
+TIMESTAMP_FORMAT=${TIMESTAMP_FORMAT:-"%F-%H%M%S"}
+
+# duplicate output
+exec 1> >(tee -a ${LOG})
+
+echo ""
+echo "Starting fake-service for ${SLEEP_TIME}"
+while true; do
+    echo "$(date +${TIMESTAMP_FORMAT}) [$$]"
+    sleep ${SLEEP_TIME}
+done
+
diff --git a/tests/run-process.sh b/tests/run-process.sh
new file mode 100755
index 0000000..cdffc3a
--- /dev/null
+++ b/tests/run-process.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# tests/run-process.sh - Test DevStack screen_it() and screen_stop()
+#
+# run-process.sh start|stop|status
+#
+# Set USE_SCREEN to change the default
+#
+# This script emulates the basic exec environment in ``stack.sh`` to test
+# the process spawn and kill operations.
+
+if [[ -z $1 ]]; then
+    echo "$0 start|stop"
+    exit 1
+fi
+
+TOP_DIR=$(cd $(dirname "$0")/.. && pwd)
+source $TOP_DIR/functions
+
+USE_SCREEN=${USE_SCREEN:-False}
+
+ENABLED_SERVICES=fake-service
+
+SERVICE_DIR=/tmp
+SCREEN_NAME=test
+SCREEN_LOGDIR=${SERVICE_DIR}/${SCREEN_NAME}
+
+
+# Kill background processes on exit
+trap clean EXIT
+clean() {
+    local r=$?
+    jobs -p
+    kill >/dev/null 2>&1 $(jobs -p)
+    exit $r
+}
+
+
+# Exit on any errors so that errors don't compound
+trap failed ERR
+failed() {
+    local r=$?
+    jobs -p
+    kill >/dev/null 2>&1 $(jobs -p)
+    set +o xtrace
+    [ -n "$LOGFILE" ] && echo "${0##*/} failed: full log in $LOGFILE"
+    exit $r
+}
+
+function status {
+    if [[ -r $SERVICE_DIR/$SCREEN_NAME/fake-service.pid ]]; then
+        pstree -pg $(cat $SERVICE_DIR/$SCREEN_NAME/fake-service.pid)
+    fi
+    ps -ef | grep fake
+}
+
+function setup_screen {
+if [[ ! -d $SERVICE_DIR/$SCREEN_NAME ]]; then
+    rm -rf $SERVICE_DIR/$SCREEN_NAME
+    mkdir -p $SERVICE_DIR/$SCREEN_NAME
+fi
+
+if [[ "$USE_SCREEN" == "True" ]]; then
+    # Create a new named screen to run processes in
+    screen -d -m -S $SCREEN_NAME -t shell -s /bin/bash
+    sleep 1
+
+    # Set a reasonable status bar
+    if [ -z "$SCREEN_HARDSTATUS" ]; then
+        SCREEN_HARDSTATUS='%{= .} %-Lw%{= .}%> %n%f %t*%{= .}%+Lw%< %-=%{g}(%{d}%H/%l%{g})'
+    fi
+    screen -r $SCREEN_NAME -X hardstatus alwayslastline "$SCREEN_HARDSTATUS"
+fi
+
+# Clear screen rc file
+SCREENRC=$TOP_DIR/tests/$SCREEN_NAME-screenrc
+if [[ -e $SCREENRC ]]; then
+    echo -n > $SCREENRC
+fi
+}
+
+# Mimic logging
+    # Set up output redirection without log files
+    # Copy stdout to fd 3
+    exec 3>&1
+    if [[ "$VERBOSE" != "True" ]]; then
+        # Throw away stdout and stderr
+        #exec 1>/dev/null 2>&1
+        :
+    fi
+    # Always send summary fd to original stdout
+    exec 6>&3
+
+
+if [[ "$1" == "start" ]]; then
+    echo "Start service"
+    setup_screen
+    screen_it fake-service "$TOP_DIR/tests/fake-service.sh"
+    sleep 1
+    status
+elif [[ "$1" == "stop" ]]; then
+    echo "Stop service"
+    screen_stop fake-service
+    status
+elif [[ "$1" == "status" ]]; then
+    status
+else
+    echo "Unknown command"
+    exit 1
+fi