XenAPI: Fix another race condition

Fix it properly this time by having run.sh record its own PID in a file and using that PID to track it.

A second issue is that upstart may run services twice; introduce a flock test
to ensure that we only run stack.sh once, as running in parallel causes issues.

Change-Id: I05990c7154366350b0f9cc3e6c70d6f34238486f
diff --git a/tools/xen/build_xva.sh b/tools/xen/build_xva.sh
index 7c8e620..7002e6e 100755
--- a/tools/xen/build_xva.sh
+++ b/tools/xen/build_xva.sh
@@ -119,9 +119,7 @@
 
     chown -R $STACK_USER /opt/stack
 
-    if su -c "/opt/stack/run.sh" $STACK_USER; then
-        touch /var/run/devstack.succeeded
-    fi
+    su -c "/opt/stack/run.sh" $STACK_USER
 
     # Update /etc/issue
     {
@@ -177,8 +175,19 @@
 cat <<EOF >$STAGING_DIR/opt/stack/run.sh
 #!/bin/bash
 set -eux
-cd /opt/stack/devstack
-./unstack.sh || true
-./stack.sh
+(
+  flock -n 9 || exit 1  # Single-instance guard: give up if another run.sh holds the lock
+  # Clear the success marker left behind by any previous run
+  rm -f /opt/stack/runsh.succeeded
+  echo \$\$ > /opt/stack/run_sh.pid  # PID of run.sh; overwrite (not append) so the file holds exactly one PID
+
+  cd /opt/stack/devstack
+  ./unstack.sh || true
+  ./stack.sh
+
+  # Got to the end - success
+  touch /opt/stack/runsh.succeeded
+  rm /opt/stack/run_sh.pid
+) 9> /opt/stack/.runsh_lock
 EOF
 chmod 755 $STAGING_DIR/opt/stack/run.sh
diff --git a/tools/xen/install_os_domU.sh b/tools/xen/install_os_domU.sh
index 3a63473..753f06a 100755
--- a/tools/xen/install_os_domU.sh
+++ b/tools/xen/install_os_domU.sh
@@ -365,8 +365,8 @@
 if [ "$WAIT_TILL_LAUNCH" = "1" ]  && [ -e ~/.ssh/id_rsa.pub  ] && [ "$COPYENV" = "1" ]; then
     set +x
 
-    echo "VM Launched - Waiting for devstack to start"
-    while ! ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS "service devstack status | grep -q running"; do
+    echo "VM Launched - Waiting for run.sh"
+    while ! ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS "test -e /opt/stack/run_sh.pid"; do
         sleep 10
     done
     echo -n "devstack service is running, waiting for stack.sh to start logging..."
@@ -376,14 +376,11 @@
     done
     set -x
 
-    # Watch devstack's output (which doesn't start until stack.sh is running,
-    # but wait for run.sh (which starts stack.sh) to exit as that is what
-    # hopefully writes the succeeded cookie.
-    pid=`ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS pgrep run.sh`
+    pid=`ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS "cat /opt/stack/run_sh.pid"`
     ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS "tail --pid $pid -n +1 -f /tmp/devstack/log/stack.log"
 
     # Fail if devstack did not succeed
-    ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS 'test -e /var/run/devstack.succeeded'
+    ssh_no_check -q stack@$OS_VM_MANAGEMENT_ADDRESS 'test -e /opt/stack/runsh.succeeded'
 
     set +x
     echo "################################################################################"
@@ -401,7 +398,7 @@
     echo ""
     echo "ssh into your domU now: 'ssh stack@$OS_VM_MANAGEMENT_ADDRESS' using your password"
     echo "and then do: 'sudo service devstack status' to check if devstack is still running."
-    echo "Check that /var/run/devstack.succeeded exists"
+    echo "Check that /opt/stack/runsh.succeeded exists"
     echo ""
     echo "When devstack completes, you can visit the OpenStack Dashboard"
     echo "at http://$OS_VM_SERVICES_ADDRESS, and contact other services at the usual ports."