Added list of mlock-using processes to peakmem_tracker output

The change makes peakmem_tracker list processes that lock memory pages
from swapping to disk. It may be helpful when debugging oom-killer job
failures in gate in case when dstat shows that swap is not fully used
when oom-killer is triggered.

The peakmem_tracker service was renamed into memory_tracker to reflect
its new broader scope.

Needed-By: I5862d92478397eac2e61b8a61ce3437b698678be
Change-Id: I1dca120448ee87930fe903fd81277b58efaefc92
diff --git a/lib/dstat b/lib/dstat
index b705948..62795f5 100644
--- a/lib/dstat
+++ b/lib/dstat
@@ -21,16 +21,22 @@
     # A better kind of sysstat, with the top process per time slice
     run_process dstat "$TOP_DIR/tools/dstat.sh $LOGDIR"
 
-    # To enable peakmem_tracker add:
-    #    enable_service peakmem_tracker
+    # To enable memory_tracker add:
+    #    enable_service memory_tracker
     # to your localrc
-    run_process peakmem_tracker "$TOP_DIR/tools/peakmem_tracker.sh"
+    run_process memory_tracker "$TOP_DIR/tools/memory_tracker.sh"
+
+    # remove support for the old name when it's no longer used (sometime in Queens)
+    if is_service_enabled peakmem_tracker; then
+        deprecated "Use of peakmem_tracker in devstack is deprecated, use memory_tracker instead"
+        run_process peakmem_tracker "$TOP_DIR/tools/memory_tracker.sh"
+    fi
 }
 
 # stop_dstat() stop dstat process
 function stop_dstat {
     stop_process dstat
-    stop_process peakmem_tracker
+    stop_process memory_tracker
 }
 
 # Restore xtrace
diff --git a/tools/memory_tracker.sh b/tools/memory_tracker.sh
new file mode 100755
index 0000000..dac0267
--- /dev/null
+++ b/tools/memory_tracker.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+set -o errexit
+
+# time to sleep between checks
+SLEEP_TIME=20
+
+# MemAvailable is the best estimation and has built-in heuristics
+# around reclaimable memory.  However, it is not available until 3.14
+# kernel (i.e. Ubuntu LTS Trusty misses it).  In that case, we fall
+# back to free+buffers+cache as the available memory.
+USE_MEM_AVAILABLE=0
+if grep -q '^MemAvailable:' /proc/meminfo; then
+    USE_MEM_AVAILABLE=1
+fi
+
+function get_mem_unevictable {
+    awk '/^Unevictable:/ {print $2}' /proc/meminfo
+}
+
+function get_mem_available {
+    if [[ $USE_MEM_AVAILABLE -eq 1 ]]; then
+        awk '/^MemAvailable:/ {print $2}' /proc/meminfo
+    else
+        awk '/^MemFree:/ {free=$2}
+            /^Buffers:/ {buffers=$2}
+            /^Cached:/  {cached=$2}
+            END { print free+buffers+cached }' /proc/meminfo
+    fi
+}
+
+function tracker {
+    local low_point
+    local unevictable_point
+    low_point=$(get_mem_available)
+    # log mlocked memory at least on first iteration
+    unevictable_point=0
+    while [ 1 ]; do
+
+        local mem_available
+        mem_available=$(get_mem_available)
+
+        local unevictable
+        unevictable=$(get_mem_unevictable)
+
+        if [ $mem_available -lt $low_point -o $unevictable -ne $unevictable_point ]; then
+            echo "[[["
+            date
+
+            # whenever we see less memory available than last time, dump the
+            # snapshot of current usage; i.e. checking the latest entry in the file
+            # will give the peak-memory usage
+            if [[ $mem_available -lt $low_point ]]; then
+                low_point=$mem_available
+                echo "---"
+                # always available greppable output; given difference in
+                # meminfo output as described above...
+                echo "memory_tracker low_point: $mem_available"
+                echo "---"
+                cat /proc/meminfo
+                echo "---"
+                # would hierarchial view be more useful (-H)?  output is
+                # not sorted by usage then, however, and the first
+                # question is "what's using up the memory"
+                #
+                # there are a lot of kernel threads, especially on a 8-cpu
+                # system.  do a best-effort removal to improve
+                # signal/noise ratio of output.
+                ps --sort=-pmem -eo pid:10,pmem:6,rss:15,ppid:10,cputime:10,nlwp:8,wchan:25,args:100 |
+                    grep -v ']$'
+            fi
+            echo "---"
+
+            # list processes that lock memory from swap
+            if [[ $unevictable -ne $unevictable_point ]]; then
+                unevictable_point=$unevictable
+                sudo ./tools/mlock_report.py
+            fi
+
+            echo "]]]"
+        fi
+        sleep $SLEEP_TIME
+    done
+}
+
+function usage {
+    echo "Usage: $0 [-x] [-s N]" 1>&2
+    exit 1
+}
+
+while getopts ":s:x" opt; do
+    case $opt in
+        s)
+            SLEEP_TIME=$OPTARG
+            ;;
+        x)
+            set -o xtrace
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+shift $((OPTIND-1))
+
+tracker
diff --git a/tools/mlock_report.py b/tools/mlock_report.py
new file mode 100755
index 0000000..1d23af9
--- /dev/null
+++ b/tools/mlock_report.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+# This tool lists processes that lock memory pages from swapping to disk.
+
+import re
+import subprocess
+
+import psutil
+
+
+SUMMARY_REGEX = re.compile(r".*\s+(?P<locked>[\d]+)\s+KB")
+
+
+def main():
+    try:
+        print _get_report()
+    except Exception as e:
+        print "Failure listing processes locking memory: %s" % str(e)
+
+
+def _get_report():
+    mlock_users = []
+    for proc in psutil.process_iter():
+        pid = proc.pid
+        # sadly psutil does not expose locked pages info, that's why we
+        # call to pmap and parse the output here
+        try:
+            out = subprocess.check_output(['pmap', '-XX', str(pid)])
+        except subprocess.CalledProcessError as e:
+            # 42 means process just vanished, which is ok
+            if e.returncode == 42:
+                continue
+            raise
+        last_line = out.splitlines()[-1]
+
+        # some processes don't provide a memory map, for example those
+        # running as kernel services, so we need to skip those that don't
+        # match
+        result = SUMMARY_REGEX.match(last_line)
+        if result:
+            locked = int(result.group('locked'))
+            if locked:
+                mlock_users.append({'name': proc.name(),
+                                    'pid': pid,
+                                    'locked': locked})
+
+    # produce a single line log message with per process mlock stats
+    if mlock_users:
+        return "; ".join(
+            "[%(name)s (pid:%(pid)s)]=%(locked)dKB" % args
+            # log heavy users first
+            for args in sorted(mlock_users, key=lambda d: d['locked'])
+        )
+    else:
+        return "no locked memory"
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/peakmem_tracker.sh b/tools/peakmem_tracker.sh
deleted file mode 100755
index ecbd79a..0000000
--- a/tools/peakmem_tracker.sh
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/bin/bash
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-set -o errexit
-
-# time to sleep between checks
-SLEEP_TIME=20
-
-# MemAvailable is the best estimation and has built-in heuristics
-# around reclaimable memory.  However, it is not available until 3.14
-# kernel (i.e. Ubuntu LTS Trusty misses it).  In that case, we fall
-# back to free+buffers+cache as the available memory.
-USE_MEM_AVAILBLE=0
-if grep -q '^MemAvailable:' /proc/meminfo; then
-    USE_MEM_AVAILABLE=1
-fi
-
-function get_mem_available {
-    if [[ $USE_MEM_AVAILABLE -eq 1 ]]; then
-        awk '/^MemAvailable:/ {print $2}' /proc/meminfo
-    else
-        awk '/^MemFree:/ {free=$2}
-            /^Buffers:/ {buffers=$2}
-            /^Cached:/  {cached=$2}
-            END { print free+buffers+cached }' /proc/meminfo
-    fi
-}
-
-# whenever we see less memory available than last time, dump the
-# snapshot of current usage; i.e. checking the latest entry in the
-# file will give the peak-memory usage
-function tracker {
-    local low_point
-    low_point=$(get_mem_available)
-    while [ 1 ]; do
-
-        local mem_available
-        mem_available=$(get_mem_available)
-
-        if [[ $mem_available -lt $low_point ]]; then
-            low_point=$mem_available
-            echo "[[["
-            date
-            echo "---"
-            # always available greppable output; given difference in
-            # meminfo output as described above...
-            echo "peakmem_tracker low_point: $mem_available"
-            echo "---"
-            cat /proc/meminfo
-            echo "---"
-            # would hierarchial view be more useful (-H)?  output is
-            # not sorted by usage then, however, and the first
-            # question is "what's using up the memory"
-            #
-            # there are a lot of kernel threads, especially on a 8-cpu
-            # system.  do a best-effort removal to improve
-            # signal/noise ratio of output.
-            ps --sort=-pmem -eo pid:10,pmem:6,rss:15,ppid:10,cputime:10,nlwp:8,wchan:25,args:100 |
-                grep -v ']$'
-            echo "]]]"
-        fi
-
-        sleep $SLEEP_TIME
-    done
-}
-
-function usage {
-    echo "Usage: $0 [-x] [-s N]" 1>&2
-    exit 1
-}
-
-while getopts ":s:x" opt; do
-    case $opt in
-        s)
-            SLEEP_TIME=$OPTARG
-            ;;
-        x)
-            set -o xtrace
-            ;;
-        *)
-            usage
-            ;;
-    esac
-done
-shift $((OPTIND-1))
-
-tracker