#!/usr/bin/env python
#
# Copyright 2014 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


"""Dump the state of the world for post mortem."""

from __future__ import print_function

import argparse
import datetime
import fnmatch
import os
import os.path
import re
import subprocess
import sys

try:
    from distutils import spawn
except ImportError:  # distutils was removed from the stdlib in Python 3.12
    spawn = None


# Services that are sent SIGUSR2 by guru_meditation_reports() so that they
# write a Guru Meditation Report into their own logs.  Order is the order in
# which the reports are requested.
GMR_PROCESSES = (
    'nova-compute',
    'neutron-dhcp-agent', 'neutron-l3-agent',
    'neutron-linuxbridge-agent', 'neutron-metadata-agent',
    'neutron-openvswitch-agent',
    'cinder-volume',
)


def get_options(argv=None):
    """Parse the command line options for worlddump.

    :param argv: optional list of argument strings to parse.  When left as
                 None, argparse reads the arguments from ``sys.argv`` as
                 before.
    :returns: the argparse namespace, with ``dir`` (output directory,
              default ``'.'``) and ``name`` (extra tag for the file name,
              default ``''``).
    """
    parser = argparse.ArgumentParser(
        description='Dump world state for debugging')
    parser.add_argument('-d', '--dir',
                        default='.',
                        help='Output directory for worlddump')
    parser.add_argument('-n', '--name',
                        default='',
                        help='Additional name to tag into file')
    return parser.parse_args(argv)


def filename(dirname, name=""):
    """Build the path of the dump file.

    The file is called ``worlddump-<UTC timestamp>[-<name>].txt`` and is
    placed in ``dirname``.

    :param dirname: directory the dump file will live in.
    :param name: optional tag appended after the timestamp.
    :returns: the joined path as a string.
    """
    now = datetime.datetime.utcnow()
    base = now.strftime("worlddump-%Y-%m-%d-%H%M%S")
    if name:
        # The tag is appended after formatting so that a '%' in it is kept
        # literally instead of being read as a strftime directive.
        base += "-" + name
    return os.path.join(dirname, base + ".txt")


def warn(msg):
    """Print ``msg`` on stdout, prefixed with ``WARN: ``."""
    line = "WARN: %s" % msg
    print(line)


def _dump_cmd(cmd):
    """Run a shell command and let its output land in the dump.

    The command text is printed first, underlined with dashes, then the
    command is run through the shell with stdout inherited from this
    process.  A non-zero exit status is reported in the dump rather than
    raised.

    :param cmd: the shell command line to execute (passed with shell=True,
                so it must already be quoted appropriately by the caller).
    """
    print(cmd)
    print("-" * len(cmd))
    print()
    # The child writes straight to the file descriptor; push out anything
    # Python still holds in its own buffer so the heading stays above the
    # command output.
    sys.stdout.flush()
    try:
        subprocess.check_call(cmd, shell=True)
        print()
    except subprocess.CalledProcessError as e:
        print("*** Failed to run '%(cmd)s': %(err)s" % {'cmd': cmd, 'err': e})


def _find_cmd(cmd):
    """Report whether ``cmd`` can be found on the PATH.

    Prints a "not found: skipping" note into the dump when it cannot.

    :param cmd: executable name to look up.
    :returns: True when the executable was found, False otherwise.
    """
    # distutils is gone from the standard library as of Python 3.12, so
    # prefer shutil.which and only fall back to distutils on Python 2.
    try:
        from shutil import which as find_executable
    except ImportError:
        from distutils.spawn import find_executable

    if find_executable(cmd):
        return True
    print("*** %s not found: skipping" % cmd)
    return False


def _header(name):
    """Print a section title underlined with '=', padded by blank lines."""
    underline = "=" * len(name)
    print()
    print(name)
    print(underline)
    print()


def _bridge_list():
    """Return the Open vSwitch bridge names as a list of text strings.

    Runs ``sudo ovs-vsctl list-br`` and splits its output on whitespace.
    An empty list is returned (with a note printed into the dump) when the
    command cannot be started at all.
    """
    try:
        # universal_newlines=True makes the output text on Python 3 as well;
        # bytes would later be formatted into commands as "b'br-int'".
        process = subprocess.Popen(['sudo', 'ovs-vsctl', 'list-br'],
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
    except OSError as e:
        print("*** Failed to run 'sudo ovs-vsctl list-br': %s" % e)
        return []
    stdout, _ = process.communicate()
    return stdout.split()


def _get_ofp_version():
    """Return the highest OpenFlow minor version openvswitch supports.

    For example ``ovs-ofctl --version`` displays the following:

        ovs-ofctl (Open vSwitch) 2.0.2
        Compiled Dec  9 2015 14:08:08
        OpenFlow versions 0x1:0x4

    The above shows that openvswitch supports OpenFlow10 through
    OpenFlow13.  The upper bound of the ``OpenFlow versions`` range is read
    as a hexadecimal wire version and one is subtracted, so ``0x4`` yields
    ``3``.

    :returns: the minor version as an int, or 0 (i.e. only OpenFlow10) when
              the command cannot be run or its output has no version range.
    """
    try:
        # universal_newlines=True so the output is text on Python 3 too.
        process = subprocess.Popen(['ovs-ofctl', '--version'],
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
    except OSError as e:
        print("*** Failed to run 'ovs-ofctl --version': %s" % e)
        return 0
    stdout, _ = process.communicate()
    # Match the range explicitly instead of slicing to the end of the
    # output, so extra lines after the version line do no harm.
    match = re.search(
        r'OpenFlow versions 0x[0-9a-fA-F]+:0x([0-9a-fA-F]+)', stdout)
    if match is None:
        print("*** Could not find OpenFlow versions in ovs-ofctl output")
        return 0
    return int(match.group(1), 16) - 1


def disk_space():
    """Dump ``df -Ph`` and warn about file systems that are over 95% full.

    A ``WARN:`` line is printed for every device whose capacity column is
    above 95, followed by the raw ``df`` output.
    """
    _header("File System Summary")

    # Close the pipe explicitly instead of leaving it to garbage collection.
    pipe = os.popen("df -Ph")
    try:
        dfraw = pipe.read()
    finally:
        pipe.close()

    for line in dfraw.splitlines():
        fields = line.split()
        if len(fields) < 6:
            # Not a complete df row (blank or truncated line); skip it
            # rather than fail on a missing column.
            continue
        device, capacity = fields[0], fields[4]
        # A mount point containing spaces is split over several fields.
        mount = " ".join(fields[5:])
        try:
            used = int(capacity[:-1])
        except ValueError:
            # if it doesn't look like an int (e.g. the header row),
            # that's fine
            continue
        if used > 95:
            warn("Device %s (%s) is %s full, might be an issue" % (
                device, mount, capacity))

    print(dfraw)


def ebtables_dump():
    """Dump the ebtables filter, nat and broute tables.

    Nothing beyond the section header and a "not found" note is written
    when the ``ebtables`` executable is not on the PATH.
    """
    _header("EB Tables Dump")
    if _find_cmd('ebtables'):
        for table_name in ('filter', 'nat', 'broute'):
            _dump_cmd("sudo ebtables -t %s -L" % table_name)


def iptables_dump():
    """Dump the iptables filter, nat and mangle tables with line numbers."""
    _header("IP Tables Dump")

    for table_name in ('filter', 'nat', 'mangle'):
        _dump_cmd("sudo iptables --line-numbers -L -nv -t %s" % table_name)


def _netns_list():
    """Return the names of the network namespaces as text strings.

    Runs ``ip netns`` and keeps the first word of every non-blank line.
    An empty list is returned (with a note printed into the dump) when the
    command cannot be started at all.
    """
    try:
        # universal_newlines=True so names are text on Python 3 as well;
        # bytes would later be formatted into commands as "b'qrouter-...'".
        process = subprocess.Popen(['ip', 'netns'], stdout=subprocess.PIPE,
                                   universal_newlines=True)
    except OSError as e:
        print("*** Failed to run 'ip netns': %s" % e)
        return []
    stdout, _ = process.communicate()
    # NOTE(jlvillal): Sometimes 'ip netns list' can return output like:
    #   qrouter-0805fd7d-c493-4fa6-82ca-1c6c9b23cd9e (id: 1)
    #   qdhcp-bb2cc6ae-2ae8-474f-adda-a94059b872b5 (id: 0)
    # so only the first word is the namespace name.  Blank lines are
    # skipped because they have no first word to take.
    return [line.split()[0] for line in stdout.splitlines() if line.strip()]


def network_dump():
    """Dump bridge, ARP and ip state, then repeat the ip state per netns.

    ``ip addr``, ``ip link`` and ``ip route`` are run for the host and then
    inside every namespace returned by _netns_list(); ``ip netns`` itself is
    only run for the host.
    """
    _header("Network Dump")

    for host_cmd in ("brctl show", "arp -n"):
        _dump_cmd(host_cmd)

    per_namespace = ["addr", "link", "route"]
    for subcommand in per_namespace + ['netns']:
        _dump_cmd("ip %s" % subcommand)

    for namespace in _netns_list():
        for subcommand in per_namespace:
            _dump_cmd('sudo ip netns exec %(netns)s ip %(cmd)s' % {
                'netns': namespace, 'cmd': subcommand})


def ovs_dump():
    """Dump the Open vSwitch configuration and per-bridge OpenFlow state.

    Runs ``ovs-vsctl show`` and then, for every ofctl sub-command, queries
    each bridge with all protocol versions from OpenFlow10 up to the
    maximum reported by _get_ofp_version().
    """
    _header("Open vSwitch Dump")

    # NOTE(cdent): If we're not using neutron + ovs these commands
    # will not be present, so there is nothing to dump.
    if not _find_cmd('ovs-vsctl'):
        return

    bridges = _bridge_list()
    ofctl_cmds = ('show', 'dump-ports-desc', 'dump-ports', 'dump-flows')
    ofp_max = _get_ofp_version()
    # Comma separated protocol list: OpenFlow10,OpenFlow11,...
    vers = 'OpenFlow10' + ''.join(
        ',OpenFlow1' + str(minor) for minor in range(1, ofp_max + 1))

    _dump_cmd("sudo ovs-vsctl show")
    for ofctl_cmd in ofctl_cmds:
        for bridge in bridges:
            _dump_cmd(
                "sudo ovs-ofctl --protocols=%(vers)s %(cmd)s %(bridge)s" % {
                    'vers': vers, 'cmd': ofctl_cmd, 'bridge': bridge})


def process_list():
    """Dump a listing of all processes with a fixed set of ps columns."""
    _header("Process Listing")
    columns = "user,ppid,pid,pcpu,pmem,vsz,rss,tty,stat,start,time,args"
    _dump_cmd("ps axo " + columns)


def compute_consoles():
    """Dump every ``console.log`` file found under ``/opt/stack``."""
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    _header("Compute consoles")
    for root, _dirnames, filenames in os.walk('/opt/stack'):
        for console_log in fnmatch.filter(filenames, 'console.log'):
            fullpath = os.path.join(root, console_log)
            # The path goes through the shell, so quote it: a directory
            # name with spaces or metacharacters must stay one argument.
            _dump_cmd("sudo cat %s" % quote(fullpath))


def guru_meditation_reports():
    """Ask each service in GMR_PROCESSES to emit a Guru Meditation Report.

    A service is sent SIGUSR2 through ``killall`` only when ``pgrep -f``
    finds a matching process; the report itself ends up in that service's
    log, not in this dump.
    """
    for service in GMR_PROCESSES:
        _header("%s Guru Meditation Report" % service)

        # pgrep exits non-zero when nothing matches.
        is_running = subprocess.call(['pgrep', '-f', service]) == 0
        if not is_running:
            print("Skipping as %s does not appear to be running" % service)
            continue

        _dump_cmd("killall -e -USR2 %s" % service)
        print("guru meditation report in %s log" % service)


def var_core():
    """List the contents of ``/var/core`` when that path exists."""
    if not os.path.exists('/var/core'):
        return

    _header("/var/core dumps")
    # NOTE(ianw) : see DEBUG_LIBVIRT_COREDUMPS.  We could think
    # about getting backtraces out of these.  There are other
    # tools out there that can do that sort of thing though.
    _dump_cmd("ls -ltrah /var/core")


def main():
    """Write the world dump to a timestamped file in the output directory.

    The stdout file descriptor is pointed at the dump file so that both
    Python's prints and the output of spawned commands end up in it.
    """
    opts = get_options()
    fname = filename(opts.dir, opts.name)
    print("World dumping... see %s for details" % fname)
    # Push the banner out now; once the descriptor is redirected below,
    # anything still buffered would be written into the dump file instead.
    sys.stdout.flush()
    # Line buffering keeps Python's output ordered with respect to the
    # commands that write directly to the descriptor.  Unbuffered mode
    # (buffering=0) is rejected for text streams on Python 3.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1)
    with open(fname, 'w') as f:
        os.dup2(f.fileno(), sys.stdout.fileno())
        disk_space()
        process_list()
        network_dump()
        ovs_dump()
        iptables_dump()
        ebtables_dump()
        compute_consoles()
        guru_meditation_reports()
        var_core()


if __name__ == '__main__':
    # Exit with main()'s result, or with status 1 when interrupted by Ctrl-C.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        exit_status = 1
    sys.exit(exit_status)