#!/usr/bin/env python3
#
# Copyright 2014 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


"""Dump the state of the world for post mortem."""
| 19 | |
| 20 | import argparse |
| 21 | import datetime |
Sean Dague | 737e942 | 2015-05-12 19:51:39 -0400 | [diff] [blame] | 22 | import fnmatch |
Federico Ressi | 21a10d3 | 2020-01-31 07:43:30 +0100 | [diff] [blame] | 23 | import io |
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 24 | import os |
Jens Harbott | ce396d3 | 2019-09-05 08:51:33 +0000 | [diff] [blame] | 25 | import shutil |
Ian Wienand | 99440f9 | 2015-07-01 06:14:01 +1000 | [diff] [blame] | 26 | import subprocess |
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 27 | import sys |
| 28 | |
| 29 | |
# Service process names that guru_meditation_reports() looks for with
# ``pgrep -f`` and, when found, signals with ``killall -e -USR2``.
GMR_PROCESSES = (
    'nova-compute',
    'neutron-dhcp-agent',
    'neutron-l3-agent',
    'neutron-metadata-agent',
    'neutron-openvswitch-agent',
    'cinder-volume',
)
| 38 | |
| 39 | |
def get_options():
    """Parse the worlddump command line.

    Returns:
        The argparse namespace with ``dir`` (output directory, default
        ``'.'``) and ``name`` (extra tag for the file name, default ``''``).
    """
    cli = argparse.ArgumentParser(
        description='Dump world state for debugging')
    option_specs = (
        (('-d', '--dir'),
         {'default': '.', 'help': 'Output directory for worlddump'}),
        (('-n', '--name'),
         {'default': '', 'help': 'Additional name to tag into file'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
| 50 | |
| 51 | |
def filename(dirname, name=""):
    """Build the path of a timestamped worlddump file.

    The file is named ``worlddump-<UTC timestamp>[-<name>].txt``.

    Args:
        dirname: Directory the dump file will live in.
        name: Optional tag appended after the timestamp.

    Returns:
        ``dirname`` joined with the generated file name.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    # Only the fixed template goes through strftime; the caller-supplied tag
    # is appended afterwards so any '%' it contains is kept literally instead
    # of being expanded as a format directive.
    parts = [now.strftime("worlddump-%Y-%m-%d-%H%M%S")]
    if name:
        parts.append(name)
    return os.path.join(dirname, "-".join(parts) + ".txt")
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 59 | |
| 60 | |
def warn(msg):
    """Print ``msg`` on stdout with a ``WARN: `` prefix."""
    line = "WARN: %s" % msg
    print(line)
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 63 | |
| 64 | |
def _dump_cmd(cmd):
    """Run ``cmd`` through the shell under a dash-underlined heading.

    The command string, an underline of the same length and a blank line are
    printed first. The command then runs with ``shell=True`` and writes to
    the inherited stdout. A non-zero exit status is reported with a
    ``*** Failed to run`` line instead of being raised.
    """
    underline = "-" * len(cmd)
    for line in (cmd, underline, ""):
        print(line)
    try:
        subprocess.check_call(cmd, shell=True)
    except subprocess.CalledProcessError as exc:
        print("*** Failed to run '%(cmd)s': %(err)s" % {'cmd': cmd, 'err': exc})
    else:
        print()
Sean Dague | 60a1405 | 2015-05-11 14:53:39 -0400 | [diff] [blame] | 74 | |
| 75 | |
def _find_cmd(cmd):
    """Report whether ``cmd`` resolves via ``shutil.which``.

    Prints a ``not found: skipping`` notice and returns False when it does
    not; returns True otherwise.
    """
    if shutil.which(cmd):
        return True
    print("*** %s not found: skipping" % cmd)
    return False
| 81 | |
| 82 | |
def _header(name):
    """Print ``name`` as a section title: blank line, title, ``=`` rule, blank line."""
    rule = "=" * len(name)
    print("\n%s\n%s\n" % (name, rule))
Sean Dague | 60a1405 | 2015-05-11 14:53:39 -0400 | [diff] [blame] | 88 | |
| 89 | |
def _bridge_list():
    """Return the output of ``sudo ovs-vsctl list-br`` split on whitespace.

    Returns:
        A list of ``bytes`` tokens, one per bridge name printed.
    """
    argv = ['sudo', 'ovs-vsctl', 'list-br']
    with subprocess.Popen(argv, stdout=subprocess.PIPE) as proc:
        raw_names = proc.communicate()[0]
    return raw_names.split()
| 95 | |
| 96 | |
# This method gets the max OpenFlow version supported by openvswitch.
# For example 'ovs-ofctl --version' displays the following:
#
#     ovs-ofctl (Open vSwitch) 2.0.2
#     Compiled Dec 9 2015 14:08:08
#     OpenFlow versions 0x1:0x4
#
# The above shows that openvswitch supports OpenFlow10 through OpenFlow13.
# The max version is found by searching for 'OpenFlow versions 0x1:0x'; the
# hexadecimal value that follows, minus one, is returned as an integer
# (0x4 -> 3, i.e. OpenFlow13).
def _parse_ofp_version(output):
    """Extract the max OpenFlow minor version from ``ovs-ofctl --version``.

    Args:
        output: Raw ``bytes`` printed by ``ovs-ofctl --version``.

    Returns:
        The hex value following ``OpenFlow versions 0x1:0x`` minus one, or 0
        when the marker is missing or the value cannot be parsed.
    """
    marker = b'OpenFlow versions 0x1:0x'
    start = output.find(marker)
    if start < 0:
        return 0
    # Take only the token right after the marker so trailing text or extra
    # lines after the version do not end up in the number.
    tokens = output[start + len(marker):].split()
    if not tokens:
        return 0
    try:
        wire_version = int(tokens[0], 16)
    except ValueError:
        return 0
    return max(wire_version - 1, 0)


def _get_ofp_version():
    """Return the max OpenFlow minor version reported by ``ovs-ofctl``.

    Returns:
        An integer >= 0. 0 is returned when ``ovs-ofctl`` cannot be started
        or its output cannot be parsed, so that a post-mortem dump keeps
        going instead of aborting on an exception.
    """
    try:
        process = subprocess.Popen(['ovs-ofctl', '--version'],
                                   stdout=subprocess.PIPE)
    except OSError:
        # ovs-ofctl is not installed or not executable.
        return 0
    stdout, _ = process.communicate()
    return _parse_ofp_version(stdout)
| 114 | |
| 115 | |
def disk_space():
    """Print the ``df -Ph`` summary and warn about nearly full file systems.

    A warning is emitted through ``warn`` for every record whose use
    percentage is above 95, then the raw ``df`` output is printed.
    """
    _header("File System Summary")

    # Close the pipe deterministically rather than leaving it to the GC.
    with os.popen("df -Ph") as pipe:
        dfraw = pipe.read()

    for line in dfraw.splitlines():
        # Columns: Filesystem, Size, Used, Avail, Use%, Mounted on. The split
        # is capped so a mount point containing spaces stays in one piece.
        fs = line.split(None, 5)
        if len(fs) < 6:
            # not a complete df record; nothing to check
            continue
        try:
            used_pct = int(fs[4][:-1])
        except ValueError:
            # if it doesn't look like an int (e.g. the header row), that's
            # fine
            continue
        if used_pct > 95:
            warn("Device %s (%s) is %s full, might be an issue" % (
                fs[0], fs[5], fs[4]))

    print(dfraw)
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 132 | |
| 133 | |
def ebtables_dump():
    """Dump the ebtables ``filter`` and ``nat`` tables.

    Only the section header and the skip notice from ``_find_cmd`` are
    printed when ``ebtables`` is not available.
    """
    _header("EB Tables Dump")
    if _find_cmd('ebtables'):
        for table_name in ('filter', 'nat'):
            _dump_cmd("sudo ebtables -t %s -L" % table_name)
Sean Dague | 2da606d | 2015-08-06 10:02:43 -0400 | [diff] [blame] | 141 | |
| 142 | |
def iptables_dump():
    """Dump the iptables ``filter``, ``nat`` and ``mangle`` tables."""
    _header("IP Tables Dump")

    listing_template = "sudo iptables --line-numbers -L -nv -t %s"
    for table_name in ('filter', 'nat', 'mangle'):
        _dump_cmd(listing_template % table_name)
| 149 | |
| 150 | |
def _netns_list():
    """Return the network namespace names printed by ``ip netns``.

    Returns:
        A list of ``bytes`` names, one per non-blank output line. The list
        is empty when ``ip`` cannot be started, so the caller simply has no
        namespaces to iterate instead of aborting the dump.
    """
    try:
        process = subprocess.Popen(['ip', 'netns'], stdout=subprocess.PIPE)
    except OSError:
        # 'ip' is not installed or not executable.
        return []
    stdout, _ = process.communicate()
    # NOTE(jlvillal): Sometimes 'ip netns list' can return output like:
    #   qrouter-0805fd7d-c493-4fa6-82ca-1c6c9b23cd9e (id: 1)
    #   qdhcp-bb2cc6ae-2ae8-474f-adda-a94059b872b5 (id: 0)
    # so only the first field of each line is the name. Blank lines have no
    # first field and are skipped.
    split_lines = (line.split() for line in stdout.splitlines())
    return [fields[0] for fields in split_lines if fields]
Ihar Hrachyshka | 72c34ee | 2016-01-30 16:18:01 +0100 | [diff] [blame] | 159 | |
| 160 | |
def network_dump():
    """Dump bridge, neighbour, address and route state.

    The ``ip`` sub-commands run once on the host and then once inside every
    namespace returned by ``_netns_list``.
    """
    _header("Network Dump")

    for bridge_cmd in ("bridge link", "ip link show type bridge"):
        _dump_cmd(bridge_cmd)

    ip_cmds = ["neigh", "addr", "route", "-6 route"]
    for sub_cmd in ip_cmds + ['netns']:
        _dump_cmd("ip %s" % sub_cmd)

    for raw_ns in _netns_list():
        # _netns_list yields bytes; the shell command line needs text.
        ns_name = bytes.decode(raw_ns)
        for sub_cmd in ip_cmds:
            _dump_cmd('sudo ip netns exec %(netns)s ip %(cmd)s'
                      % {'netns': ns_name, 'cmd': sub_cmd})
Sean Dague | 168b7c2 | 2015-05-07 08:57:28 -0400 | [diff] [blame] | 173 | |
| 174 | |
def ovs_dump():
    """Dump the Open vSwitch configuration and per-bridge OpenFlow state.

    Runs ``ovs-vsctl show`` and then, for each ``ovs-ofctl`` sub-command,
    queries every bridge returned by ``_bridge_list``.
    """
    _header("Open vSwitch Dump")

    # NOTE(cdent): If we're not using neutron + ovs these commands
    # will not be present, so skip the whole section.
    if not _find_cmd('ovs-vsctl'):
        return

    bridge_names = _bridge_list()
    ofp_max = _get_ofp_version()
    # OpenFlow10 is always requested; every later minor version up to the
    # reported maximum is added to the list.
    protocol_names = ['OpenFlow10']
    protocol_names.extend(
        'OpenFlow1' + str(minor) for minor in range(1, ofp_max + 1))
    protocols = ','.join(protocol_names)

    _dump_cmd("sudo ovs-vsctl show")
    for action in ('show', 'dump-ports-desc', 'dump-ports', 'dump-flows'):
        for raw_bridge in bridge_names:
            _dump_cmd(
                "sudo ovs-ofctl --protocols=%(vers)s %(cmd)s %(bridge)s" % {
                    'vers': protocols,
                    'cmd': action,
                    'bridge': bytes.decode(raw_bridge),
                })
Ihar Hrachyshka | c1b7cb1 | 2016-02-11 13:50:46 +0100 | [diff] [blame] | 194 | |
| 195 | |
def process_list():
    """Dump a ``ps axo`` listing with a fixed set of columns."""
    _header("Process Listing")
    columns = ("user", "ppid", "pid", "pcpu", "pmem", "vsz",
               "rss", "tty", "stat", "start", "time", "args")
    _dump_cmd("ps axo " + ",".join(columns))
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 200 | |
| 201 | |
def compute_consoles():
    """Dump every ``console.log`` file found under ``/opt/stack``."""
    _header("Compute consoles")
    for dirpath, _subdirs, entries in os.walk('/opt/stack'):
        matching = fnmatch.filter(entries, 'console.log')
        for log_name in matching:
            _dump_cmd("sudo cat %s" % os.path.join(dirpath, log_name))
| 208 | |
| 209 | |
def guru_meditation_reports():
    """Signal each service in ``GMR_PROCESSES`` with USR2.

    A service is skipped, with a notice, when ``pgrep -f`` exits non-zero
    for its name.
    """
    for service in GMR_PROCESSES:
        _header("%s Guru Meditation Report" % service)

        pgrep_status = subprocess.call(['pgrep', '-f', service])
        if pgrep_status != 0:
            print("Skipping as %s does not appear to be running" % service)
            continue

        _dump_cmd("killall -e -USR2 %s" % service)
        print("guru meditation report in %s log" % service)
Joe Gordon | 2ebe993 | 2015-06-07 16:57:34 +0900 | [diff] [blame] | 222 | |
| 223 | |
def var_core():
    """List the contents of ``/var/core`` when that path exists."""
    if not os.path.exists('/var/core'):
        return
    _header("/var/core dumps")
    # NOTE(ianw) : see DEBUG_LIBVIRT_COREDUMPS.  We could think
    # about getting backtraces out of these.  There are other
    # tools out there that can do that sort of thing though.
    _dump_cmd("ls -ltrah /var/core")
| 231 | |
Federico Ressi | 21a10d3 | 2020-01-31 07:43:30 +0100 | [diff] [blame] | 232 | |
def disable_stdio_buffering():
    """Replace ``sys.stdout`` with an unbuffered, write-through text stream.

    The current stdout file descriptor is re-opened in unbuffered binary
    mode and wrapped in a ``TextIOWrapper`` with ``write_through=True``.
    """
    stdout_fd = sys.stdout.fileno()
    raw_stdout = io.open(stdout_fd, mode='wb', buffering=0)
    sys.stdout = io.TextIOWrapper(raw_stdout, write_through=True)
| 238 | |
| 239 | |
def main():
    """Write a full world dump to a timestamped file and a ``-latest`` copy.

    The dump file path comes from ``filename`` using the ``--dir`` and
    ``--name`` options. While the dump sections run, the stdout file
    descriptor is pointed at that file so that both ``print`` output and the
    output of spawned commands end up in it. Afterwards the file is copied
    to ``worlddump[-<name>]-latest.txt`` in the same directory.
    """
    opts = get_options()
    fname = filename(opts.dir, opts.name)
    # Flush right away: this notice is written through the original, possibly
    # block-buffered stdout. Without the flush it would only be written out
    # at interpreter exit, after fd 1 has been redirected below, and so would
    # land in the dump file instead of where the caller can see it.
    print("World dumping... see %s for details" % fname, flush=True)

    disable_stdio_buffering()

    with io.open(fname, 'w') as f:
        # Redirect at file-descriptor level so child processes inherit it.
        os.dup2(f.fileno(), sys.stdout.fileno())
        disk_space()
        process_list()
        network_dump()
        ovs_dump()
        iptables_dump()
        ebtables_dump()
        compute_consoles()
        guru_meditation_reports()
        var_core()
    # Singular name for ease of log retrieval
    copyname = os.path.join(opts.dir, 'worlddump')
    if opts.name:
        copyname += '-' + opts.name
    copyname += '-latest.txt'
    # We make a full copy to deal with jobs that may or may not
    # gzip logs breaking symlinks.
    shutil.copyfile(fname, copyname)
Sean Dague | 97fcc7b | 2014-06-16 17:24:14 -0400 | [diff] [blame] | 266 | |
| 267 | |
# Script entry point: the return value of main() becomes the exit status,
# and Ctrl-C exits with status 1.
if __name__ == '__main__':
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        sys.exit(1)