blob: 7634d2cbfcbdef5fd253999260e6b8ec630f1ba7 [file] [log] [blame]
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
David Kranzb9d97502013-05-01 15:55:04 -040015import multiprocessing
Marc Koderer8f940ab2013-09-25 17:31:50 +020016import os
Marc Koderer3414d732013-07-31 08:36:36 +020017import signal
David Kranzb9d97502013-05-01 15:55:04 -040018import time
19
Doug Hellmann583ce2c2015-03-11 14:55:46 +000020from oslo_log import log as logging
21from oslo_utils import importutils
Matthew Treinish71426682015-04-23 11:19:38 -040022import six
llg821243b20502014-02-22 10:32:49 +080023from six import moves
Andrey Pavlov64723762015-04-29 06:24:58 +030024from tempest_lib.common import ssh
Fei Long Wangd39431f2015-05-14 11:30:48 +120025
llg821243b20502014-02-22 10:32:49 +080026
David Kranzb9d97502013-05-01 15:55:04 -040027from tempest import clients
Jamie Lennox15350172015-08-17 10:54:25 +100028from tempest.common import cred_client
Fei Long Wangd39431f2015-05-14 11:30:48 +120029from tempest.common.utils import data_utils
Matthew Treinish88f49ef2014-01-29 18:36:27 +000030from tempest import config
David Kranzb9d97502013-05-01 15:55:04 -040031from tempest import exceptions
32from tempest.stress import cleanup
33
Matthew Treinish88f49ef2014-01-29 18:36:27 +000034CONF = config.CONF
35
Marc Kodererb714de52013-08-08 09:21:46 +020036LOG = logging.getLogger(__name__)
Marc Koderer3414d732013-07-31 08:36:36 +020037processes = []
David Kranzb9d97502013-05-01 15:55:04 -040038
39
def do_ssh(command, host, ssh_user, ssh_key=None):
    """
    Runs ``command`` on ``host`` over SSH and returns whatever the SSH
    client's exec_command() returns.

    If the client raises SSHExecCommandFailed the failure is logged and
    None is returned instead, so callers must be prepared for None.
    """
    client = ssh.Client(host, ssh_user, key_filename=ssh_key)
    try:
        output = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        message = ('do_ssh raise exception. command:%s, host:%s.'
                   % (command, host))
        LOG.error(message)
        output = None
    return output
48
49
def _get_compute_nodes(controller, ssh_user, ssh_key=None):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    A node is considered active when the fifth column of its
    ``nova-manage service list`` line is the ":-)" marker; the node name
    is taken from the second column. An empty list is returned when the
    remote command fails or produces no output.
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller, ssh_user, ssh_key)
    if not output:
        return []
    nodes = []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    for line in output.split('\n'):
        words = line.split()
        # Require at least five columns before looking at the state column;
        # a short or malformed line is skipped instead of raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
67
68
def _has_error_in_logs(logfiles, nodes, ssh_user, ssh_key=None,
                       stop_on_error=False):
    """
    Detect errors in the nova log files on the given nodes.

    Runs ``egrep "ERROR|TRACE"`` over ``logfiles`` on every node in
    ``nodes`` and logs any matching output.

    :param logfiles: file name(s)/pattern handed verbatim to egrep
    :param nodes: hosts to inspect
    :param ssh_user: user name for the SSH connection
    :param ssh_key: optional private key file for the SSH connection
    :param stop_on_error: stop scanning at the first node with matches
    :returns: True if at least one node reported matching lines
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    ret = False
    for node in nodes:
        errors = do_ssh(grep, node, ssh_user, ssh_key)
        # do_ssh() returns None when the remote command fails, so test
        # truthiness rather than len() to avoid a TypeError on None.
        if errors:
            LOG.error('%s: %s', node, errors)
            ret = True
            if stop_on_error:
                break
    return ret
David Kranzb9d97502013-05-01 15:55:04 -040084
85
def sigchld_handler(signalnum, frame):
    """
    Signal handler (only active if stop_on_error is True).

    When any tracked child has exited with a non-zero exit code, the
    default disposition for ``signalnum`` is restored and all remaining
    children are terminated.
    """
    def _died_with_error(entry):
        child = entry['process']
        return not child.is_alive() and child.exitcode != 0

    if any(_died_with_error(entry) for entry in processes):
        # Restore the default action first so that the children we are
        # about to stop do not re-enter this handler.
        signal.signal(signalnum, signal.SIG_DFL)
        terminate_all_processes()
Marc Koderer3414d732013-07-31 08:36:36 +020096
97
def terminate_all_processes(check_interval=20):
    """
    Goes through the process list and terminates all child processes.

    Every live entry of the module-level ``processes`` list is asked to
    terminate. After ``check_interval`` seconds each entry that is still
    alive is sent SIGKILL, and every entry is joined before returning.

    :param check_interval: seconds to wait between the terminate() pass
                           and the SIGKILL pass
    """
    LOG.info("Stopping all processes.")
    for process in processes:
        if process['process'].is_alive():
            try:
                process['process'].terminate()
            except Exception:
                # Best effort: keep going with the remaining processes,
                # but leave a trace of what went wrong.
                LOG.debug("Could not terminate a stress process.",
                          exc_info=True)
    time.sleep(check_interval)
    for process in processes:
        if process['process'].is_alive():
            try:
                pid = process['process'].pid
                LOG.warning("Process %d hangs. Send SIGKILL.", pid)
                os.kill(pid, signal.SIGKILL)
            except Exception:
                # Best effort, as above: failing to kill one process must
                # not prevent the others from being cleaned up.
                LOG.debug("Could not kill a hanging stress process.",
                          exc_info=True)
        process['process'].join()
119
120
def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """
    Workload driver. Executes an action function against a nova-cluster.

    For every entry in ``tests`` the class named by ``test['action']`` is
    imported, instantiated and run in one or more child processes. The
    parent then monitors the children (and, if configured, the remote log
    files) until the run is over, prints per-process statistics and
    optionally cleans up.

    :param tests: iterable of dicts. ``'action'`` is required; the optional
                  keys read here are ``'required_services'``,
                  ``'use_admin'``, ``'threads'``, ``'use_isolated_tenants'``
                  and ``'kwargs'``.
    :param duration: run time in seconds; only bounds the monitor loop
                     when ``max_runs`` is None.
    :param max_runs: passed to each action object. When not None the
                     monitor loop ends once no child process is alive.
    :param stop_on_error: passed to each action object; also installs the
                          SIGCHLD handler and ends the monitor loop as soon
                          as any process reports a failure.
    :returns: 1 if any failure or log error was seen, otherwise 0.
    """
    admin_manager = clients.AdminManager()

    ssh_user = CONF.stress.target_ssh_user
    ssh_key = CONF.stress.target_private_key_path
    logfiles = CONF.stress.target_logfiles
    log_check_interval = int(CONF.stress.log_check_interval)
    default_thread_num = int(CONF.stress.default_thread_number_per_action)
    if logfiles:
        # ``computes`` is only defined when log checking is configured; it
        # is only read further down under the same condition.
        controller = CONF.stress.target_controller
        computes = _get_compute_nodes(controller, ssh_user, ssh_key)
        # Remove the existing log files on every compute node (presumably
        # so that later scans only see entries from this run).
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node, ssh_user, ssh_key)
    skip = False
    for test in tests:
        for service in test.get('required_services', []):
            if not CONF.service_available.get(service):
                skip = True
                break
        # NOTE(review): ``skip`` is never reset and this is a ``break``, so
        # the first test with an unavailable service also prevents all
        # following tests from being started - confirm this is intended.
        if skip:
            break
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        # One child process per requested "thread".
        for p_number in moves.xrange(test.get('threads', default_thread_num)):
            if test.get('use_isolated_tenants', False):
                # Create a dedicated project/user for this process and use
                # a manager built from those credentials instead.
                username = data_utils.rand_name("stress_user")
                tenant_name = data_utils.rand_name("stress_tenant")
                password = "pass"
                if CONF.identity.auth_version == 'v2':
                    identity_client = admin_manager.identity_client
                else:
                    identity_client = admin_manager.identity_v3_client
                credentials_client = cred_client.get_creds_client(
                    identity_client)
                project = credentials_client.create_project(
                    name=tenant_name, description=tenant_name)
                user = credentials_client.create_user(username, password,
                                                      project['id'], "email")
                # Add roles specified in config file
                for conf_role in CONF.auth.tempest_roles:
                    credentials_client.assign_user_role(user, project,
                                                        conf_role)
                creds = credentials_client.get_credentials(user, project,
                                                           password)
                manager = clients.Manager(credentials=creds)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, max_runs, stop_on_error)

            kwargs = test.get('kwargs', {})
            test_run.setUp(**dict(six.iteritems(kwargs)))

            LOG.debug("calling Target Object %s" %
                      test_run.__class__.__name__)

            # Each process gets its own manager-backed dict so the parent
            # can read the run/fail counters the child updates.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test_run.action,
                       'statistic': shared_statistic}

            # Register the process in the module-level list before it is
            # started; the signal handler and terminate_all_processes()
            # iterate over that list.
            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    try:
        while True:
            if max_runs is None:
                # Time-bounded run: stop once ``duration`` has elapsed.
                remaining = end_time - time.time()
                if remaining <= 0:
                    break
            else:
                # Run-bounded run: stop once every child has exited.
                remaining = log_check_interval
                all_proc_term = True
                for process in processes:
                    if process['process'].is_alive():
                        all_proc_term = False
                        break
                if all_proc_term:
                    break

            # Never sleep past the end of the run.
            time.sleep(min(remaining, log_check_interval))
            if stop_on_error:
                if any([True for proc in processes
                        if proc['statistic']['fails'] > 0]):
                    break

            if not logfiles:
                continue
            if _has_error_in_logs(logfiles, computes, ssh_user, ssh_key,
                                  stop_on_error):
                had_errors = True
                break
    except KeyboardInterrupt:
        LOG.warning("Interrupted, going to print statistics and exit ...")

    if stop_on_error:
        # Restore the default SIGCHLD action before stopping the children.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    LOG.info("Statistics (per process):")
    for process in processes:
        if process['statistic']['fails'] > 0:
            had_errors = True
        sum_runs += process['statistic']['runs']
        sum_fails += process['statistic']['fails']
        LOG.info(" Process %d (%s): Run %d actions (%d failed)" %
                 (process['p_number'],
                  process['action'],
                  process['statistic']['runs'],
                  process['statistic']['fails']))
    LOG.info("Summary:")
    LOG.info("Run %d actions (%d failed)" %
             (sum_runs, sum_fails))

    # Only clean up after an error-free run, and only if configured to.
    if not had_errors and CONF.stress.full_clean_stack:
        LOG.info("cleaning up")
        cleanup.cleanup()
    if had_errors:
        return 1
    else:
        return 0