blob: efc57a9ae6aaef4b9dbaa115847d58e2c27027c6 [file] [log] [blame]
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
David Kranzb9d97502013-05-01 15:55:04 -040015import multiprocessing
Marc Koderer3414d732013-07-31 08:36:36 +020016import signal
David Kranzb9d97502013-05-01 15:55:04 -040017import time
18
19from tempest import clients
20from tempest.common import ssh
21from tempest.common.utils.data_utils import rand_name
22from tempest import exceptions
Attila Fazekas1e30d5d2013-07-30 14:38:20 +020023from tempest.openstack.common import importutils
Marc Kodererb714de52013-08-08 09:21:46 +020024from tempest.openstack.common import log as logging
David Kranzb9d97502013-05-01 15:55:04 -040025from tempest.stress import cleanup
26
# Module-wide AdminManager instance, created at import time. The functions
# below read the stress configuration and the identity client from it.
admin_manager = clients.AdminManager()

LOG = logging.getLogger(__name__)
# One dict per started child process (keys: 'process', 'p_number', 'action',
# 'statistic'); filled by stress_openstack.
processes = []
David Kranzb9d97502013-05-01 15:55:04 -040031
32
def do_ssh(command, host):
    """
    Execute ``command`` on ``host`` through an SSH client built from the
    stress configuration (target_ssh_user / target_private_key_path).

    Returns whatever the SSH client's exec_command returns, or None when
    either setting is empty or the command raises SSHExecCommandFailed.
    """
    stress_conf = admin_manager.config.stress
    ssh_user = stress_conf.target_ssh_user
    private_key = stress_conf.target_private_key_path
    if not ssh_user or not private_key:
        return None
    client = ssh.Client(host, ssh_user, key_filename=private_key)
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        result = None
    return result
43
44
def _get_compute_nodes(controller):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    :param controller: host on which "nova-manage service list" is run
    :returns: the second column of every nova-compute line whose fifth
              column is ":-)"; an empty list when the command produced
              no output
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller)
    if not output:
        return []
    nodes = []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    for line in output.split('\n'):
        words = line.split()
        # The state marker is the fifth column, so require at least five
        # columns; shorter lines are skipped instead of raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
62
63
def _error_in_logs(logfiles, nodes):
    """
    Detect errors in the log files on the given nodes.

    Runs egrep for "ERROR" or "TRACE" over ``logfiles`` on each node in
    turn and stops at the first node that reports matches.

    :param logfiles: file pattern handed verbatim to egrep
    :param nodes: hosts to check
    :returns: the egrep output of the first node with matches, or None when
              no node reported any
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    for node in nodes:
        errors = do_ssh(grep, node)
        if not errors:
            # No matches (or no result at all) for this node: keep scanning
            # the remaining nodes instead of giving up on the first clean
            # one.
            continue
        LOG.error('%s: %s' % (node, errors))
        return errors
    return None
77
78
def sigchld_handler(signal, frame):
    """
    SIGCHLD callback that terminates every child in the process list.

    stress_openstack installs it only when stop_on_error is True. Neither
    argument is used.
    """
    terminate_all_processes()
84
85
def terminate_all_processes():
    """
    Goes through the process list and terminates all child processes.

    Termination is best effort: a failure to terminate one process is
    logged and the loop carries on with the others. Every process is
    joined afterwards.
    """
    for process in processes:
        proc = process['process']
        if proc.is_alive():
            try:
                proc.terminate()
            except Exception as exc:
                # Stay best effort, but leave a trace instead of hiding the
                # failure completely.
                LOG.error("Failed to terminate process %s: %s" %
                          (proc.pid, exc))
        proc.join()
97
98
def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """
    Workload driver. Executes an action function against a nova-cluster.

    :param tests: iterable of dicts describing the actions. 'action' is the
                  class path handed to importutils.import_class; optional
                  keys are 'use_admin', 'threads' (default 1),
                  'use_isolated_tenants' and 'kwargs' (passed to setUp).
    :param duration: seconds to keep monitoring when max_runs is None
    :param max_runs: forwarded to every action object; when not None the
                     driver waits until all processes have ended instead of
                     using ``duration``
    :param stop_on_error: forwarded to every action object; when True a
                          SIGCHLD handler is installed and monitoring stops
                          as soon as any process reports a failed run
    :returns: 1 if log errors or failed runs were seen, otherwise 0
    """
    logfiles = admin_manager.config.stress.target_logfiles
    log_check_interval = int(admin_manager.config.stress.log_check_interval)
    if logfiles:
        # Remove the log files on the compute nodes before the run starts.
        controller = admin_manager.config.stress.target_controller
        computes = _get_compute_nodes(controller)
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node)
    for test in tests:
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        for p_number in xrange(test.get('threads', 1)):
            if test.get('use_isolated_tenants', False):
                username = rand_name("stress_user")
                tenant_name = rand_name("stress_tenant")
                password = "pass"
                identity_client = admin_manager.identity_client
                _, tenant = identity_client.create_tenant(name=tenant_name)
                identity_client.create_user(username,
                                            password,
                                            tenant['id'],
                                            "email")
                manager = clients.Manager(username=username,
                                          password=password,
                                          tenant_name=tenant_name)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, max_runs, stop_on_error)

            kwargs = test.get('kwargs', {})
            test_run.setUp(**dict(kwargs.iteritems()))

            LOG.debug("calling Target Object %s" %
                      test_run.__class__.__name__)

            # Counters live in a manager dict so the parent can read what
            # the child process writes.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test['action'],
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    while True:
        if max_runs is None:
            remaining = end_time - time.time()
            if remaining <= 0:
                break
        else:
            remaining = log_check_interval
            # With a run limit there is no deadline; stop once every
            # process has ended.
            if not any(proc['process'].is_alive() for proc in processes):
                break

        time.sleep(min(remaining, log_check_interval))
        # Leave the monitoring loop itself (not just a scan over the
        # processes) as soon as any process has recorded a failed run.
        if stop_on_error and any(proc['statistic']['fails'] > 0
                                 for proc in processes):
            break

        if not logfiles:
            continue
        errors = _error_in_logs(logfiles, computes)
        if errors:
            had_errors = True
            break

    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    LOG.info("Statistics (per process):")
    for process in processes:
        if process['statistic']['fails'] > 0:
            had_errors = True
        sum_runs += process['statistic']['runs']
        sum_fails += process['statistic']['fails']
        LOG.info(" Process %d (%s): Run %d actions (%d failed)" %
                 (process['p_number'],
                  process['action'],
                  process['statistic']['runs'],
                  process['statistic']['fails']))
    LOG.info("Summary:")
    LOG.info("Run %d actions (%d failed)" %
             (sum_runs, sum_fails))

    if not had_errors:
        LOG.info("cleaning up")
        cleanup.cleanup()
    return 1 if had_errors else 0