corosync 3.1.7
mon.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2012 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Author: Angus Salkeld <asalkeld@redhat.com>
7 *
8 * This software licensed under BSD license, the text of which follows:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the MontaVista Software, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35#include <config.h>
36
37#include <unistd.h>
38#include <statgrab.h>
39
40#include <corosync/corotypes.h>
41#include <corosync/corodefs.h>
42#include <corosync/coroapi.h>
43#include <qb/qblist.h>
44#include <corosync/logsys.h>
45#include <corosync/icmap.h>
46#include "fsm.h"
47
48#include "service.h"
49
51
52/*
53 * Service Interfaces required by service_message_handler struct
54 */
55static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api);
56
/* Corosync API table; assigned in mon_exec_init_fn and used for timer calls. */
57static struct corosync_api_v1 *api;
/*
 * Poll period settings. The poll_period log messages in this file report
 * these values as milliseconds. A configured poll_period is only accepted
 * when it lies within [MON_MIN_PERIOD, MON_MAX_PERIOD]; otherwise the
 * instance keeps its current period (MON_DEFAULT_PERIOD initially).
 */
58#define MON_DEFAULT_PERIOD 3000
59#define MON_MIN_PERIOD 500
60#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC)
61
63 .name = "corosync resource monitoring service",
64 .id = MON_SERVICE,
65 .priority = 1,
66 .private_data_size = 0,
68 .lib_init_fn = NULL,
69 .lib_exit_fn = NULL,
70 .lib_engine = NULL,
71 .lib_engine_count = 0,
72 .exec_engine = NULL,
73 .exec_engine_count = 0,
74 .confchg_fn = NULL,
75 .exec_init_fn = mon_exec_init_fn,
76 .exec_dump_fn = NULL
77};
78
79static QB_LIST_DECLARE (confchg_notify);
80
81
83 const char *icmap_path;
84 const char *name;
86 void (*update_stats_fn) (void *data);
87 struct cs_fsm fsm;
88 uint64_t period;
90 union {
91 int32_t int32;
92 double dbl;
93 } max;
94};
95
96static void mem_update_stats_fn (void *data);
97static void load_update_stats_fn (void *data);
98
99static struct resource_instance memory_used_inst = {
100 .name = "memory_used",
101 .icmap_path = "resources.system.memory_used.",
102 .update_stats_fn = mem_update_stats_fn,
103 .max_type = ICMAP_VALUETYPE_INT32,
104 .max.int32 = INT32_MAX,
105 .period = MON_DEFAULT_PERIOD,
106};
107
108static struct resource_instance load_15min_inst = {
109 .name = "load_15min",
110 .icmap_path = "resources.system.load_15min.",
111 .update_stats_fn = load_update_stats_fn,
112 .max_type = ICMAP_VALUETYPE_DOUBLE,
113 .max.dbl = INT32_MAX,
114 .period = MON_DEFAULT_PERIOD,
115};
116
117
118/*
119 * F S M
120 */
121static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
122static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
123
/* Printable names of the resource states (see mon_res_state_to_str). */
const char *mon_stopped_str = "stopped";
const char *mon_running_str = "running";
const char *mon_failed_str = "failed";

/* Printable names of the resource events (see mon_res_event_to_str). */
const char *mon_config_changed_str = "config_changed";
const char *mon_failure_str = "failure";
129
139
141 { MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_STOPPED, MON_S_RUNNING, -1} },
142 { MON_S_STOPPED, MON_E_FAILURE, NULL, {-1} },
143 { MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
144 { MON_S_RUNNING, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} },
145 { MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
146 { MON_S_FAILED, MON_E_FAILURE, NULL, {-1} },
147};
148
150{
151 return (&mon_service_engine);
152}
153
154static const char * mon_res_state_to_str(struct cs_fsm* fsm,
155 int32_t state)
156{
157 switch (state) {
158 case MON_S_STOPPED:
159 return mon_stopped_str;
160 break;
161 case MON_S_RUNNING:
162 return mon_running_str;
163 break;
164 case MON_S_FAILED:
165 return mon_failed_str;
166 break;
167 }
168 return NULL;
169}
170
/*
 * Translate a monitor FSM event into its printable name.
 *
 * Returns mon_failure_str for MON_E_FAILURE and NULL for values that
 * match no case.
 * NOTE(review): the case that precedes the first "break" is not visible
 * here; presumably MON_E_CONFIG_CHANGED -> mon_config_changed_str - confirm.
 */
171static const char * mon_res_event_to_str(struct cs_fsm* fsm,
172 int32_t event)
173{
174 switch (event) {
177 break;
178 case MON_E_FAILURE:
179 return mon_failure_str;
180 break;
181 }
182 return NULL;
183}
184
/*
 * Callback handed to cs_fsm_process / cs_fsm_state_set; logs what the
 * state machine reported through cb_event.
 *
 * Visible branches, in order:
 *  - ERROR: the event was not found for the current state
 *  - INFO:  a state transition, "from" taken from the current table entry
 *  - CRIT:  a state change that could not be performed
 *  - CRIT:  (default) unknown cb_event
 * NOTE(review): the case labels and the statement before three of the
 * "break"s are not visible here; presumably the labels are the
 * CS_FSM_CB_EVENT_* constants from fsm.h - confirm against the real file.
 */
185static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
186 int32_t next_state, int32_t fsm_event, void *data)
187{
188 switch (cb_event) {
190 log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
191 fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
193 break;
195 log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
196 fsm->name,
197 fsm->event_to_str(fsm, fsm_event),
198 fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
199 fsm->state_to_str(fsm, next_state));
200 break;
202 log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
203 fsm->name,
204 fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
205 fsm->state_to_str(fsm, next_state),
206 fsm->event_to_str(fsm, fsm_event));
208 break;
209 default:
210 log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!");
212 break;
213 }
214}
215
216static void mon_fsm_state_set (struct cs_fsm* fsm,
217 enum mon_resource_state next_state, struct resource_instance* inst)
218{
219 enum mon_resource_state prev_state = fsm->curr_state;
220 const char *state_str;
221 char key_name[ICMAP_KEYNAME_MAXLEN];
222
223 ENTER();
224
225 cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb);
226
227 if (prev_state == fsm->curr_state) {
228 return;
229 }
230 state_str = mon_res_state_to_str(fsm, fsm->curr_state);
231
232 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
233 icmap_set_string(key_name, state_str);
234}
235
236
/*
 * FSM action for MON_E_CONFIG_CHANGED: re-read "poll_period" and "max" for
 * the resource instance passed in data, then restart or stop monitoring.
 *
 * Steps:
 *  1. If "<icmap_path>poll_period" exists, parse it as uint64 and adopt it
 *     when it lies within [MON_MIN_PERIOD, MON_MAX_PERIOD].
 *  2. Delete any pending timer.
 *  3. If "<icmap_path>max" exists, parse it according to inst->max_type.
 *     A parse failure resets the limit to INT32_MAX and sets the state to
 *     MON_S_STOPPED; success stores the limit and requests the updater.
 *  4. When the updater was requested, set MON_S_RUNNING and call
 *     inst->update_stats_fn once.
 *
 * When "max" is absent, no state is set here and the updater is not run.
 */
237static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
238{
239 struct resource_instance * inst = (struct resource_instance *)data;
240 char *tmp_str;
241 uint64_t tmp_value;
242 char key_name[ICMAP_KEYNAME_MAXLEN];
243 int run_updater;
244 int scanf_res = 0;
245 int32_t i32;
246 double dbl;
247
248 ENTER();
249
250 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
251 if (icmap_get_string(key_name, &tmp_str) == CS_OK) {
252 scanf_res = sscanf(tmp_str, "%"PRIu64, &tmp_value);
253 if (scanf_res != 1) {
255 "Could NOT use poll_period: %s (not uint64 type) for resource %s",
256 tmp_str, inst->name);
257 }
258 free(tmp_str);
259
/*
 * NOTE(review): when sscanf failed above, tmp_value was never written, yet
 * it is still compared (and possibly logged or assigned) below. The range
 * check should be skipped unless scanf_res == 1.
 */
260 if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
262 "poll_period changing from:%"PRIu64" to %"PRIu64".",
263 inst->period, tmp_value);
264 inst->period = tmp_value;
265 } else {
267 "Could NOT use poll_period:%"PRIu64" ms for resource %s",
268 tmp_value, inst->name);
269 }
270 }
271
/* Drop the pending timer; update_stats_fn is only called again below if "max" parses. */
272 if (inst->timer_handle) {
273 api->timer_delete(inst->timer_handle);
274 inst->timer_handle = 0;
275 }
276
277 run_updater = 0;
278
279 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "max");
280
281 if (icmap_get_string(key_name, &tmp_str) == CS_OK) {
282 if (inst->max_type == ICMAP_VALUETYPE_INT32) {
283 if (sscanf(tmp_str, "%"PRId32, &i32) != 1) {
284 inst->max.int32 = INT32_MAX;
285
286 mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
287 } else {
288 inst->max.int32 = i32;
289 run_updater = 1;
290 }
291 }
292 if (inst->max_type == ICMAP_VALUETYPE_DOUBLE) {
293 if (sscanf(tmp_str, "%lf", &dbl) != 1) {
294 inst->max.dbl = INT32_MAX;
295
296 mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
297 } else {
298 inst->max.dbl = dbl;
299 run_updater = 1;
300 }
301 }
302 free(tmp_str);
303 }
304
305 if (run_updater) {
306 mon_fsm_state_set (fsm, MON_S_RUNNING, inst);
307 /*
308 * run the updater, in case the period has shortened
309 * and to start the timer.
310 */
311 inst->update_stats_fn (inst);
312 }
313}
314
315void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
316{
317 struct resource_instance * inst = (struct resource_instance *)data;
318 ENTER();
319 mon_fsm_state_set (fsm, MON_S_FAILED, inst);
320}
321
322static int32_t percent_mem_used_get(void)
323{
324 sg_mem_stats *mem_stats;
325 sg_swap_stats *swap_stats;
326 long long total, freemem;
327
328#ifdef HAVE_LIBSTATGRAB_GE_090
329 mem_stats = sg_get_mem_stats(NULL);
330 swap_stats = sg_get_swap_stats(NULL);
331#else
332 mem_stats = sg_get_mem_stats();
333 swap_stats = sg_get_swap_stats();
334#endif
335
336 if (mem_stats == NULL || swap_stats == NULL) {
337 log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s",
338 sg_str_error(sg_get_error()));
339 return -1;
340 }
341 total = mem_stats->total + swap_stats->total;
342 freemem = mem_stats->free + swap_stats->free;
343 return ((total - freemem) * 100) / total;
344}
345
/*
 * Sample memory usage for the resource instance passed in data and publish
 * it: "<icmap_path>current" gets the percentage, "<icmap_path>last_updated"
 * gets the timestamp. If the value exceeds inst->max.int32 and the resource
 * is not already MON_S_FAILED, MON_E_FAILURE is fed to the FSM.
 *
 * NOTE(review): the "> 0" test skips the publish step for the -1 error
 * return and also for a genuine 0% reading - confirm that is intended.
 * NOTE(review): "current" is written with icmap_set_uint32 here, while
 * mon_instance_init seeds the same key with icmap_set_int32 - confirm the
 * intended value type.
 */
346static void mem_update_stats_fn (void *data)
347{
348 struct resource_instance * inst = (struct resource_instance *)data;
349 int32_t new_value;
350 uint64_t timestamp;
351 char key_name[ICMAP_KEYNAME_MAXLEN];
352
353 new_value = percent_mem_used_get();
354 if (new_value > 0) {
355 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
356 icmap_set_uint32(key_name, new_value);
357
358 timestamp = cs_timestamp_get();
359
360 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
361 icmap_set_uint64(key_name, timestamp);
362
363 if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) {
364 cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
365 }
366 }
/*
 * NOTE(review): the start of the following call is not visible here;
 * presumably it re-arms the timer (api->timer_add_duration) so that
 * update_stats_fn runs again after inst->period - confirm.
 */
368 inst, inst->update_stats_fn, &inst->timer_handle);
369}
370
371static double min15_loadavg_get(void)
372{
373 sg_load_stats *load_stats;
374
375#ifdef HAVE_LIBSTATGRAB_GE_090
376 load_stats = sg_get_load_stats (NULL);
377#else
378 load_stats = sg_get_load_stats ();
379#endif
380 if (load_stats == NULL) {
381 log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s",
382 sg_str_error (sg_get_error()));
383 return -1;
384 }
385 return load_stats->min15;
386}
387
/*
 * Sample the 15 minute load average for the resource instance passed in
 * data and publish it: "<icmap_path>current" gets the value,
 * "<icmap_path>last_updated" gets the timestamp. If the value exceeds
 * inst->max.dbl and the resource is not already MON_S_FAILED,
 * MON_E_FAILURE is fed to the FSM.
 *
 * NOTE(review): the "> 0" test skips the publish step for the -1 error
 * return and also for a load of exactly 0 - confirm that is intended.
 */
388static void load_update_stats_fn (void *data)
389{
390 struct resource_instance * inst = (struct resource_instance *)data;
391 uint64_t timestamp;
392 char key_name[ICMAP_KEYNAME_MAXLEN];
393 double min15 = min15_loadavg_get();
394
395 if (min15 > 0) {
396 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
397 icmap_set_double(key_name, min15);
398
399 timestamp = cs_timestamp_get();
400
401 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
402 icmap_set_uint64(key_name, timestamp);
403
404 if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) {
405 cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
406 }
407 }
408
/*
 * NOTE(review): the start of the following call is not visible here;
 * presumably it re-arms the timer (api->timer_add_duration) so that
 * update_stats_fn runs again after inst->period - confirm.
 */
410 inst, inst->update_stats_fn, &inst->timer_handle);
411}
412
/*
 * icmap tracking callback for a resource's keys; user_data is the
 * struct resource_instance registered in mon_instance_init.
 *
 *  - ICMAP_TRACK_DELETE: feeds MON_E_CONFIG_CHANGED to the instance's FSM.
 *  - ICMAP_TRACK_MODIFY: feeds MON_E_CONFIG_CHANGED only when the last
 *    dot-separated component of key_name is "max" or "poll_period".
 *
 * new_value and old_value are not inspected.
 */
413static void mon_key_changed_cb (
414 int32_t event,
415 const char *key_name,
416 struct icmap_notify_value new_value,
417 struct icmap_notify_value old_value,
418 void *user_data)
419{
420 struct resource_instance* inst = (struct resource_instance*)user_data;
421 char *last_key_part;
422
423 if (event == ICMAP_TRACK_DELETE && inst) {
425 "resource \"%s\" deleted from cmap!",
426 inst->name);
427
428 cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
429 }
430
/*
 * NOTE(review): unlike the delete path above, this path dereferences inst
 * without a NULL check - confirm user_data can never be NULL here.
 */
431 if (event == ICMAP_TRACK_MODIFY) {
432 last_key_part = strrchr(key_name, '.');
433 if (last_key_part == NULL)
434 return ;
435
/* Step past the '.' so the comparison sees only the final key component. */
436 last_key_part++;
437 if (strcmp(last_key_part, "max") == 0 ||
438 strcmp(last_key_part, "poll_period") == 0) {
439 ENTER();
440 cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
441 }
442 }
443}
444
/*
 * Prepare one resource instance for monitoring:
 *  - seed "<icmap_path>current" (int32 or double, by max_type) and
 *    "<icmap_path>last_updated" with 0,
 *  - wire up the instance's FSM (name, table, to-string callbacks),
 *  - read "<icmap_path>poll_period"; when it is missing or unparsable the
 *    instance's current period is written back to that key, otherwise the
 *    value is adopted if it lies within [MON_MIN_PERIOD, MON_MAX_PERIOD],
 *  - feed MON_E_CONFIG_CHANGED to the FSM so the configuration is applied.
 *
 * NOTE(review): several lines of this function are not visible here (the
 * "state" key write, the icmap_track declaration, the initial FSM state
 * and the head of the final call, presumably icmap_track_add registering
 * mon_key_changed_cb) - confirm against the real file.
 */
445static void mon_instance_init (struct resource_instance* inst)
446{
447 uint64_t tmp_value;
448 char key_name[ICMAP_KEYNAME_MAXLEN];
450 char *tmp_str;
451
452 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
453 if (inst->max_type == ICMAP_VALUETYPE_INT32) {
454 icmap_set_int32(key_name, 0);
455 } else {
456 icmap_set_double(key_name, 0);
457 }
458
459 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
460 icmap_set_uint64(key_name, 0);
461
462 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
464
465 inst->fsm.name = inst->name;
466 inst->fsm.curr_entry = 0;
468 inst->fsm.table = mon_fsm_table;
469 inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
470 inst->fsm.state_to_str = mon_res_state_to_str;
471 inst->fsm.event_to_str = mon_res_event_to_str;
472
473 snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
/*
 * NOTE(review): when icmap_get_string succeeds but sscanf fails, the first
 * branch is taken and tmp_str is never freed (free() is only reached in
 * the else branch) - looks like a leak.
 */
474 if (icmap_get_string(key_name, &tmp_str) != CS_OK ||
475 sscanf(tmp_str, "%"PRIu64, &tmp_value) != 1) {
476 icmap_set_uint64(key_name, inst->period);
477 }
478 else {
479 if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
480 inst->period = tmp_value;
481 } else {
483 "Could NOT use poll_period:%"PRIu64" ms for resource %s",
484 tmp_value, inst->name);
485 }
486 free(tmp_str);
487 }
488 cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
489
492 mon_key_changed_cb, inst, &icmap_track);
493}
494
495static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api)
496{
497#ifdef HAVE_LIBSTATGRAB_GE_090
498 sg_init(1);
499#else
500 sg_init();
501#endif
502
503 api = corosync_api;
504
505 mon_instance_init (&memory_used_inst);
506 mon_instance_init (&load_15min_inst);
507
508 return NULL;
509}
510
511
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
@ CS_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:153
#define MILLI_2_NANO_SECONDS
Definition: coroapi.h:105
@ MON_SERVICE
Definition: corodefs.h:50
@ CS_OK
Definition: corotypes.h:99
QB_LIST_DECLARE(cpg_pd_list_head)
#define corosync_exit_error(err)
Definition: exec/util.h:72
@ COROSYNC_DONE_FATAL_ERR
Definition: exec/util.h:55
#define CS_FSM_CB_EVENT_PROCESS_NF
Definition: fsm.h:54
#define CS_FSM_CB_EVENT_STATE_SET
Definition: fsm.h:55
#define CS_FSM_CB_EVENT_STATE_SET_NF
Definition: fsm.h:56
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
cs_error_t icmap_set_int32(const char *key_name, int32_t value)
Definition: icmap.c:591
icmap_value_types_t
Possible types of value.
Definition: icmap.h:58
@ ICMAP_VALUETYPE_DOUBLE
Definition: icmap.h:68
@ ICMAP_VALUETYPE_INT32
Definition: icmap.h:63
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1159
cs_error_t icmap_set_string(const char *key_name, const char *value)
Definition: icmap.c:627
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem....
Definition: icmap.h:85
cs_error_t icmap_set_double(const char *key_name, double value)
Definition: icmap.c:621
cs_error_t icmap_set_uint64(const char *key_name, uint64_t value)
Definition: icmap.c:609
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:597
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:856
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:72
#define log_printf(level, format, args...)
Definition: logsys.h:332
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:75
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:71
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:73
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:76
#define ENTER
Definition: logsys.h:333
mon_resource_state
Definition: mon.c:130
@ MON_S_FAILED
Definition: mon.c:133
@ MON_S_STOPPED
Definition: mon.c:131
@ MON_S_RUNNING
Definition: mon.c:132
#define MON_MIN_PERIOD
Definition: mon.c:59
#define MON_DEFAULT_PERIOD
Definition: mon.c:58
struct cs_fsm_entry mon_fsm_table[]
Definition: mon.c:140
struct corosync_service_engine * mon_get_service_engine_ver0(void)
Definition: mon.c:149
struct corosync_service_engine mon_service_engine
Definition: mon.c:62
#define MON_MAX_PERIOD
Definition: mon.c:60
const char * mon_failed_str
Definition: mon.c:125
const char * mon_stopped_str
Definition: mon.c:127
const char * mon_failure_str
Definition: mon.c:126
LOGSYS_DECLARE_SUBSYS("MON")
mon_resource_event
Definition: mon.c:135
@ MON_E_FAILURE
Definition: mon.c:137
@ MON_E_CONFIG_CHANGED
Definition: mon.c:136
const char * mon_config_changed_str
Definition: mon.c:128
const char * mon_running_str
Definition: mon.c:124
void * user_data
Definition: sam.c:127
The corosync_api_v1 struct.
Definition: coroapi.h:225
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
The corosync_service_engine struct.
Definition: coroapi.h:490
const char * name
Definition: coroapi.h:491
Definition: fsm.h:58
int32_t curr_state
Definition: fsm.h:59
Definition: fsm.h:65
int32_t curr_entry
Definition: fsm.h:68
int32_t curr_state
Definition: fsm.h:67
cs_fsm_state_to_str_fn state_to_str
Definition: fsm.h:71
size_t entries
Definition: fsm.h:69
const char * name
Definition: fsm.h:66
struct cs_fsm_entry * table
Definition: fsm.h:70
cs_fsm_event_to_str_fn event_to_str
Definition: fsm.h:72
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
corosync_timer_handle_t timer_handle
Definition: mon.c:85
int32_t int32
Definition: mon.c:91
const char * icmap_path
Definition: mon.c:83
icmap_value_types_t max_type
Definition: mon.c:89
uint64_t period
Definition: mon.c:88
const char * name
Definition: mon.c:84
union resource_instance::@9 max
void(* update_stats_fn)(void *data)
Definition: mon.c:86
struct cs_fsm fsm
Definition: mon.c:87
double dbl
Definition: mon.c:92