40#include <linux/types.h>
41#include <linux/watchdog.h>
42#include <sys/reboot.h>
79static int wd_exec_exit_fn (
void);
80static void wd_resource_check_fn (
void* resource_ref);
83#define WD_DEFAULT_TIMEOUT_SEC 6
84#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
85#define WD_MIN_TIMEOUT_MS 500
86#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
91static int watchdog_ok = 1;
92static char *watchdog_device = NULL;
95 .
name =
"corosync watchdog service",
98 .private_data_size = 0,
103 .lib_engine_count = 0,
105 .exec_engine_count = 0,
107 .exec_init_fn = wd_exec_init_fn,
108 .exec_exit_fn = wd_exec_exit_fn,
117static void wd_config_changed (
struct cs_fsm* fsm, int32_t event,
void * data);
118static void wd_resource_failed (
struct cs_fsm* fsm, int32_t event,
void * data);
151static const char * wd_res_state_to_str(
struct cs_fsm* fsm,
168static const char * wd_res_event_to_str(
struct cs_fsm* fsm,
182static void wd_fsm_cb (
struct cs_fsm *fsm,
int cb_event, int32_t curr_state,
183 int32_t next_state, int32_t fsm_event,
void *data)
216static int32_t wd_resource_state_is_ok (
struct resource *ref)
219 uint64_t last_updated;
221 uint64_t allowed_period;
241 if (last_updated == 0) {
247 my_time = cs_timestamp_get();
254 if ((last_updated + allowed_period) < my_time) {
256 "last_updated %"PRIu64
" ms too late, period:%"PRIu64
".",
272static void wd_config_changed (
struct cs_fsm* fsm, int32_t event,
void * data)
276 uint64_t next_timeout;
286 "poll_period changing from:%"PRIu64
" to %"PRIu64
".",
297 "Could NOT use poll_period:%"PRIu64
" ms for resource %s",
298 tmp_value, ref->
name);
307 "resource %s missing a recovery key.", ref->
name);
316 "resource %s missing a state key.", ref->
name);
335static void wd_resource_failed (
struct cs_fsm*
fsm, int32_t event,
void * data)
346 if (strcmp (ref->
recovery,
"watchdog") == 0 ||
347 strcmp (ref->
recovery,
"quit") == 0) {
350 else if (strcmp (ref->
recovery,
"reboot") == 0) {
353 else if (strcmp (ref->
recovery,
"shutdown") == 0) {
354 reboot(RB_POWER_OFF);
359static void wd_key_changed(
361 const char *key_name,
373 last_key_part = strrchr(key_name,
'.');
374 if (last_key_part == NULL) {
380 if (strcmp(last_key_part,
"last_updated") == 0 ||
381 strcmp(last_key_part,
"current") == 0) {
389 if (strcmp(last_key_part,
"state") != 0) {
394 "resource \"%s\" deleted from cmap!",
405static void wd_resource_check_fn (
void* resource_ref)
409 if (wd_resource_state_is_ok (ref) ==
CS_FALSE) {
421static int32_t wd_resource_create (
char *
res_path,
char *res_name)
432 strcpy(ref->
name, res_name);
449 "Could NOT use poll_period:%"PRIu64
" ms for resource %s",
450 tmp_value, ref->
name);
464 "resource %s missing a recovery key.", ref->
name);
472 "resource %s missing a state key.", ref->
name);
498static void wd_tickle_fn (
void* arg)
504 ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
507 wd_tickle_fn, &wd_timer);
515static void wd_resource_created_cb(
517 const char *key_name,
531 res = sscanf(key_name,
"resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
536 if (strcmp(tmp_key,
"state") != 0) {
541 wd_resource_create (tmp_key, res_name);
544static void wd_scan_resources (
void)
549 const char *key_name;
559 res = sscanf(key_name,
"resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
564 if (strcmp(tmp_key,
"state") != 0) {
569 if (wd_resource_create (tmp_key, res_name) == 0) {
580 if (res_count == 0) {
586static void watchdog_timeout_apply (uint32_t
new)
588 struct watchdog_info ident;
589 uint32_t original_timeout = 0;
592 ioctl(dog, WDIOC_GETTIMEOUT, &original_timeout);
595 if (
new == original_timeout) {
599 watchdog_timeout =
new;
602 ioctl(dog, WDIOC_GETSUPPORT, &ident);
603 if (ident.options & WDIOF_SETTIMEOUT) {
606 ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
608 ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
611 if (watchdog_timeout ==
new) {
618 wd_tickle_fn, &wd_timer);
624 "Could not change the Watchdog timeout from %d to %d seconds",
625 original_timeout,
new);
630static int setup_watchdog(
void)
632 struct watchdog_info ident;
638 if (str[0] == 0 || strcmp (str,
"off") == 0) {
644 watchdog_device = str;
652 if (access (watchdog_device, W_OK) != 0) {
660 dog = open(watchdog_device, O_WRONLY);
672 ioctl(dog, WDIOC_GETSUPPORT, &ident);
676 watchdog_timeout_apply (watchdog_timeout);
678 ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
683static void wd_top_level_key_changed(
685 const char *key_name,
690 uint32_t tmp_value_32;
695 if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
696 watchdog_timeout_apply (tmp_value_32);
702 "Set watchdog_timeout is out of range (2..120).");
706static void watchdog_timeout_get_initial (
void)
708 uint32_t tmp_value_32;
719 if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
720 watchdog_timeout_apply (tmp_value_32);
724 "Set watchdog_timeout is out of range (2..120).");
744 watchdog_timeout_get_initial();
753static int wd_exec_exit_fn (
void)
760 if (write (dog, &
magic, 1) == -1) {
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
@ CS_LIB_FLOW_CONTROL_NOT_REQUIRED
#define MILLI_2_NANO_SECONDS
#define CS_TIME_MS_IN_SEC
#define CS_MAX_NAME_LENGTH
QB_LIST_DECLARE(cpg_pd_list_head)
#define corosync_exit_error(err)
@ COROSYNC_DONE_FATAL_ERR
#define CS_FSM_CB_EVENT_PROCESS_NF
#define CS_FSM_CB_EVENT_STATE_SET
#define CS_FSM_CB_EVENT_STATE_SET_NF
#define ICMAP_TRACK_MODIFY
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
#define ICMAP_TRACK_DELETE
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem....
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
qb_map_iter_t * icmap_iter_t
Iterator type.
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
cs_error_t icmap_track_delete(icmap_track_t icmap_track)
Remove previously added track.
cs_error_t icmap_set_uint64(const char *key_name, uint64_t value)
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
cs_error_t icmap_get_uint64(const char *key_name, uint64_t *u64)
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
#define LOGSYS_LEVEL_ERROR
#define log_printf(level, format, args...)
#define LOGSYS_LEVEL_INFO
#define LOGSYS_LEVEL_CRIT
#define LOGSYS_LEVEL_WARNING
#define LOGSYS_LEVEL_DEBUG
#define LOGSYS_LEVEL_ALERT
The corosync_api_v1 struct.
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
void(* timer_delete)(corosync_timer_handle_t timer_handle)
The corosync_service_engine struct.
cs_fsm_state_to_str_fn state_to_str
struct cs_fsm_entry * table
cs_fsm_event_to_str_fn event_to_str
Structure passed as new_value and old_value in change callback.
icmap_track_t icmap_track
corosync_timer_handle_t check_timer
char name[CS_MAX_NAME_LENGTH]
char res_path[ICMAP_KEYNAME_MAXLEN]
#define WD_MIN_TIMEOUT_MS
LOGSYS_DECLARE_SUBSYS("WD")
const char * wd_running_str
#define WD_DEFAULT_TIMEOUT_SEC
struct corosync_service_engine wd_service_engine
const char * wd_failure_str
struct cs_fsm_entry wd_fsm_table[]
const char * wd_config_changed_str
@ WD_RESOURCE_STATE_UNKNOWN
@ WD_RESOURCE_NOT_MONITORED
const char * wd_stopped_str
#define WD_MAX_TIMEOUT_MS
struct corosync_service_engine * wd_get_service_engine_ver0(void)
#define WD_DEFAULT_TIMEOUT_MS
const char * wd_failed_str