47#include <sys/socket.h>
/*
 * Human-readable state strings, one per SAM_CMAP_S_* state.
 * Note the naming mismatch: the REGISTERED state is spelled "stopped"
 * and the STARTED state is spelled "running".
 * NOTE(review): presumably these are the values written under the
 * process's cmap key -- the code that writes them is not visible in
 * this excerpt; confirm against the caller.
 */
66#define SAM_CMAP_S_FAILED "failed"
67#define SAM_CMAP_S_REGISTERED "stopped"
68#define SAM_CMAP_S_STARTED "running"
69#define SAM_CMAP_S_Q_WAIT "waiting for quorum"
/*
 * Recovery-policy masking helpers.
 *
 *   SAM_RP_MASK_Q(pol) - pol with the SAM_RECOVERY_POLICY_QUORUM bit cleared
 *   SAM_RP_MASK_C(pol) - pol with the SAM_RECOVERY_POLICY_CMAP bit cleared
 *   SAM_RP_MASK(pol)   - pol with both of the above bits cleared
 *
 * The argument is parenthesized so that a compound expression such as
 * `a | b` is masked as a whole; without the parentheses `&` would bind
 * only to the right-most operand of the argument.
 */
#define SAM_RP_MASK_Q(pol) ((pol) & (~SAM_RECOVERY_POLICY_QUORUM))
#define SAM_RP_MASK_C(pol) ((pol) & (~SAM_RECOVERY_POLICY_CMAP))
#define SAM_RP_MASK(pol) ((pol) & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CMAP)))
147 uint64_t hc_period, last_hc;
154 svalue = ssvalue[
SAM_RP_MASK (sam_internal_data.recovery_policy)];
168 hc_period = sam_internal_data.time_interval;
182 last_hc = cs_timestamp_get();
215static cs_error_t sam_cmap_destroy_pid_path (
void)
221 err =
cmap_iter_init(sam_internal_data.cmap_handle, sam_internal_data.cmap_pid_path, &iter);
226 while ((err =
cmap_iter_next(sam_internal_data.cmap_handle, iter, key_name, NULL, NULL)) ==
CS_OK) {
227 cmap_delete(sam_internal_data.cmap_handle, key_name);
245 snprintf(sam_internal_data.cmap_pid_path,
CMAP_KEYNAME_MAXLEN,
"resources.process.%d.", getpid());
250 goto destroy_finalize_error;
254 goto destroy_finalize_error;
259destroy_finalize_error:
260 sam_cmap_destroy_pid_path ();
265static void quorum_notification_fn (
269 uint32_t view_list_entries,
272 sam_internal_data.quorate =
quorate;
280 uint32_t quorum_type;
297 if ((err =
quorum_initialize (&sam_internal_data.quorum_handle, &quorum_callbacks, &quorum_type)) !=
CS_OK) {
302 goto exit_error_quorum;
305 if ((err =
quorum_fd_get (sam_internal_data.quorum_handle, &sam_internal_data.quorum_fd)) !=
CS_OK) {
306 goto exit_error_quorum;
313 goto exit_error_quorum;
322 sam_internal_data.warn_signal = SIGTERM;
324 sam_internal_data.am_i_child = 0;
326 sam_internal_data.user_data = NULL;
327 sam_internal_data.user_data_size = 0;
328 sam_internal_data.user_data_allocated = 0;
330 pthread_mutex_init (&sam_internal_data.lock, NULL);
343static size_t sam_safe_write (
349 ssize_t tmp_bytes_write;
354 tmp_bytes_write = write (d, (
const char *)buf + bytes_write,
355 (nbyte - bytes_write > SSIZE_MAX) ? SSIZE_MAX : nbyte - bytes_write);
357 if (tmp_bytes_write == -1) {
358 if (!(errno == EAGAIN || errno == EINTR))
361 bytes_write += tmp_bytes_write;
363 }
while (bytes_write != nbyte);
365 return (bytes_write);
371static size_t sam_safe_read (
377 ssize_t tmp_bytes_read;
382 tmp_bytes_read = read (d, (
char *)buf + bytes_read,
383 (nbyte - bytes_read > SSIZE_MAX) ? SSIZE_MAX : nbyte - bytes_read);
385 if (tmp_bytes_read == -1) {
386 if (!(errno == EAGAIN || errno == EINTR))
389 bytes_read += tmp_bytes_read;
392 }
while (bytes_read != nbyte && tmp_bytes_read != 0);
403 if (sam_safe_read (sam_internal_data.child_fd_in, &reply, sizeof (reply)) !=
sizeof (reply)) {
412 if (sam_safe_read (sam_internal_data.child_fd_in, &err, sizeof (err)) !=
sizeof (err)) {
444 pthread_mutex_lock (&sam_internal_data.lock);
446 *size = sam_internal_data.user_data_size;
448 pthread_mutex_unlock (&sam_internal_data.lock);
472 pthread_mutex_lock (&sam_internal_data.lock);
474 if (sam_internal_data.user_data_size == 0) {
480 if (size < sam_internal_data.user_data_size) {
486 memcpy (data, sam_internal_data.user_data, sam_internal_data.user_data_size);
488 pthread_mutex_unlock (&sam_internal_data.lock);
493 pthread_mutex_unlock (&sam_internal_data.lock);
518 pthread_mutex_lock (&sam_internal_data.lock);
520 if (sam_internal_data.am_i_child) {
525 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
531 if (sam_safe_write (sam_internal_data.child_fd_out, &size, sizeof (size)) !=
sizeof (size)) {
537 if (data != NULL && sam_safe_write (sam_internal_data.child_fd_out, data, size) != size) {
546 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
555 free (sam_internal_data.user_data);
556 sam_internal_data.user_data = NULL;
557 sam_internal_data.user_data_allocated = 0;
558 sam_internal_data.user_data_size = 0;
560 if (sam_internal_data.user_data_allocated < size) {
561 if ((new_data = realloc (sam_internal_data.user_data, size)) == NULL) {
567 sam_internal_data.user_data_allocated = size;
569 new_data = sam_internal_data.user_data;
571 sam_internal_data.user_data = new_data;
572 sam_internal_data.user_data_size = size;
574 memcpy (sam_internal_data.user_data, data, size);
577 pthread_mutex_unlock (&sam_internal_data.lock);
582 pthread_mutex_unlock (&sam_internal_data.lock);
597 recpol = sam_internal_data.recovery_policy;
600 pthread_mutex_lock (&sam_internal_data.lock);
605 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
607 pthread_mutex_unlock (&sam_internal_data.lock);
617 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
618 pthread_mutex_unlock (&sam_internal_data.lock);
623 pthread_mutex_unlock (&sam_internal_data.lock);
626 if (sam_internal_data.hc_callback)
627 if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) !=
sizeof (command))
647 pthread_mutex_lock (&sam_internal_data.lock);
650 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
652 pthread_mutex_unlock (&sam_internal_data.lock);
662 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
663 pthread_mutex_unlock (&sam_internal_data.lock);
668 pthread_mutex_unlock (&sam_internal_data.lock);
671 if (sam_internal_data.hc_callback)
672 if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) !=
sizeof (command))
690 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command))
714 free (sam_internal_data.user_data);
735 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command))
752 pthread_mutex_lock (&sam_internal_data.lock);
754 if (sam_internal_data.am_i_child) {
759 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
775 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
785 pthread_mutex_unlock (&sam_internal_data.lock);
790 pthread_mutex_unlock (&sam_internal_data.lock);
805 if (sam_safe_write (parent_fd_out, &reply,
sizeof (reply)) !=
sizeof (reply)) {
815 if (sam_safe_write (parent_fd_out, &reply,
sizeof (reply)) !=
sizeof (reply)) {
818 if (sam_safe_write (parent_fd_out, &err,
sizeof (err)) !=
sizeof (err)) {
846 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
849 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
857 struct pollfd pfds[2];
876 while (!sam_internal_data.quorate) {
877 pfds[0].fd = parent_fd_in;
881 pfds[1].fd = sam_internal_data.quorum_fd;
882 pfds[1].events = POLLIN;
885 poll_err = poll (pfds, 2, -1);
887 if (poll_err == -1) {
892 if (errno != EINTR) {
898 if (pfds[0].revents != 0) {
899 if (pfds[0].revents == POLLERR || pfds[0].revents == POLLHUP ||pfds[0].revents == POLLNVAL) {
907 if (pfds[1].revents != 0) {
920 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
927 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
948 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
951 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
961 if (!sam_internal_data.term_send) {
965 kill (child_pid, sam_internal_data.warn_signal);
967 sam_internal_data.term_send = 1;
972 kill (child_pid, SIGKILL);
979static cs_error_t sam_parent_mark_child_failed (
985 recpol = sam_internal_data.recovery_policy;
987 sam_internal_data.term_send = 1;
992 return (sam_parent_kill_child (action, child_pid));
1006 if (sam_safe_read (parent_fd_in, &size,
sizeof (size)) !=
sizeof (size)) {
1018 if (sam_safe_read (parent_fd_in,
user_data, size) != size) {
1020 goto free_error_reply;
1026 goto free_error_reply;
1031 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
1036 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
1050 struct pollfd pfds[2];
1058 recpol = sam_internal_data.recovery_policy;
1061 pfds[0].fd = parent_fd_in;
1062 pfds[0].events = POLLIN;
1063 pfds[0].revents = 0;
1066 if (status == 1 && sam_internal_data.time_interval != 0) {
1073 pfds[nfds].fd = sam_internal_data.quorum_fd;
1074 pfds[nfds].events = POLLIN;
1075 pfds[nfds].revents = 0;
1081 if (poll_error == -1) {
1086 if (errno != EINTR) {
1091 if (poll_error == 0) {
1098 sam_parent_kill_child (&action, child_pid);
1102 if (poll_error > 0) {
1103 if (pfds[0].revents != 0) {
1107 bytes_read = sam_safe_read (parent_fd_in, &command, 1);
1109 if (bytes_read == 0) {
1121 if (bytes_read == -1) {
1140 if (sam_parent_wait_for_quorum (parent_fd_in,
1141 parent_fd_out) !=
CS_OK) {
1147 if (sam_parent_cmap_state_set (parent_fd_in,
1148 parent_fd_out, 1) !=
CS_OK) {
1162 if (sam_parent_cmap_state_set (parent_fd_in,
1163 parent_fd_out, 0) !=
CS_OK) {
1172 sam_parent_data_store (parent_fd_in, parent_fd_out);
1175 sam_parent_warn_signal_set (parent_fd_in, parent_fd_out);
1179 sam_parent_mark_child_failed (&action, child_pid);
1185 pfds[1].revents != 0) {
1193 sam_parent_kill_child (&action, child_pid);
1209 int pipe_fd_out[2], pipe_fd_in[2];
1218 recpol = sam_internal_data.recovery_policy;
1224 if ((error = sam_cmap_register ()) !=
CS_OK) {
1232 if ((pipe_error = pipe (pipe_fd_out)) != 0) {
1237 if ((pipe_error = pipe (pipe_fd_in)) != 0) {
1238 close (pipe_fd_out[0]);
1239 close (pipe_fd_out[1]);
1251 sam_internal_data.instance_id++;
1253 sam_internal_data.term_send = 0;
1261 sam_internal_data.instance_id--;
1271 close (pipe_fd_out[0]);
1272 close (pipe_fd_in[1]);
1274 sam_internal_data.child_fd_out = pipe_fd_out[1];
1275 sam_internal_data.child_fd_in = pipe_fd_in[0];
1280 sam_internal_data.am_i_child = 1;
1283 pthread_mutex_init (&sam_internal_data.lock, NULL);
1290 close (pipe_fd_out[1]);
1291 close (pipe_fd_in[0]);
1293 action = sam_parent_handler (pipe_fd_out[0], pipe_fd_in[1], pid);
1295 close (pipe_fd_out[0]);
1296 close (pipe_fd_in[1]);
1306 while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
1309 old_action = action;
1329 sam_cmap_destroy_pid_path ();
1333 exit (WEXITSTATUS (child_status));
1344static void *hc_callback_thread (
void *unused_param)
1348 ssize_t bytes_readed;
1360 pfds.fd = sam_internal_data.cb_rpipe_fd;
1361 pfds.events = POLLIN;
1367 tmp_time_interval = -1;
1370 poll_error = poll (&pfds, 1, tmp_time_interval);
1372 if (poll_error == 0) {
1378 if (sam_internal_data.hc_callback () != 0) {
1386 if (poll_error > 0) {
1387 bytes_readed = sam_safe_read (sam_internal_data.cb_rpipe_fd, &command, 1);
1389 if (bytes_readed > 0) {
1403 return (unused_param);
1409 pthread_attr_t thread_attr;
1417 if (sam_internal_data.time_interval == 0) {
1421 if (sam_internal_data.cb_registered) {
1422 sam_internal_data.hc_callback = cb;
1435 pipe_error = pipe (pipe_fd);
1437 if (pipe_error != 0) {
1445 sam_internal_data.cb_rpipe_fd = pipe_fd[0];
1446 sam_internal_data.cb_wpipe_fd = pipe_fd[1];
1451 error = pthread_attr_init (&thread_attr);
1454 goto error_close_fd_exit;
1458 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
1459 pthread_attr_setstacksize (&thread_attr, 32768);
1464 error = pthread_create (&sam_internal_data.cb_thread, &thread_attr, hc_callback_thread, NULL);
1468 goto error_attr_destroy_exit;
1474 pthread_attr_destroy(&thread_attr);
1476 sam_internal_data.cb_registered = 1;
1477 sam_internal_data.hc_callback = cb;
1481error_attr_destroy_exit:
1482 pthread_attr_destroy(&thread_attr);
1484 sam_internal_data.cb_rpipe_fd = sam_internal_data.cb_wpipe_fd = 0;
cs_error_t
The cs_error_t enum.
uint64_t cmap_iter_handle_t
cs_error_t cmap_finalize(cmap_handle_t handle)
Close the cmap handle.
#define CMAP_KEYNAME_MAXLEN
cs_error_t cmap_iter_next(cmap_handle_t handle, cmap_iter_handle_t iter_handle, char key_name[], size_t *value_len, cmap_value_types_t *type)
Return next item in iterator iter.
cs_error_t cmap_delete(cmap_handle_t handle, const char *key_name)
Deletes key from cmap database.
cs_error_t cmap_initialize(cmap_handle_t *handle)
Create a new cmap connection.
cs_error_t cmap_iter_finalize(cmap_handle_t handle, cmap_iter_handle_t iter_handle)
Finalize iterator.
cs_error_t cmap_iter_init(cmap_handle_t handle, const char *prefix, cmap_iter_handle_t *cmap_iter_handle)
Initialize iterator with given prefix.
cs_error_t cmap_set_string(cmap_handle_t handle, const char *key_name, const char *value)
cs_error_t cmap_set_uint64(cmap_handle_t handle, const char *key_name, uint64_t value)
uint64_t quorum_handle_t
quorum_handle_t
cs_error_t quorum_initialize(quorum_handle_t *handle, quorum_callbacks_t *callbacks, uint32_t *quorum_type)
Create a new quorum connection.
cs_error_t quorum_fd_get(quorum_handle_t handle, int *fd)
Get a file descriptor on which to poll.
cs_error_t quorum_trackstart(quorum_handle_t handle, unsigned int flags)
Track node and quorum changes.
cs_error_t quorum_finalize(quorum_handle_t handle)
Close the quorum handle.
cs_error_t quorum_dispatch(quorum_handle_t handle, cs_dispatch_flags_t dispatch_types)
Dispatch messages and configuration changes.
cs_error_t sam_warn_signal_set(int warn_signal)
Set warning signal to be sent.
#define SAM_CMAP_S_FAILED
cs_error_t sam_finalize(void)
Close the SAM handle.
cs_error_t sam_hc_callback_register(sam_hc_callback_t cb)
Register healthcheck callback.
#define SAM_CMAP_S_STARTED
#define SAM_CMAP_S_Q_WAIT
cs_error_t sam_data_store(const void *data, size_t size)
Store user data.
@ SAM_COMMAND_MARK_FAILED
@ SAM_COMMAND_WARN_SIGNAL_SET
sam_hc_callback_t hc_callback
quorum_handle_t quorum_handle
#define SAM_CMAP_S_REGISTERED
cs_error_t sam_mark_failed(void)
Marks child as failed.
#define SAM_RP_MASK_C(pol)
#define SAM_RP_MASK_Q(pol)
cs_error_t sam_data_restore(void *data, size_t size)
Return stored data.
sam_recovery_policy_t recovery_policy
enum sam_internal_status_t internal_status
@ SAM_INTERNAL_STATUS_STARTED
@ SAM_INTERNAL_STATUS_NOT_INITIALIZED
@ SAM_INTERNAL_STATUS_FINALIZED
@ SAM_INTERNAL_STATUS_REGISTERED
@ SAM_INTERNAL_STATUS_INITIALIZED
cs_error_t sam_register(unsigned int *instance_id)
Register application.
cs_error_t sam_data_getsize(size_t *size)
Return size of stored data.
cs_error_t sam_stop(void)
Stop healthchecking.
cs_error_t sam_initialize(int time_interval, sam_recovery_policy_t recovery_policy)
Create a new SAM connection.
cs_error_t sam_hc_send(void)
Send healthcheck confirmation.
char cmap_pid_path[CMAP_KEYNAME_MAXLEN]
@ SAM_PARENT_ACTION_ERROR
@ SAM_PARENT_ACTION_CONTINUE
@ SAM_PARENT_ACTION_RECOVERY
cs_error_t sam_start(void)
Start healthchecking.
cmap_handle_t cmap_handle
size_t user_data_allocated
sam_recovery_policy_t
sam_recovery_policy_t enum
@ SAM_RECOVERY_POLICY_CMAP
@ SAM_RECOVERY_POLICY_QUORUM
@ SAM_RECOVERY_POLICY_QUIT
@ SAM_RECOVERY_POLICY_RESTART
int(* sam_hc_callback_t)(void)
Callback definition for event driven checking.
The quorum_callbacks_t struct.
quorum_notification_fn_t quorum_notify_fn
struct memb_ring_id ring_id