corosync 3.1.7
exec/votequorum.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2009-2020 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Authors: Christine Caulfield (ccaulfie@redhat.com)
7 * Fabio M. Di Nitto (fdinitto@redhat.com)
8 *
9 * This software licensed under BSD license, the text of which follows:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are met:
13 *
14 * - Redistributions of source code must retain the above copyright notice,
15 * this list of conditions and the following disclaimer.
16 * - Redistributions in binary form must reproduce the above copyright notice,
17 * this list of conditions and the following disclaimer in the documentation
18 * and/or other materials provided with the distribution.
19 * - Neither the name of the MontaVista Software, Inc. nor the names of its
20 * contributors may be used to endorse or promote products derived from this
21 * software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33 * THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include <config.h>
37
38#include <sys/types.h>
39#include <sys/stat.h>
40#include <fcntl.h>
41#include <stdint.h>
42#include <unistd.h>
43
44#include <qb/qblist.h>
45#include <qb/qbipc_common.h>
46
47#include "quorum.h"
48#include <corosync/corodefs.h>
49#include <corosync/logsys.h>
50#include <corosync/coroapi.h>
51#include <corosync/icmap.h>
52#include <corosync/votequorum.h>
54
55#include "service.h"
56#include "util.h"
57
59
60/*
61 * interface with corosync
62 */
63
/* Pointer to the corosync API table. NOTE(review): its assignment is not visible in this part of the file. */
64static struct corosync_api_v1 *corosync_api;
65
66/*
67 * votequorum global config vars
68 */
69
70
/* Quorum-device state. The two timeouts are (re)loaded from the quorum.device.* icmap keys in votequorum_qdevice_is_configured(). */
71static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
72static struct cluster_node *qdevice = NULL;
73static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
74static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
75static uint8_t qdevice_can_operate = 1;
76static void *qdevice_reg_conn = NULL;
77static uint8_t qdevice_master_wins = 0;
78
/* When set, calculate_quorum() forces the quorum threshold to 1 as long as at most two voters are counted. */
79static uint8_t two_node = 0;
80
/* wait_for_all is the feature switch; wait_for_all_status is the live "still waiting" state consulted by are_we_quorate(). */
81static uint8_t wait_for_all = 0;
82static uint8_t wait_for_all_status = 0;
83static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */
84
/* Tie-breaker mode (set by parse_atb_string()) and the node id extremes maintained by get_lowest_node_id()/get_highest_node_id(). */
85static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE;
86static int lowest_node_id = -1;
87static int highest_node_id = -1;
88
/* Last-man-standing switch and window. NOTE(review): the window is presumably in milliseconds - confirm against the timer code. */
89#define DEFAULT_LMS_WIN 10000
90static uint8_t last_man_standing = 0;
91static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
92
/* With allow_downscale set, calculate_quorum() uses ev_barrier as a lower bound for the expected votes it is asked to apply. */
93static uint8_t allow_downscale = 0;
94static uint32_t ev_barrier = 0;
95
/* Expected-votes tracking: ev_tracking_barrier is persisted through ev_tracking_fd (see load_ev_tracking_barrier()). */
96static uint8_t ev_tracking = 0;
97static uint32_t ev_tracking_barrier = 0;
98static int ev_tracking_fd = -1;
98static int ev_tracking_fd = -1;
99
100/*
101 * votequorum_exec defines/structs/forward definitions
102 */
103
/*
 * Bodies of four packed structures, each starting with an 8-byte aligned
 * qb_ipc_request_header.
 *
 * NOTE(review): the embedded source numbering skips lines 104, 108, 112,
 * 122, 125, 128, 130 and 131, so every opening "struct ... {" line and
 * several members are missing from this extract. Restore them from the
 * upstream file before building; nothing below can be compiled as shown.
 */
105 struct qb_ipc_request_header header __attribute__((aligned(8)));
106 uint32_t nodeid;
107 uint32_t votes;
109 uint32_t flags;
110} __attribute__((packed));
111
113 struct qb_ipc_request_header header __attribute__((aligned(8)));
114 uint32_t nodeid;
115 uint32_t value;
116 uint8_t param;
117 uint8_t _pad0;
118 uint8_t _pad1;
119 uint8_t _pad2;
120} __attribute__((packed));
121
123 struct qb_ipc_request_header header __attribute__((aligned(8)));
124 uint32_t operation;
126} __attribute__((packed));
127
129 struct qb_ipc_request_header header __attribute__((aligned(8)));
132} __attribute__((packed));
133
134/*
135 * votequorum_exec onwire version (via totem)
136 */
137
138#include "votequorum.h"
139
140/*
141 * votequorum_exec onwire messages (via totem)
142 */
143
/* Message ids; the values match the entry order of votequorum_exec_engine[] (0..3). */
144#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
145#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
146#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
147#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
148
/* Notification senders, defined later in the file. */
149static void votequorum_exec_send_expectedvotes_notification(void);
150static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
151static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context);
152
/* NOTE(review): presumably the accepted values of the "param" argument of votequorum_exec_send_reconfigure() - confirm. */
153#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
154#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
155#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3
156
157static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);
158
159/*
160 * used by req_exec_quorum_qdevice_reg
161 */
/* Qdevice registration operation codes. */
162#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
163#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
164
165/*
166 * votequorum internal node status/view
167 */
168
/* Single-bit flags kept in a node's "flags" word; they are tested with '&' (see decode_flags()) and may be combined. */
169#define NODE_FLAGS_QUORATE 1
170#define NODE_FLAGS_LEAVING 2
171#define NODE_FLAGS_WFASTATUS 4
172#define NODE_FLAGS_FIRST 8
173#define NODE_FLAGS_QDEVICE_REGISTERED 16
174#define NODE_FLAGS_QDEVICE_ALIVE 32
175#define NODE_FLAGS_QDEVICE_CAST_VOTE 64
176#define NODE_FLAGS_QDEVICE_MASTER_WINS 128
177
/*
 * Node state enum and per-node record.
 *
 * NOTE(review): embedded source lines 179-182, 184-186 and 188 are missing
 * from this extract: the enumerators, the enum's closing line, the
 * "struct cluster_node {" opener and some members are gone. Code in this
 * file uses NODESTATE_MEMBER, NODESTATE_DEAD and the members node_id, state
 * and expected_votes, so they must be restored from upstream.
 */
178typedef enum {
183
187 uint32_t votes;
189 uint32_t flags;
190 struct qb_list_head list;
191};
192
193/*
194 * votequorum internal quorum status
195 */
196
/*
 * quorum: current threshold as computed by calculate_quorum().
 * cluster_is_quorate: result of the last are_we_quorate() evaluation.
 * NOTE(review): calculate_quorum() returns int but quorum is 8 bits wide;
 * confirm thresholds above 255 cannot occur.
 */
197static uint8_t quorum;
198static uint8_t cluster_is_quorate;
199
200/*
201 * votequorum membership data
202 */
203
/* "us" is the local node's record; cluster_members_list is kept sorted by node_add_ordered(). */
204static struct cluster_node *us;
205static struct qb_list_head cluster_members_list;
/* Current and previous quorum member ids plus the auto_tie_breaker node list, each with its entry count below. */
206static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
207static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX];
208static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX];
209static int quorum_members_entries = 0;
210static int previous_quorum_members_entries = 0;
211static int atb_nodelist_entries = 0;
212static struct memb_ring_id quorum_ringid;
213
214/*
215 * pre allocate all cluster_nodes + one for qdevice
216 */
/* Pool consumed sequentially by allocate_node(); cluster_nodes_entries is the next free index. */
217static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
218static int cluster_nodes_entries = 0;
219
220/*
221 * votequorum tracking
222 */
/*
 * Per-connection private data; its size is handed to the service engine
 * via .private_data_size.
 * NOTE(review): embedded source lines 225-226 are missing, so some members
 * are absent from this extract.
 */
223struct quorum_pd {
224 unsigned char track_flags;
227 struct qb_list_head list;
228 void *conn;
229};
230
/* NOTE(review): presumably the list of quorum_pd entries of tracking clients, linked through quorum_pd.list - confirm. */
231static struct qb_list_head trackers_list;
232
233/*
234 * votequorum timers
235 */
236
/* Timer handles with their "armed" flags, followed by sync bookkeeping flags. */
237static corosync_timer_handle_t qdevice_timer;
238static int qdevice_timer_set = 0;
239static corosync_timer_handle_t last_man_standing_timer;
240static int last_man_standing_timer_set = 0;
241static int sync_nodeinfo_sent = 0;
242static int sync_wait_for_poll_or_timeout = 0;
243
244/*
245 * Service Interfaces required by service_message_handler struct
246 */
247
/* While non-zero, are_we_quorate() does not invoke the quorum callback or send notifications. */
248static int sync_in_progress = 0;
249
/* The four sync hooks below are installed in votequorum_service_engine. */
250static void votequorum_sync_init (
251 const unsigned int *trans_list,
252 size_t trans_list_entries,
253 const unsigned int *member_list,
254 size_t member_list_entries,
255 const struct memb_ring_id *ring_id);
256
257static int votequorum_sync_process (void);
258static void votequorum_sync_activate (void);
259static void votequorum_sync_abort (void);
260
/* Callback invoked by are_we_quorate() when the quorate state changes outside of a sync. */
261static quorum_set_quorate_fn_t quorum_callback;
262
263/*
264 * votequorum_exec handler and definitions
265 */
266
/* Service init/exit hooks (installed in votequorum_service_engine) and the nodeinfo sender. */
267static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
268static int votequorum_exec_exit_fn (void);
269static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
270
/* One handler plus one endian-conversion routine per exec message; they are paired in votequorum_exec_engine[]. */
271static void message_handler_req_exec_votequorum_nodeinfo (
272 const void *message,
273 unsigned int nodeid);
274static void exec_votequorum_nodeinfo_endian_convert (void *message);
275
276static void message_handler_req_exec_votequorum_reconfigure (
277 const void *message,
278 unsigned int nodeid);
279static void exec_votequorum_reconfigure_endian_convert (void *message);
280
281static void message_handler_req_exec_votequorum_qdevice_reg (
282 const void *message,
283 unsigned int nodeid);
284static void exec_votequorum_qdevice_reg_endian_convert (void *message);
285
286static void message_handler_req_exec_votequorum_qdevice_reconfigure (
287 const void *message,
288 unsigned int nodeid);
289static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
290
291static struct corosync_exec_handler votequorum_exec_engine[] =
292{
293 { /* 0 */
294 .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
295 .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
296 },
297 { /* 1 */
298 .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
299 .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
300 },
301 { /* 2 */
302 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
303 .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
304 },
305 { /* 3 */
306 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
307 .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
308 },
309};
310
311/*
312 * Library Handler and Functions Definitions
313 */
314
/* Library connection init/exit hooks, installed in votequorum_service_engine. */
315static int quorum_lib_init_fn (void *conn);
316
317static int quorum_lib_exit_fn (void *conn);
318
/* Timer callback; NOTE(review): presumably driven by qdevice_timer - confirm. */
319static void qdevice_timer_fn(void *arg);
320
/* Library request handlers, listed in the same order as the quorum_lib_service[] entries. */
321static void message_handler_req_lib_votequorum_getinfo (void *conn,
322 const void *message);
323
324static void message_handler_req_lib_votequorum_setexpected (void *conn,
325 const void *message);
326
327static void message_handler_req_lib_votequorum_setvotes (void *conn,
328 const void *message);
329
330static void message_handler_req_lib_votequorum_trackstart (void *conn,
331 const void *message);
332
333static void message_handler_req_lib_votequorum_trackstop (void *conn,
334 const void *message);
335
336static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
337 const void *message);
338
339static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
340 const void *message);
341
342static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
343 const void *message);
344
345static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
346 const void *message);
347
348static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
349 const void *message);
350
/*
 * Library request dispatch table, indexed by request id (0..9 as noted on
 * each entry).
 * NOTE(review): the embedded source numbering skips one line inside every
 * entry (355, 359, 363, ...), so each entry is missing a member here.
 * Because the visible member ends in a comma the table still parses, but
 * it is not the complete upstream initializer.
 */
351static struct corosync_lib_handler quorum_lib_service[] =
352{
353 { /* 0 */
354 .lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
356 },
357 { /* 1 */
358 .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
360 },
361 { /* 2 */
362 .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
364 },
365 { /* 3 */
366 .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
368 },
369 { /* 4 */
370 .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
372 },
373 { /* 5 */
374 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
376 },
377 { /* 6 */
378 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
380 },
381 { /* 7 */
382 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
384 },
385 { /* 8 */
386 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
388 },
389 { /* 9 */
390 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
392 }
393};
394
/*
 * Service engine descriptor: ties together the library handlers, the exec
 * handlers, the init/exit hooks and the four sync callbacks. Both handler
 * counts are derived from the table sizes with sizeof.
 * NOTE(review): embedded source line 401 is missing, so one member is
 * absent from this extract.
 */
395static struct corosync_service_engine votequorum_service_engine = {
396 .name = "corosync vote quorum service v1.0",
397 .id = VOTEQUORUM_SERVICE,
398 .priority = 2,
399 .private_data_size = sizeof (struct quorum_pd),
400 .allow_inquorate = CS_LIB_ALLOW_INQUORATE,
402 .lib_init_fn = quorum_lib_init_fn,
403 .lib_exit_fn = quorum_lib_exit_fn,
404 .lib_engine = quorum_lib_service,
405 .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
406 .exec_init_fn = votequorum_exec_init_fn,
407 .exec_exit_fn = votequorum_exec_exit_fn,
408 .exec_engine = votequorum_exec_engine,
409 .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
410 .sync_init = votequorum_sync_init,
411 .sync_process = votequorum_sync_process,
412 .sync_activate = votequorum_sync_activate,
413 .sync_abort = votequorum_sync_abort
414};
415
/*
 * Body of an accessor that returns the address of votequorum_service_engine.
 * NOTE(review): embedded source line 416 (the function's signature) is
 * missing from this extract and must be restored from upstream.
 */
417{
418 return (&votequorum_service_engine);
419}
420
/*
 * Default-service entry naming this service "corosync_votequorum", version 0.
 * NOTE(review): embedded source line 425 is missing, so one member of the
 * entry is absent from this extract.
 */
421static struct default_service votequorum_service[] = {
422 {
423 .name = "corosync_votequorum",
424 .ver = 0,
426 },
427};
428
429/*
430 * common/utility macros/functions
431 */
432
/* Larger of a and b. The winning argument is evaluated twice, so do not pass expressions with side effects. */
#define max(a,b) ((a) > (b) ? (a) : (b))
434
435static void node_add_ordered(struct cluster_node *newnode)
436{
437 struct cluster_node *node = NULL;
438 struct qb_list_head *tmp;
439
440 ENTER();
441
442 qb_list_for_each(tmp, &cluster_members_list) {
443 node = qb_list_entry(tmp, struct cluster_node, list);
444 if (newnode->node_id < node->node_id) {
445 break;
446 }
447 }
448
449 if (!node) {
450 qb_list_add(&newnode->list, &cluster_members_list);
451 } else {
452 qb_list_add_tail(&newnode->list, &node->list);
453 }
454
455 LEAVE();
456}
457
/*
 * Hand out a zeroed cluster_node record for nodeid.
 *
 * Records come from the static cluster_nodes[] pool while it has free
 * slots; afterwards an entry in state NODESTATE_DEAD is unlinked from
 * cluster_members_list and reused. Returns the record, or NULL when the
 * "cannot find memory" path is taken.
 *
 * NOTE(review): embedded source line 487 is missing from this extract. It
 * opened the block that is closed at line 489 around node_add_ordered(), so
 * the condition guarding the list insertion has to be restored from
 * upstream.
 * NOTE(review): in the recycle path cl is assigned on every iteration, so
 * when no dead entry exists it is left pointing at the last visited node
 * and tmp at the list head; the "!cl" check then only fires for an empty
 * list. Confirm whether that case can happen.
 */
458static struct cluster_node *allocate_node(unsigned int nodeid)
459{
460 struct cluster_node *cl = NULL;
461 struct qb_list_head *tmp;
462
463 ENTER();
464
/* Pool still has room: take the next unused slot. */
465 if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
466 cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
467 cluster_nodes_entries++;
468 } else {
/* Pool exhausted: look for a dead member whose record can be recycled. */
469 qb_list_for_each(tmp, &cluster_members_list) {
470 cl = qb_list_entry(tmp, struct cluster_node, list);
471 if (cl->state == NODESTATE_DEAD) {
472 break;
473 }
474 }
475 /*
476 * this should never happen
477 */
478 if (!cl) {
479 log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node " CS_PRI_NODE_ID " data!!", nodeid);
480 goto out;
481 }
482 qb_list_del(tmp);
483 }
484
/* Wipe the record before (re)use; only node_id is filled in here. */
485 memset(cl, 0, sizeof(struct cluster_node));
486 cl->node_id = nodeid;
488 node_add_ordered(cl);
489 }
490
491out:
492 LEAVE();
493
494 return cl;
495}
496
/*
 * Look up the cluster_node record for nodeid.
 *
 * The local node ("us") is answered without a list walk, a second special
 * case returns the qdevice record, and everything else is searched in
 * cluster_members_list. Returns NULL when no record matches.
 *
 * NOTE(review): embedded source line 509 is missing from this extract. It
 * held the "if (...) {" that guards the "return qdevice" branch; restore it
 * from upstream.
 */
497static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
498{
499 struct cluster_node *node;
500 struct qb_list_head *tmp;
501
502 ENTER();
503
504 if (nodeid == us->node_id) {
505 LEAVE();
506 return us;
507 }
508
510 LEAVE();
511 return qdevice;
512 }
513
/* Linear search of the member list. */
514 qb_list_for_each(tmp, &cluster_members_list) {
515 node = qb_list_entry(tmp, struct cluster_node, list);
516 if (node->node_id == nodeid) {
517 LEAVE();
518 return node;
519 }
520 }
521
522 LEAVE();
523 return NULL;
524}
525
526static void get_lowest_node_id(void)
527{
528 struct cluster_node *node = NULL;
529 struct qb_list_head *tmp;
530
531 ENTER();
532
533 lowest_node_id = us->node_id;
534
535 qb_list_for_each(tmp, &cluster_members_list) {
536 node = qb_list_entry(tmp, struct cluster_node, list);
537 if ((node->state == NODESTATE_MEMBER) &&
538 (node->node_id < lowest_node_id)) {
539 lowest_node_id = node->node_id;
540 }
541 }
542 log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, lowest_node_id, us->node_id);
543 icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
544
545 LEAVE();
546}
547
548static void get_highest_node_id(void)
549{
550 struct cluster_node *node = NULL;
551 struct qb_list_head *tmp;
552
553 ENTER();
554
555 highest_node_id = us->node_id;
556
557 qb_list_for_each(tmp, &cluster_members_list) {
558 node = qb_list_entry(tmp, struct cluster_node, list);
559 if ((node->state == NODESTATE_MEMBER) &&
560 (node->node_id > highest_node_id)) {
561 highest_node_id = node->node_id;
562 }
563 }
564 log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, highest_node_id, us->node_id);
565 icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id);
566
567 LEAVE();
568}
569
570static int check_low_node_id_partition(void)
571{
572 struct cluster_node *node = NULL;
573 struct qb_list_head *tmp;
574 int found = 0;
575
576 ENTER();
577
578 qb_list_for_each(tmp, &cluster_members_list) {
579 node = qb_list_entry(tmp, struct cluster_node, list);
580 if ((node->state == NODESTATE_MEMBER) &&
581 (node->node_id == lowest_node_id)) {
582 found = 1;
583 }
584 }
585
586 LEAVE();
587 return found;
588}
589
590static int check_high_node_id_partition(void)
591{
592 struct cluster_node *node = NULL;
593 struct qb_list_head *tmp;
594 int found = 0;
595
596 ENTER();
597
598 qb_list_for_each(tmp, &cluster_members_list) {
599 node = qb_list_entry(tmp, struct cluster_node, list);
600 if ((node->state == NODESTATE_MEMBER) &&
601 (node->node_id == highest_node_id)) {
602 found = 1;
603 }
604 }
605
606 LEAVE();
607 return found;
608}
609
/*
 * Linear membership test: returns 1 when nodeid occurs among the first
 * "entries" elements of members, 0 otherwise (including entries <= 0).
 */
static int is_in_nodelist(int nodeid, unsigned int *members, int entries)
{
	int idx = 0;
	int hit = 0;

	ENTER();

	while (idx < entries) {
		if (nodeid == members[idx]) {
			hit = 1;
			break;
		}
		idx++;
	}

	LEAVE();
	return hit;
}
624
625/*
626 * The algorithm for a list of tie-breaker nodes is:
627 * travel the list of nodes in the auto_tie_breaker list,
628 * if the node IS in our current partition, check if the
629 * nodes earlier in the atb list are in the 'previous' partition;
630 * If none are found then we are safe to be quorate, if any are
631 * then we cannot be as we don't know if that node is up or down.
632 * If we don't have a node in the current list we are NOT quorate.
633 * Obviously if we find the first node in the atb list in our
634 * partition then we are quorate.
635 *
636 * Special cases lowest nodeid, and highest nodeid are handled separately.
637 */
638static int check_auto_tie_breaker(void)
639{
640 int i, j;
641 int res;
642 ENTER();
643
644 if (auto_tie_breaker == ATB_LOWEST) {
645 res = check_low_node_id_partition();
646 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res);
647 LEAVE();
648 return res;
649 }
650 if (auto_tie_breaker == ATB_HIGHEST) {
651 res = check_high_node_id_partition();
652 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res);
653 LEAVE();
654 return res;
655 }
656
657 /* Assume ATB_LIST, we should never be called for ATB_NONE */
658 for (i=0; i < atb_nodelist_entries; i++) {
659 if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
660 /*
661 * Node is in our partition, if any of its predecessors are
662 * in the previous quorum partition then it might be in the
663 * 'other half' (as we've got this far without seeing it here)
664 * and so we can't be quorate.
665 */
666 for (j=0; j<i; j++) {
667 if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
668 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in previous partition but not here, quorum denied", atb_nodelist[j]);
669 LEAVE();
670 return 0;
671 }
672 }
673
674 /*
675 * None of the other list nodes were in the previous partition, if there
676 * are enough votes, we can be quorate
677 */
678 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in current partition, we can be quorate", atb_nodelist[i]);
679 LEAVE();
680 return 1;
681 }
682 }
683 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate");
684 LEAVE();
685 return 0;
686}
687
688/*
689 * atb_string can be either:
690 * 'lowest'
691 * 'highest'
692 * a list of nodeids
693 */
694static void parse_atb_string(char *atb_string)
695{
696 char *ptr;
697 long num;
698
699 ENTER();
700 auto_tie_breaker = ATB_NONE;
701
702 if (!strcmp(atb_string, "lowest"))
703 auto_tie_breaker = ATB_LOWEST;
704
705 if (!strcmp(atb_string, "highest"))
706 auto_tie_breaker = ATB_HIGHEST;
707
708 if (atoi(atb_string)) {
709
710 atb_nodelist_entries = 0;
711 ptr = atb_string;
712 do {
713 num = strtol(ptr, &ptr, 10);
714 if (num) {
715 log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num);
716 atb_nodelist[atb_nodelist_entries++] = num;
717 }
718 } while (num);
719
720 if (atb_nodelist_entries) {
721 auto_tie_breaker = ATB_LIST;
722 }
723 }
724 icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
725 log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker);
726
727 /* Make sure we got something */
728 if (auto_tie_breaker == ATB_NONE) {
729 log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
730 auto_tie_breaker = ATB_NONE;
731 }
732 LEAVE();
733}
734
/*
 * Scan cluster_members_list for a NODESTATE_MEMBER entry that satisfies the
 * qdevice "master" condition; returns 1 when one is found, 0 otherwise.
 *
 * NOTE(review): embedded source lines 746-747 are missing from this
 * extract. They held the remainder of the if-condition (the part after
 * "&&") and its opening brace, so the exact flags tested cannot be read
 * from here; restore them from upstream.
 */
735static int check_qdevice_master(void)
736{
737 struct cluster_node *node = NULL;
738 struct qb_list_head *tmp;
739 int found = 0;
740
741 ENTER();
742
743 qb_list_for_each(tmp, &cluster_members_list) {
744 node = qb_list_entry(tmp, struct cluster_node, list);
745 if ((node->state == NODESTATE_MEMBER) &&
748 found = 1;
749 }
750 }
751
752 LEAVE();
753 return found;
754}
755
/*
 * Log a human-readable Yes/No breakdown of a NODE_FLAGS_* bit mask.
 *
 * NOTE(review): embedded source lines 760, 766 and 769 are missing from
 * this extract: the opening of the logging call and two of its arguments.
 * The format string has eight %s conversions but only six arguments are
 * visible; the "Qdevice" and "QdeviceMasterWins" values and the closing
 * ");" must be restored from upstream.
 */
756static void decode_flags(uint32_t flags)
757{
758 ENTER();
759
761 "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
762 (flags & NODE_FLAGS_QUORATE)?"Yes":"No",
763 (flags & NODE_FLAGS_LEAVING)?"Yes":"No",
764 (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
765 (flags & NODE_FLAGS_FIRST)?"Yes":"No",
767 (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
768 (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
770
771 LEAVE();
772}
773
774/*
775 * load/save are copied almost pristine from totemsrp.c
776 */
/*
 * Load ev_tracking_barrier from "<state dir>/ev_tracking".
 *
 * If the file exists and holds a full uint32_t, that value is used.
 * Otherwise the barrier is reset to 0 and the file is (re)created with that
 * value. Returns 0 in both cases and -1 when the file cannot be created.
 *
 * NOTE(review): embedded source lines 802 and 809 are missing from this
 * extract; they opened the two logging calls whose message arguments remain
 * at lines 803 and 810. Restore them from upstream.
 * NOTE(review): the descriptor is closed on every path shown here while
 * ev_tracking_fd keeps its numeric value, and update_ev_tracking_barrier()
 * later seeks and writes through ev_tracking_fd - confirm against upstream.
 * NOTE(review): umask(0) changes the process-wide umask and is not restored
 * in this function.
 */
777static int load_ev_tracking_barrier(void)
778{
779 int res = 0;
780 char filename[PATH_MAX];
781
782 ENTER();
783
784 snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_state_dir());
785
/* First attempt: read the value back from an existing file. */
786 ev_tracking_fd = open(filename, O_RDWR, 0700);
787 if (ev_tracking_fd != -1) {
788 res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
789 close(ev_tracking_fd);
790 if (res == sizeof (uint32_t)) {
791 LEAVE();
792 return 0;
793 }
794 }
795
/* Missing or short file: start from 0 and write a fresh file. */
796 ev_tracking_barrier = 0;
797 umask(0);
798 ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
799 if (ev_tracking_fd != -1) {
800 res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
801 if ((res == -1) || (res != sizeof (uint32_t))) {
803 "Unable to write to %s", filename);
804 }
805 close(ev_tracking_fd);
806 LEAVE();
807 return 0;
808 }
810 "Unable to create %s file", filename);
811
812 LEAVE();
813
814 return -1;
815}
816
817static void update_wait_for_all_status(uint8_t wfa_status)
818{
819 ENTER();
820
821 wait_for_all_status = wfa_status;
822 if (wait_for_all_status) {
824 } else {
825 us->flags &= ~NODE_FLAGS_WFASTATUS;
826 }
827 icmap_set_uint8("runtime.votequorum.wait_for_all_status",
828 wait_for_all_status);
829
830 LEAVE();
831}
832
833static void update_two_node(void)
834{
835 ENTER();
836
837 icmap_set_uint8("runtime.votequorum.two_node", two_node);
838
839 LEAVE();
840}
841
842static void update_ev_barrier(uint32_t expected_votes)
843{
844 ENTER();
845
846 ev_barrier = expected_votes;
847 icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
848
849 LEAVE();
850}
851
852static void update_qdevice_can_operate(uint8_t status)
853{
854 ENTER();
855
856 qdevice_can_operate = status;
857 icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
858
859 LEAVE();
860}
861
862static void update_qdevice_master_wins(uint8_t allow)
863{
864 ENTER();
865
866 qdevice_master_wins = allow;
867 icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
868
869 LEAVE();
870}
871
/*
 * Set ev_tracking_barrier to ev_t_barrier, store it in the icmap key
 * runtime.votequorum.ev_tracking_barrier and rewrite the value at offset 0
 * of the file behind ev_tracking_fd, followed by fdatasync()/fsync().
 *
 * A failed seek aborts the on-disk update; a short write is only reported.
 *
 * NOTE(review): embedded source lines 882 and 890 are missing from this
 * extract; they opened the two logging calls whose message arguments remain
 * at lines 883 and 891. Restore them from upstream.
 */
872static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
873{
874 int res;
875
876 ENTER();
877
878 ev_tracking_barrier = ev_t_barrier;
879 icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
880
/* Always overwrite the single stored value at the start of the file. */
881 if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
883 "Unable to update ev_tracking_barrier on disk data!!!");
884 LEAVE();
885 return;
886 }
887
888 res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
889 if (res != sizeof (uint32_t)) {
891 "Unable to update ev_tracking_barrier on disk data!!!");
892 }
/* Flush to stable storage; the sync result is not checked. */
893#ifdef HAVE_FDATASYNC
894 fdatasync(ev_tracking_fd);
895#else
896 fsync(ev_tracking_fd);
897#endif
898
899 LEAVE();
900}
901
902/*
903 * quorum calculation core bits
904 */
905
906static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
907{
908 struct qb_list_head *nodelist;
909 struct cluster_node *node;
910 unsigned int total_votes = 0;
911 unsigned int highest_expected = 0;
912 unsigned int newquorum, q1, q2;
913 unsigned int total_nodes = 0;
914
915 ENTER();
916
917 if ((allow_downscale) && (allow_decrease) && (max_expected)) {
918 max_expected = max(ev_barrier, max_expected);
919 }
920
921 qb_list_for_each(nodelist, &cluster_members_list) {
922 node = qb_list_entry(nodelist, struct cluster_node, list);
923
924 log_printf(LOGSYS_LEVEL_DEBUG, "node " CS_PRI_NODE_ID " state=%d, votes=%u, expected=%u",
925 node->node_id, node->state, node->votes, node->expected_votes);
926
927 if (node->state == NODESTATE_MEMBER) {
928 highest_expected = max(highest_expected, node->expected_votes);
929 total_votes += node->votes;
930 total_nodes++;
931 }
932 }
933
935 log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
936 total_votes += qdevice->votes;
937 total_nodes++;
938 }
939
940 if (max_expected > 0) {
941 highest_expected = max_expected;
942 }
943
944 /*
945 * This quorum calculation is taken from the OpenVMS Cluster Systems
946 * manual, but, then, you guessed that didn't you
947 */
948 q1 = (highest_expected + 2) / 2;
949 q2 = (total_votes + 2) / 2;
950 newquorum = max(q1, q2);
951
952 /*
953 * Normally quorum never decreases but the system administrator can
954 * force it down by setting expected votes to a maximum value
955 */
956 if (!allow_decrease) {
957 newquorum = max(quorum, newquorum);
958 }
959
960 /*
961 * The special two_node mode allows each of the two nodes to retain
962 * quorum if the other fails. Only one of the two should live past
963 * fencing (as both nodes try to fence each other in split-brain.)
964 * Also: if there are more than two nodes, force us inquorate to avoid
965 * any damage or confusion.
966 */
967 if (two_node && total_nodes <= 2) {
968 newquorum = 1;
969 }
970
971 if (ret_total_votes) {
972 *ret_total_votes = total_votes;
973 }
974
975 LEAVE();
976 return newquorum;
977}
978
979static void update_node_expected_votes(int new_expected_votes)
980{
981 struct qb_list_head *nodelist;
982 struct cluster_node *node;
983
984 if (new_expected_votes) {
985 qb_list_for_each(nodelist, &cluster_members_list) {
986 node = qb_list_entry(nodelist, struct cluster_node, list);
987
988 if (node->state == NODESTATE_MEMBER) {
989 node->expected_votes = new_expected_votes;
990 }
991 }
992 }
993}
994
/*
 * Re-evaluate whether this partition is quorate given total_votes and
 * publish any change.
 *
 * Order of evaluation:
 *  1. while wait_for_all is still pending, nothing happens until
 *     total_votes equals our expected_votes;
 *  2. the partition is quorate when quorum <= total_votes;
 *  3. the auto tie-breaker may grant quorum on an exact half split;
 *  4. the qdevice master-wins rule may grant quorum;
 *  5. NODE_FLAGS_QUORATE and the wait-for-all status are updated;
 *  6. on a state change outside of a sync, quorum_callback is invoked and
 *     a quorum notification is sent.
 *
 * NOTE(review): embedded source lines 1008 and 1053 are missing from this
 * extract: the opening of the "Waiting for all cluster members" logging
 * call, and the statement of the "cluster_is_quorate" true branch
 * (presumably setting NODE_FLAGS_QUORATE, mirroring the else branch).
 * Restore both from upstream.
 * NOTE(review): the early return at line 1013 leaves without LEAVE().
 */
995static void are_we_quorate(unsigned int total_votes)
996{
997 int quorate;
998 int quorum_change = 0;
999
1000 ENTER();
1001
1002 /*
1003 * wait for all nodes to show up before granting quorum
1004 */
1005
1006 if ((wait_for_all) && (wait_for_all_status)) {
1007 if (total_votes != us->expected_votes) {
1009 "Waiting for all cluster members. "
1010 "Current votes: %d expected_votes: %d",
1011 total_votes, us->expected_votes);
1012 assert(!cluster_is_quorate);
1013 return;
1014 }
1015 update_wait_for_all_status(0);
1016 }
1017
/* Plain vote count; the node id extremes are refreshed only when quorate. */
1018 if (quorum > total_votes) {
1019 quorate = 0;
1020 } else {
1021 quorate = 1;
1022 get_lowest_node_id();
1023 get_highest_node_id();
1024 }
1025
1026 if ((auto_tie_breaker != ATB_NONE) &&
1027 /* Must be a half (or half-1) split */
1028 (total_votes == (us->expected_votes / 2)) &&
1029 /* If the 'other' partition in a split might have quorum then we can't run ATB */
1030 (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1031 (check_auto_tie_breaker() == 1)) {
1032 quorate = 1;
1033 }
1034
1035 if ((qdevice_master_wins) &&
1036 (!quorate) &&
1037 (check_qdevice_master() == 1)) {
1038 log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
1039 quorate = 1;
1040 }
1041
/* Detect a transition in either direction. */
1042 if (cluster_is_quorate && !quorate) {
1043 quorum_change = 1;
1044 log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
1045 }
1046 if (!cluster_is_quorate && quorate) {
1047 quorum_change = 1;
1048 log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
1049 }
1050
1051 cluster_is_quorate = quorate;
1052 if (cluster_is_quorate) {
1054 } else {
1055 us->flags &= ~NODE_FLAGS_QUORATE;
1056 }
1057
/* With wait_for_all enabled, losing quorum re-arms the wait. */
1058 if (wait_for_all) {
1059 if (quorate) {
1060 update_wait_for_all_status(0);
1061 } else {
1062 update_wait_for_all_status(1);
1063 }
1064 }
1065
1066 if ((quorum_change) &&
1067 (sync_in_progress == 0)) {
1068 quorum_callback(quorum_members, quorum_members_entries,
1069 cluster_is_quorate, &quorum_ringid);
1070 votequorum_exec_send_quorum_notification(NULL, 0L);
1071 }
1072
1073 LEAVE();
1074}
1075
1076static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
1077{
1078 unsigned int total_votes = 0;
1079 unsigned int cluster_members = 0;
1080 struct qb_list_head *nodelist;
1081 struct cluster_node *node;
1082
1083 ENTER();
1084
1085 qb_list_for_each(nodelist, &cluster_members_list) {
1086 node = qb_list_entry(nodelist, struct cluster_node, list);
1087 if (node->state == NODESTATE_MEMBER) {
1088 cluster_members++;
1089 total_votes += node->votes;
1090 }
1091 }
1092
1093 if (qdevice->votes) {
1094 total_votes += qdevice->votes;
1095 cluster_members++;
1096 }
1097
1098 *totalvotes = total_votes;
1099 *current_members = cluster_members;
1100
1101 LEAVE();
1102}
1103
1104/*
1105 * Recalculate cluster quorum, set quorate and notify changes
1106 */
1107static void recalculate_quorum(int allow_decrease, int by_current_nodes)
1108{
1109 unsigned int total_votes = 0;
1110 unsigned int cluster_members = 0;
1111
1112 ENTER();
1113
1114 get_total_votes(&total_votes, &cluster_members);
1115
1116 if (!by_current_nodes) {
1117 cluster_members = 0;
1118 }
1119
1120 /*
1121 * Keep expected_votes at the highest number of votes in the cluster
1122 */
1123 log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
1124 if (total_votes > us->expected_votes) {
1125 us->expected_votes = total_votes;
1126 votequorum_exec_send_expectedvotes_notification();
1127 }
1128
1129 if ((ev_tracking) &&
1130 (us->expected_votes > ev_tracking_barrier)) {
1131 update_ev_tracking_barrier(us->expected_votes);
1132 }
1133
1134 quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1135 update_node_expected_votes(cluster_members);
1136
1137 are_we_quorate(total_votes);
1138
1139 LEAVE();
1140}
1141
1142/*
1143 * configuration bits and pieces
1144 */
1145
/*
 * Derive vote information from the icmap "nodelist.node.*" keys.
 *
 * votes           receives the quorum_votes of the local node (the entry
 *                 at nodelist.local_node_pos); it is left untouched when
 *                 no entry matches.
 * nodes           receives the number of distinct node positions found.
 * expected_votes  receives the sum of quorum_votes over all nodes; a node
 *                 without a quorum_votes key counts as 1 vote.
 *
 * Returns 1 when the nodelist was processed, 0 when
 * nodelist.local_node_pos cannot be read (outputs are then not written).
 *
 * NOTE(review): embedded source line 1162 is missing from this extract; it
 * opened the logging call whose message argument remains at line 1163.
 * Restore it from upstream.
 * NOTE(review): the early "return 0" leaves without LEAVE().
 */
1146static int votequorum_read_nodelist_configuration(uint32_t *votes,
1147 uint32_t *nodes,
1148 uint32_t *expected_votes)
1149{
1150 icmap_iter_t iter;
1151 const char *iter_key;
1152 char tmp_key[ICMAP_KEYNAME_MAXLEN];
1153 uint32_t our_pos, node_pos, last_node_pos=-1;
1154 uint32_t nodecount = 0;
1155 uint32_t nodelist_expected_votes = 0;
1156 uint32_t node_votes = 0;
1157 int res = 0;
1158
1159 ENTER();
1160
1161 if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
1163 "No nodelist defined or our node is not in the nodelist");
1164 return 0;
1165 }
1166
1167 iter = icmap_iter_init("nodelist.node.");
1168
1169 while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
1170
/* Only keys of the form nodelist.node.<pos>.<something> are of interest. */
1171 res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
1172 if (res != 2) {
1173 continue;
1174 }
1175
1176 /*
1177 * If current node_pos is the same as the last_node_pos then skip it
1178 * so we only do the code below once per node.
1179 * (icmap keys are always in order)
1180 */
1181 if (last_node_pos == node_pos) {
1182 continue;
1183 }
1184 last_node_pos = node_pos;
1185
1186 nodecount++;
1187
/* tmp_key is reused here to build the per-node quorum_votes key. */
1188 snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
1189 if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
1190 node_votes = 1;
1191 }
1192
1193 nodelist_expected_votes = nodelist_expected_votes + node_votes;
1194
1195 if (node_pos == our_pos) {
1196 *votes = node_votes;
1197 }
1198 }
1199
1200 *expected_votes = nodelist_expected_votes;
1201 *nodes = nodecount;
1202
1203 icmap_iter_finalize(iter);
1204
1205 LEAVE();
1206
1207 return 1;
1208}
1209
1210static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1211{
1212 char *qdevice_model = NULL;
1213 int ret = 0;
1214
1215 ENTER();
1216
1217 if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) {
1218 if (strlen(qdevice_model)) {
1219 if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
1220 *qdevice_votes = -1;
1221 }
1222 if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
1223 qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
1224 }
1225 if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) {
1226 qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
1227 }
1228 update_qdevice_can_operate(1);
1229 ret = 1;
1230 }
1231
1232 free(qdevice_model);
1233 }
1234
1235 LEAVE();
1236
1237 return ret;
1238}
1239
1240#define VOTEQUORUM_READCONFIG_STARTUP 0
1241#define VOTEQUORUM_READCONFIG_RUNTIME 1
1242
/*
 * Read the votequorum configuration from icmap, validate the combination of
 * options and apply the result to the module state (us, qdevice, two_node,
 * wait_for_all, auto_tie_breaker, ...).
 *
 * runtime - VOTEQUORUM_READCONFIG_STARTUP (0) or
 *           VOTEQUORUM_READCONFIG_RUNTIME (1). For most checks an invalid
 *           combination is reported through the return value at startup,
 *           while at runtime it is logged and a fallback is applied
 *           (typically update_qdevice_can_operate(0)).
 *
 * Returns NULL when no error was recorded, otherwise a pointer to a string
 * literal describing the problem (the caller must not free it).
 *
 * NOTE(review): this listing carries embedded line numbers and skips
 * numbers 1305, 1337 and 1532; the affected spots are marked below.
 */
1243static char *votequorum_readconfig(int runtime)
1244{
1245 uint32_t node_votes = 0, qdevice_votes = 0;
1246 uint32_t node_expected_votes = 0, expected_votes = 0;
1247 uint32_t node_count = 0;
1248 uint8_t atb = 0;
1249 int have_nodelist, have_qdevice;
1250 char *atb_string = NULL;
1251 char *error = NULL;
1252
1253 ENTER();
1254
1255 log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);
1256
1257 /*
1258 * Set the few things we re-read at runtime back to their defaults
1259 */
1260 if (runtime) {
1261 two_node = 0;
1262 expected_votes = 0;
1263 /* auto_tie_breaker cannot be changed by config reload, but
1264 * we automatically disable it on odd-sized clusters without
1265 * wait_for_all.
1266 * We may need to re-enable it when membership changes to ensure
1267 * that auto_tie_breaker is consistent across all nodes */
1268 auto_tie_breaker = initial_auto_tie_breaker;
1269 icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1270 }
1271
1272 /*
1273 * gather basic data here
1274 */
 /* The results of the two (void) lookups below are deliberately ignored. */
1275 (void)icmap_get_uint32("quorum.expected_votes", &expected_votes);
1276 have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
1277 have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
1278 (void)icmap_get_uint8("quorum.two_node", &two_node);
1279
1280 /*
1281 * do config verification and enablement
1282 */
1283
 /* Neither a nodelist nor expected_votes: nothing to compute quorum from. */
1284 if ((!have_nodelist) && (!expected_votes)) {
1285 if (!runtime) {
1286 error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
1287 } else {
1288 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
1289 log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
1290 }
1291 goto out;
1292 }
1293
1294 /*
1295 * two_node and qdevice are not compatible in the same config.
1296 * try to make an educated guess of what to do
1297 */
1298
1299 if ((two_node) && (have_qdevice)) {
1300 if (!runtime) {
1301 error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
1302 goto out;
1303 } else {
1304 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
 /*
  * NOTE(review): listing line 1305 is absent. The "} else {" a few lines
  * down has no visible matching "if"; judging by the two log messages the
  * missing condition tests whether the quorum device is registered.
  * Confirm against the full source.
  */
1306 log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
1307 two_node = 0;
1308 } else {
1309 log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
1310 update_qdevice_can_operate(0);
1311 }
1312 }
1313 }
1314
1315 /*
1316 * Enable special features
1317 */
 /* These options are read at startup only (this branch is skipped at runtime). */
1318 if (!runtime) {
1319 (void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
 /* Remember that wait_for_all was not set explicitly; used further down. */
1320 if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) {
1321 wait_for_all_autoset = 1;
1322 }
1323 (void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
1324 (void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
1325 (void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
1326 (void)icmap_get_uint8("quorum.auto_tie_breaker", &atb);
1327 (void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
1328
1329 /* auto_tie_breaker defaults to LOWEST */
1330 if (atb) {
1331 auto_tie_breaker = ATB_LOWEST;
1332 icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1333 }
1334 else {
1335 auto_tie_breaker = ATB_NONE;
1336 if (atb_string) {
 /*
  * NOTE(review): listing line 1337 is absent. The string below is the
  * tail of a call whose opening (presumably a log_printf) is not visible.
  */
1338 "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
1339 }
1340 }
1341
1342 if (atb && atb_string) {
1343 parse_atb_string(atb_string);
1344 }
 /* atb_string stays NULL when the lookup above did not set it; free(NULL) is a no-op. */
1345 free(atb_string);
 /* Saved so the runtime path can restore it (see the top of this function). */
1346 initial_auto_tie_breaker = auto_tie_breaker;
1347
1348 /* allow_downscale requires ev_tracking */
1349 if (allow_downscale) {
1350 ev_tracking = 1;
1351 }
1352
1353 if (ev_tracking) {
 /* This error path returns directly instead of going through "out". */
1354 if (load_ev_tracking_barrier() < 0) {
1355 LEAVE();
1356 return ((char *)"Unable to load ev_tracking file!");
1357 }
1358 update_ev_tracking_barrier(ev_tracking_barrier);
1359 }
1360
1361 }
1362
1363 /*
1364 * Changing of wait_for_all during runtime is not supported, but changing of two_node is
1365 * and two_node may set wfa if not configured explicitly. It is safe to unset it
1366 * (or set it back) when two_node changes.
1367 */
1368 if (wait_for_all_autoset) {
1369 wait_for_all = two_node;
1370 }
1371
1372 /* two_node and auto_tie_breaker are not compatible as two_node uses
1373 * a fence race to decide quorum whereas ATB decides based on node id
1374 */
1375 if (two_node && auto_tie_breaker != ATB_NONE) {
1376 log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible.");
1377 log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf");
1378 two_node = 0;
1379 }
1380
1381 /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs
1382 * to be set so that an isolated half+1 without the tie breaker node
1383 * does not have quorum on reboot.
1384 */
 /* The parity test is made on node_expected_votes (the nodelist vote sum). */
1385 if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) &&
1386 (!wait_for_all)) {
1387 if (last_man_standing) {
1388 /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what
1389 * they might want so we'll just quit.
1390 */
1391 log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n");
1392 log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n");
1393 log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n");
1394 log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n");
1395 log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n");
 /* Unlike the checks below, this error is set regardless of "runtime". */
1396 error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
1397 goto out;
1398 }
1399 else {
1400 log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n");
1401 log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n");
1402 log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n");
1403 auto_tie_breaker = ATB_NONE;
1404 icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1405 }
1406 }
1407
1408 /*
1409 * quorum device is not compatible with last_man_standing and auto_tie_breaker
1410 * neither lms or atb can be set at runtime, so there is no need to check for
1411 * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
1412 * been enabled at startup.
1413 */
1414
1415 if ((have_qdevice) && (last_man_standing)) {
1416 if (!runtime) {
1417 error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
1418 goto out;
1419 } else {
1420 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
1421 log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1422 update_qdevice_can_operate(0);
1423 }
1424 }
1425
1426 if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) {
1427 if (!runtime) {
1428 error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
1429 goto out;
1430 } else {
1431 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
1432 log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1433 update_qdevice_can_operate(0);
1434 }
1435 }
1436
1437 if ((have_qdevice) && (allow_downscale)) {
1438 if (!runtime) {
1439 error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
1440 goto out;
1441 } else {
1442 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
1443 log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1444 update_qdevice_can_operate(0);
1445 }
1446 }
1447
1448 /*
1449 * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
1450 * we don't know what the quorum device should vote.
1451 */
1452
 /*
  * qdevice_votes is a uint32_t; -1 is the "votes not configured" marker
  * stored by votequorum_qdevice_is_configured().
  */
1453 if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
1454 if (!runtime) {
1455 error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
1456 goto out;
1457 } else {
1458 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
1459 log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1460 update_qdevice_can_operate(0);
1461 }
1462 }
1463
1464 /*
1465 * if user specifies a node list with uneven votes and no device.votes
1466 * we cannot autocalculate the votes
1467 */
1468
 /* node_count != node_expected_votes means at least one node does not have exactly 1 vote. */
1469 if ((have_qdevice) &&
1470 (qdevice_votes == -1) &&
1471 (have_nodelist) &&
1472 (node_count != node_expected_votes)) {
1473 if (!runtime) {
1474 error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
1475 goto out;
1476 } else {
1477 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
1478 log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1479 update_qdevice_can_operate(0);
1480 }
1481 }
1482
1483 /*
1484 * validate quorum device votes vs expected_votes
1485 */
1486
 /*
  * NOTE(review): qdevice_votes is unsigned, so the -1 marker also satisfies
  * "> 0" here; the unsigned difference is then converted to int.
  */
1487 if ((qdevice_votes > 0) && (expected_votes)) {
1488 int delta = expected_votes - qdevice_votes;
1489 if (delta < 2) {
1490 if (!runtime) {
1491 error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
1492 goto out;
1493 } else {
1494 log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
1495 log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1496 update_qdevice_can_operate(0);
1497 }
1498 }
1499 }
1500
1501 /*
1502 * automatically calculate device votes and adjust expected_votes from nodelist
1503 */
1504
 /*
  * With N nodes of one vote each (node_count == node_expected_votes == N)
  * the device gets N - 1 votes and the expected total becomes 2N - 1.
  */
1505 if ((have_qdevice) &&
1506 (qdevice_votes == -1) &&
1507 (!expected_votes) &&
1508 (have_nodelist) &&
1509 (node_count == node_expected_votes)) {
1510 qdevice_votes = node_expected_votes - 1;
1511 node_expected_votes = node_expected_votes + qdevice_votes;
1512 }
1513
1514 /*
1515 * set this node votes and expected_votes
1516 */
1517 log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
1518
 /* With tracking enabled the stored barrier replaces the configured expected_votes. */
1519 if (ev_tracking) {
1520 expected_votes = ev_tracking_barrier;
1521 }
1522
1523 if (have_nodelist) {
1524 us->votes = node_votes;
1525 us->expected_votes = node_expected_votes;
1526 } else {
 /* No nodelist: default to one vote unless quorum.votes provides a value. */
1527 us->votes = 1;
1528 (void)icmap_get_uint32("quorum.votes", &us->votes);
1529 }
1530
 /*
  * NOTE(review): listing line 1532 is absent, so the body of this "if" is
  * empty here. Presumably the missing statement stores expected_votes into
  * us->expected_votes; confirm against the full source.
  */
1531 if (expected_votes) {
1533 }
1534
1535 /*
1536 * set qdevice votes
1537 */
1538
1539 if (!have_qdevice) {
1540 qdevice->votes = 0;
1541 }
1542
1543 if (qdevice_votes != -1) {
1544 qdevice->votes = qdevice_votes;
1545 }
1546
1547 update_ev_barrier(us->expected_votes);
1548 update_two_node();
 /* wait_for_all_status is only armed at startup, never on a runtime reload. */
1549 if (wait_for_all) {
1550 if (!runtime) {
1551 update_wait_for_all_status(1);
1552 }
1553 } else if (wait_for_all_autoset && wait_for_all_status) {
1554 /*
1555 * Reset wait for all status for consistency when wfa is auto-unset by 2node.
1556 * wait_for_all_status would be ignored by are_we_quorate anyway.
1557 */
1558 update_wait_for_all_status(0);
1559 }
1560
1561out:
1562 LEAVE();
1563 return error;
1564}
1565
1566static void votequorum_refresh_config(
1567 int32_t event,
1568 const char *key_name,
1569 struct icmap_notify_value new_val,
1570 struct icmap_notify_value old_val,
1571 void *user_data)
1572{
1573 int old_votes, old_expected_votes;
1574 uint8_t reloading;
1575 uint8_t cancel_wfa;
1576 int32_t reload_status;
1577
1578 ENTER();
1579
1580 /*
1581 * If a full reload is in progress then don't do anything until it's done and
1582 * can reconfigure it all atomically
1583 */
1584 if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
1585 return;
1586 }
1587
1588 /* If a full reload failed, then don't reconfigure */
1589 if ( (strcmp(key_name, "config.totemconfig_reload_in_progress") == 0) &&
1590 (icmap_get_int32("config.reload_status", &reload_status) == CS_OK) &&
1591 (reload_status != CS_OK) ) {
1592 return;
1593 }
1594
1595 (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
1596 if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
1597 cancel_wfa >= 1) {
1598 icmap_set_uint8("quorum.cancel_wait_for_all", 0);
1599 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA,
1600 us->node_id, 0)) {
1601 log_printf(LOGSYS_LEVEL_ERROR, "Failed to send Cancel WFA message to other nodes");
1602 }
1603 return;
1604 }
1605
1606 old_votes = us->votes;
1607 old_expected_votes = us->expected_votes;
1608
1609 /*
1610 * Reload the configuration
1611 */
1612 votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
1613
1614 /*
1615 * activate new config
1616 */
1617 votequorum_exec_send_nodeinfo(us->node_id);
1618 votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1619 if (us->votes != old_votes) {
1620 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES,
1621 us->node_id, us->votes)) {
1622 log_printf(LOGSYS_LEVEL_ERROR, "Failed to send new votes message to other nodes");
1623 }
1624 }
1625 if (us->expected_votes != old_expected_votes) {
1626 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES,
1627 us->node_id, us->expected_votes)) {
1628 log_printf(LOGSYS_LEVEL_ERROR, "Failed to send expected votes message to other nodes");
1629 }
1630 }
1631
1632 LEAVE();
1633}
1634
/*
 * Register votequorum_refresh_config() as the icmap tracking callback for
 * three key names/prefixes: "nodelist.", "quorum." and
 * "config.totemconfig_reload_in_progress". The callback receives NULL as
 * user data. The return values of icmap_track_add() are not checked and
 * the track handles are kept only in locals.
 *
 * NOTE(review): one argument line of each icmap_track_add() call is absent
 * from this listing (embedded numbers 1644, 1650 and 1656 are skipped);
 * presumably it carries the event mask. Confirm against the full source.
 */
1635static void votequorum_exec_add_config_notification(void)
1636{
1637 icmap_track_t icmap_track_nodelist = NULL;
1638 icmap_track_t icmap_track_quorum = NULL;
1639 icmap_track_t icmap_track_reload = NULL;
1640
1641 ENTER();
1642
1643 icmap_track_add("nodelist.",
1645 votequorum_refresh_config,
1646 NULL,
1647 &icmap_track_nodelist);
1648
1649 icmap_track_add("quorum.",
1651 votequorum_refresh_config,
1652 NULL,
1653 &icmap_track_quorum);
1654
1655 icmap_track_add("config.totemconfig_reload_in_progress",
1657 votequorum_refresh_config,
1658 NULL,
1659 &icmap_track_reload);
1660
1661 LEAVE();
1662}
1663
1664/*
1665 * votequorum_exec core
1666 */
1667
/*
 * Multicast a reconfigure request to the cluster.
 *
 * A single-element iovec is pointed at req_exec_quorum_reconfigure and sent
 * with corosync_api->totem_mcast(..., TOTEM_AGREED).
 *
 * Returns whatever totem_mcast() returns; callers in this file treat a
 * non-zero value as a send failure.
 *
 * NOTE(review): the declaration of req_exec_quorum_reconfigure and the
 * statements that fill it in from param/nodeid/value are absent from this
 * listing (embedded numbers 1670, 1676-1681 and 1683-1684 are skipped).
 * Confirm against the full source.
 */
1668static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
1669{
1671 struct iovec iov[1];
1672 int ret;
1673
1674 ENTER();
1675
1682
1685
1686 iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
1687 iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
1688
1689 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1690
1691 LEAVE();
1692 return ret;
1693}
1694
/*
 * Multicast the node information of the node identified by nodeid.
 *
 * The node is looked up with find_node_by_nodeid(); -1 is returned when it
 * is unknown (that early return does not call LEAVE()). Otherwise
 * req_exec_quorum_nodeinfo is sent with totem_mcast(..., TOTEM_AGREED) and
 * the result of that call is returned.
 *
 * NOTE(review): the declaration of req_exec_quorum_nodeinfo, the statements
 * that fill it in and the condition guarding decode_flags() are absent from
 * this listing (embedded numbers 1697, 1709-1714 and 1718-1719 are
 * skipped). Confirm against the full source.
 */
1695static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1696{
1698 struct iovec iov[1];
1699 struct cluster_node *node;
1700 int ret;
1701
1702 ENTER();
1703
1704 node = find_node_by_nodeid(nodeid);
1705 if (!node) {
1706 return -1;
1707 }
1708
 /* NOTE(review): the opening of the block closed below is not visible here. */
1715 decode_flags(node->flags);
1716 }
1717
1720
1721 iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
1722 iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
1723
1724 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1725
1726 LEAVE();
1727 return ret;
1728}
1729
/*
 * Multicast a qdevice rename request (oldname -> newname).
 *
 * Both names are asserted to fit, including the terminating NUL, into the
 * corresponding fields of req_exec_quorum_qdevice_reconfigure. The message
 * is sent with totem_mcast(..., TOTEM_AGREED) and that call's result is
 * returned.
 *
 * NOTE(review): the declaration of req_exec_quorum_qdevice_reconfigure, its
 * header setup and the statements copying the two names are absent from
 * this listing (embedded numbers 1732, 1738-1739, 1742 and 1745 are
 * skipped). Confirm against the full source.
 */
1730static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
1731{
1733 struct iovec iov[1];
1734 int ret;
1735
1736 ENTER();
1737
1740
1741 assert(strlen(oldname) < sizeof(req_exec_quorum_qdevice_reconfigure.oldname));
1743
1744 assert(strlen(newname) < sizeof(req_exec_quorum_qdevice_reconfigure.newname));
1746
1747 iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
1748 iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
1749
1750 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1751
1752 LEAVE();
1753 return ret;
1754}
1755
/*
 * Multicast a qdevice registration message carrying qdevice_name_req.
 *
 * The name is asserted to fit into the qdevice_name field before it is
 * copied with strcpy(). The message is sent with
 * totem_mcast(..., TOTEM_AGREED) and that call's result is returned.
 *
 * NOTE(review): the declaration of req_exec_quorum_qdevice_reg and the
 * statements that set its header and store "operation" are absent from this
 * listing (embedded numbers 1758 and 1764-1766 are skipped). Confirm
 * against the full source.
 */
1756static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
1757{
1759 struct iovec iov[1];
1760 int ret;
1761
1762 ENTER();
1763
1767
 /* The assert guarantees the unbounded strcpy() below cannot overflow. */
1768 assert(strlen(qdevice_name_req) < sizeof(req_exec_quorum_qdevice_reg.qdevice_name));
1769 strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
1770
1771 iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
1772 iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
1773
1774 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1775
1776 LEAVE();
1777 return ret;
1778}
1779
/*
 * Build a quorum notification (quorate flag plus the id and state of every
 * entry in cluster_members_list) and deliver it over IPC.
 *
 * conn    - when non-NULL the message goes to this connection only and the
 *           result of ipc_dispatch_send() is returned.
 * context - stored in the message for the single-connection case; for the
 *           broadcast case each tracker's own tracking_context is used.
 *
 * With conn == NULL the message is sent to every entry of trackers_list
 * (send results are ignored) and 0 is returned.
 *
 * The message is assembled in a stack buffer sized for
 * PROCESSOR_COUNT_MAX + 2 node entries.
 *
 * NOTE(review): two condition lines are absent from this listing (embedded
 * numbers 1798 and 1818 are skipped); they guard the extra entry added for
 * the quorum device. Confirm against the full source.
 */
1780static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
1781{
1782 struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification;
1783 struct qb_list_head *tmp;
1784 struct cluster_node *node;
1785 int i = 0;
1786 int cluster_members = 0;
1787 int size;
1788 char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
1789
1790 ENTER();
1791
1792 log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate);
1793
 /* First pass: count the entries so the message size can be computed. */
1794 qb_list_for_each(tmp, &cluster_members_list) {
1795 node = qb_list_entry(tmp, struct cluster_node, list);
1796 cluster_members++;
1797 }
 /* NOTE(review): the "if" opening the block closed below is not visible here. */
1799 cluster_members++;
1800 }
1801
1802 size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members;
1803
1804 res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf;
1805 res_lib_votequorum_notification->quorate = cluster_is_quorate;
1806 res_lib_votequorum_notification->context = context;
1807 res_lib_votequorum_notification->node_list_entries = cluster_members;
1808 res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION;
1809 res_lib_votequorum_notification->header.size = size;
1810 res_lib_votequorum_notification->header.error = CS_OK;
1811
1812 /* Send all known nodes and their states */
1813 qb_list_for_each(tmp, &cluster_members_list) {
1814 node = qb_list_entry(tmp, struct cluster_node, list);
1815 res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
1816 res_lib_votequorum_notification->node_list[i++].state = node->state;
1817 }
 /* NOTE(review): the "if" opening the block closed below is not visible here. */
1819 res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
1820 res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
1821 }
1822
1823 /* Send it to all interested parties */
1824 if (conn) {
1825 int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1826 LEAVE();
1827 return ret;
1828 } else {
1829 struct quorum_pd *qpd;
1830
 /* The same buffer is reused; only the context field changes per tracker. */
1831 qb_list_for_each(tmp, &trackers_list) {
1832 qpd = qb_list_entry(tmp, struct quorum_pd, list);
1833 res_lib_votequorum_notification->context = qpd->tracking_context;
1834 corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1835 }
1836 }
1837
1838 LEAVE();
1839
1840 return 0;
1841}
1842
1843static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context)
1844{
1845 struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification;
1846 int i = 0;
1847 int size;
1848 struct qb_list_head *tmp;
1849 char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries];
1850
1851 ENTER();
1852
1853 log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. ring_id = " CS_PRI_RING_ID, quorum_ringid.nodeid, quorum_ringid.seq);
1854
1855 size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries;
1856
1857 res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf;
1858 res_lib_votequorum_notification->node_list_entries = quorum_members_entries;
1859 res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.nodeid;
1860 res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq;
1861 res_lib_votequorum_notification->context = context;
1862
1863 for (i=0; i<quorum_members_entries; i++) {
1864 res_lib_votequorum_notification->node_list[i] = quorum_members[i];
1865 }
1866
1867 res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION;
1868 res_lib_votequorum_notification->header.size = size;
1869 res_lib_votequorum_notification->header.error = CS_OK;
1870
1871 /* Send it to all interested parties */
1872 if (conn) {
1873 int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1874 LEAVE();
1875 return ret;
1876 } else {
1877 struct quorum_pd *qpd;
1878
1879 qb_list_for_each(tmp, &trackers_list) {
1880 qpd = qb_list_entry(tmp, struct quorum_pd, list);
1881 res_lib_votequorum_notification->context = qpd->tracking_context;
1882 corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1883 }
1884 }
1885
1886 LEAVE();
1887
1888 return 0;
1889}
1890
/*
 * Tell the registered trackers (trackers_list) that expected votes changed.
 *
 * NOTE(review): the message declaration, its initialisation and the body of
 * the loop after the list-entry lookup are absent from this listing
 * (embedded numbers 1893, 1901-1904 and 1908-1910 are skipped), so what is
 * actually sent per tracker cannot be told from here. Confirm against the
 * full source.
 */
1891static void votequorum_exec_send_expectedvotes_notification(void)
1892{
1894 struct quorum_pd *qpd;
1895 struct qb_list_head *tmp;
1896
1897 ENTER();
1898
1899 log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
1900
1905
 /* Visit every tracker; the per-tracker statements are not visible here. */
1906 qb_list_for_each(tmp, &trackers_list) {
1907 qpd = qb_list_entry(tmp, struct quorum_pd, list);
1911 }
1912
1913 LEAVE();
1914}
1915
/*
 * Endian-conversion hook for qdevice reconfigure messages.
 * No field of the message is converted here; the function only emits the
 * ENTER/LEAVE trace.
 */
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
{
	(void)message;	/* unused */

	ENTER();
	LEAVE();
}
1922
/*
 * Handle a qdevice rename request received from the cluster.
 *
 * message - the request; read through req_exec_quorum_qdevice_reconfigure.
 * nodeid  - id of the sending node (used for logging).
 *
 * The rename is applied only when the request's oldname equals the current
 * qdevice_name: the buffer is zeroed and newname is copied in.
 *
 * NOTE(review): the declaration of req_exec_quorum_qdevice_reconfigure and
 * the remaining log_printf() arguments are absent from this listing
 * (embedded numbers 1927 and 1933-1934 are skipped). Confirm against the
 * full source.
 */
1923static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1924 const void *message,
1925 unsigned int nodeid)
1926{
1928
1929 ENTER();
1930
1931 log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node " CS_PRI_NODE_ID " [from: %s to: %s]",
1932 nodeid,
1935
1936 if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
1937 log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
 /* Clear the whole buffer first so no tail of the old name survives. */
1938 memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
1939 strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
1940 /*
1941 * TODO: notify qdevices about name change?
1942 * this is not relevant for now and can wait later on since
1943 * qdevices are local only and libvotequorum is not final
1944 */
1945 }
1946
1947 LEAVE();
1948}
1949
/*
 * Endian-conversion hook for qdevice registration messages.
 *
 * NOTE(review): the local declaration and the conversion statement are
 * absent from this listing (embedded numbers 1952 and 1956 are skipped), so
 * which field(s) get converted cannot be told from here. Confirm against
 * the full source.
 */
1950static void exec_votequorum_qdevice_reg_endian_convert (void *message)
1951{
1953
1954 ENTER();
1955
1957
1958 LEAVE();
1959}
1960
/*
 * Handle a qdevice register/unregister message received from the cluster.
 *
 * message - the request; read through req_exec_quorum_qdevice_reg.
 * nodeid  - id of the sending node.
 *
 * What is visible of the registration path:
 *   - a message from another node only records the device name if none is
 *     known yet, then returns;
 *   - for our own message the device name is recorded if still empty,
 *     compared against the requested one, and a status reply is sent to
 *     qdevice_reg_conn, which is then cleared. A name mismatch is answered
 *     with CS_ERR_EXIST.
 * What is visible of the other path: cluster_members_list is scanned and
 * qdevice_name is wiped unless a matching member is found.
 *
 * NOTE(review): many lines of this function are absent from this listing
 * (embedded numbers 1965-1966, 1975-1976, 1978, 1980, 1995, 2019, 2021,
 * 2025, 2027, 2031-2032, 2037 and 2041 are skipped), including the switch
 * statement, its case labels and several conditions. The comments here
 * describe only what remains visible. Confirm against the full source.
 */
1961static void message_handler_req_exec_votequorum_qdevice_reg (
1962 const void *message,
1963 unsigned int nodeid)
1964{
1967 int wipe_qdevice_name = 1;
1968 struct cluster_node *node = NULL;
1969 struct qb_list_head *tmp;
1970 cs_error_t error = CS_OK;
1971
1972 ENTER();
1973
1974 log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node " CS_PRI_NODE_ID " [%s]",
1977
1979 {
 /* Message originated on another node: just remember the device name. */
1981 if (nodeid != us->node_id) {
1982 if (!strlen(qdevice_name)) {
1983 log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
1984 strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1985 }
1986 LEAVE();
1987 return;
1988 }
1989
1990 /*
1991 * protect against the case where we broadcast qdevice registration
1992 * to new members, we receive the message back, but there is no registration
1993 * connection in progress
1994 */
 /* NOTE(review): the condition for the early return below is not visible here. */
1996 LEAVE();
1997 return;
1998 }
1999
2000 /*
2001 * this should NEVER happen
2002 */
2003 if (!qdevice_reg_conn) {
2004 log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
2005 LEAVE();
2006 return;
2007 }
2008
2009 /*
2010 * registering our own device in this case
2011 */
2012 if (!strlen(qdevice_name)) {
2013 strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
2014 }
2015
2016 /*
2017 * check if it is our device or something else
2018 */
 /* NOTE(review): the start of the comparison below is not visible here. */
2020 qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2022 votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2023 votequorum_exec_send_nodeinfo(us->node_id);
2024 } else {
2026 "A new qdevice with different name (new: %s old: %s) is trying to register!",
2028 error = CS_ERR_EXIST;
2029 }
2030
 /* Answer the pending registration request and forget the connection. */
2033 res_lib_votequorum_status.header.error = error;
2034 corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2035 qdevice_reg_conn = NULL;
2036 break;
 /* NOTE(review): a case label appears to be missing before this loop. */
2038 qb_list_for_each(tmp, &cluster_members_list) {
2039 node = qb_list_entry(tmp, struct cluster_node, list);
2040 if ((node->state == NODESTATE_MEMBER) &&
2042 wipe_qdevice_name = 0;
2043 }
2044 }
2045
2046 if (wipe_qdevice_name) {
2047 memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2048 }
2049
2050 break;
2051 }
2052 LEAVE();
2053}
2054
2055static void exec_votequorum_nodeinfo_endian_convert (void *message)
2056{
2057 struct req_exec_quorum_nodeinfo *nodeinfo = message;
2058
2059 ENTER();
2060
2061 nodeinfo->nodeid = swab32(nodeinfo->nodeid);
2062 nodeinfo->votes = swab32(nodeinfo->votes);
2063 nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
2064 nodeinfo->flags = swab32(nodeinfo->flags);
2065
2066 LEAVE();
2067}
2068
/*
 * Handle a nodeinfo message received from the cluster.
 *
 * message       - the request (struct req_exec_quorum_nodeinfo).
 * sender_nodeid - id of the node that sent the message. The node the
 *                 message describes is taken from the message's own nodeid
 *                 field and may differ from the sender.
 *
 * The described node is looked up, or allocated if unknown, and its
 * previous votes/expected/state/flags are remembered. After the node has
 * been updated, quorum is recalculated when the node is new, is this node,
 * carries NODE_FLAGS_FIRST, or any of the remembered values changed.
 * Finally wait_for_all_status is cleared when wait_for_all is on and the
 * node is quorate without the WFASTATUS flag.
 *
 * NOTE(review): several lines are absent from this listing (embedded
 * numbers 2089-2091, 2093, 2120, 2127, 2135-2136, 2148 and 2152 are
 * skipped), among them the conditions opening two blocks and the
 * statements copying votes/flags/expected_votes from the message into the
 * node. Confirm against the full source.
 */
2069static void message_handler_req_exec_votequorum_nodeinfo (
2070 const void *message,
2071 unsigned int sender_nodeid)
2072{
2073 const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
2074 struct cluster_node *node = NULL;
2075 int old_votes;
2076 int old_expected;
2077 uint32_t old_flags;
2078 nodestate_t old_state;
2079 int new_node = 0;
2080 int allow_downgrade = 0;
2081 int by_node = 0;
2082 unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
2083
2084 ENTER();
2085
2086 log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node " CS_PRI_NODE_ID, sender_nodeid);
2087 log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[" CS_PRI_NODE_ID "]: votes: %d, expected: %d flags: %d",
2088 nodeid,
2092
 /* NOTE(review): the condition opening the block closed below is not visible here. */
2094 decode_flags(req_exec_quorum_nodeinfo->flags);
2095 }
2096
 /* Look the node up, creating it on first sight. */
2097 node = find_node_by_nodeid(nodeid);
2098 if (!node) {
2099 node = allocate_node(nodeid);
2100 new_node = 1;
2101 }
2102 if (!node) {
2103 corosync_api->error_memory_failure();
2104 LEAVE();
2105 return;
2106 }
2107
 /* Baseline used for change detection at the "recalculate" label. */
2108 if (new_node) {
2109 old_votes = 0;
2110 old_expected = 0;
2111 old_state = NODESTATE_DEAD;
2112 old_flags = 0;
2113 } else {
2114 old_votes = node->votes;
2115 old_expected = node->expected_votes;
2116 old_state = node->state;
2117 old_flags = node->flags;
2118 }
2119
 /*
  * NOTE(review): the condition opening this block (listing line 2120) is
  * not visible. The block ends with "goto recalculate", skipping the
  * state/flag update below.
  */
2121 struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2122
2123 assert(sender_node != NULL);
2124
2125 if ((!cluster_is_quorate) &&
2126 (sender_node->flags & NODE_FLAGS_QUORATE)) {
2128 } else {
 /* Never lower the recorded votes on this path. */
2129 node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
2130 }
2131 goto recalculate;
2132 }
2133
2134 /* Update node state */
2137 node->state = NODESTATE_MEMBER;
2138
 /* A node announcing that it leaves permits quorum to be lowered. */
2139 if (node->flags & NODE_FLAGS_LEAVING) {
2140 node->state = NODESTATE_LEAVING;
2141 allow_downgrade = 1;
2142 by_node = 1;
2143 }
2144
2145 if ((!cluster_is_quorate) &&
2146 (node->flags & NODE_FLAGS_QUORATE)) {
2147 allow_downgrade = 1;
2149 }
2150
2151 if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2153 } else {
2154 node->expected_votes = us->expected_votes;
2155 }
2156
2157 if ((last_man_standing) && (node->votes > 1)) {
2158 log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
2159 "cluster nodes votes are set to 1. Disabling LMS.");
2160 last_man_standing = 0;
2161 if (last_man_standing_timer_set) {
2162 corosync_api->timer_delete(last_man_standing_timer);
2163 last_man_standing_timer_set = 0;
2164 }
2165 }
2166
2167recalculate:
2168 if ((new_node) ||
2169 (nodeid == us->node_id) ||
2170 (node->flags & NODE_FLAGS_FIRST) ||
2171 (old_votes != node->votes) ||
2172 (old_expected != node->expected_votes) ||
2173 (old_flags != node->flags) ||
2174 (old_state != node->state)) {
2175 recalculate_quorum(allow_downgrade, by_node);
2176 }
2177
2178 if ((wait_for_all) &&
2179 (!(node->flags & NODE_FLAGS_WFASTATUS)) &&
2180 (node->flags & NODE_FLAGS_QUORATE)) {
2181 update_wait_for_all_status(0);
2182 }
2183
2184 LEAVE();
2185}
2186
2187static void exec_votequorum_reconfigure_endian_convert (void *message)
2188{
2189 struct req_exec_quorum_reconfigure *reconfigure = message;
2190
2191 ENTER();
2192
2193 reconfigure->nodeid = swab32(reconfigure->nodeid);
2194 reconfigure->value = swab32(reconfigure->value);
2195
2196 LEAVE();
2197}
2198
/*
 * Handle a reconfigure message received from the cluster.
 *
 * message - the request; read through req_exec_quorum_reconfigure.
 * nodeid  - id of the sending node.
 *
 * Three branches are visible:
 *   - expected votes: the new value is pushed to the nodes and to the
 *     barrier, trackers are notified, and quorum is recalculated with a
 *     decrease allowed;
 *   - node votes: the target node is looked up (unknown nodes are ignored)
 *     and quorum is recalculated with a decrease allowed;
 *   - cancel wait_for_all: wait_for_all_status is cleared and quorum is
 *     recalculated without allowing a decrease.
 *
 * NOTE(review): the declaration of req_exec_quorum_reconfigure, the switch
 * statement and its case labels, the statement storing the new votes and
 * some log arguments are absent from this listing (embedded numbers 2203,
 * 2209, 2211, 2213, 2223, 2229, 2233 and 2236 are skipped). The branch
 * names above are inferred from the visible statements and from the
 * VOTEQUORUM_RECONFIG_PARAM_* constants used elsewhere in this file.
 * Confirm against the full source.
 */
2199static void message_handler_req_exec_votequorum_reconfigure (
2200 const void *message,
2201 unsigned int nodeid)
2202{
2204 struct cluster_node *node;
2205
2206 ENTER();
2207
2208 log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node " CS_PRI_NODE_ID " for " CS_PRI_NODE_ID,
2210
2212 {
2214 update_node_expected_votes(req_exec_quorum_reconfigure->value);
2215 votequorum_exec_send_expectedvotes_notification();
2216 update_ev_barrier(req_exec_quorum_reconfigure->value);
 /* With tracking on, expected_votes is not allowed to drop below the barrier. */
2217 if (ev_tracking) {
2218 us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
2219 }
2220 recalculate_quorum(1, 0); /* Allow decrease */
2221 break;
2222
2224 node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
2225 if (!node) {
2226 LEAVE();
2227 return;
2228 }
2230 recalculate_quorum(1, 0); /* Allow decrease */
2231 break;
2232
2234 update_wait_for_all_status(0);
2235 log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node " CS_PRI_NODE_ID ".",
2237 recalculate_quorum(0, 0);
2238
2239 break;
2240
2241 }
2242
2243 LEAVE();
2244}
2245
/*
 * Service exit hook.
 *
 * When allow_downscale is enabled, this node's nodeinfo is multicast one
 * last time and the result of that send becomes the return value;
 * otherwise 0 is returned. The expected-votes tracking file descriptor is
 * closed if tracking is on and the descriptor is open.
 *
 * NOTE(review): one statement inside the allow_downscale branch is absent
 * from this listing (embedded number 2257 is skipped); given the comment
 * below it presumably marks this node as leaving before the nodeinfo is
 * sent. Confirm against the full source.
 */
2246static int votequorum_exec_exit_fn (void)
2247{
2248 int ret = 0;
2249
2250 ENTER();
2251
2252 /*
2253 * tell the other nodes we are leaving
2254 */
2255
2256 if (allow_downscale) {
2258 ret = votequorum_exec_send_nodeinfo(us->node_id);
2259 }
2260
 /* -1 marks "no tracking file open". */
2261 if ((ev_tracking) && (ev_tracking_fd != -1)) {
2262 close(ev_tracking_fd);
2263 }
2264
2265
2266 LEAVE();
2267 return ret;
2268}
2269
2270static void votequorum_set_icmap_ro_keys(void)
2271{
2272 icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE);
2273 icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE);
2274 icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE);
2275 icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE);
2276 icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE);
2277 icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE);
2278 icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE);
2279}
2280
2281static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
2282{
2283 char *error = NULL;
2284
2285 ENTER();
2286
2287 /*
2288 * make sure we start clean
2289 */
2290 qb_list_init(&cluster_members_list);
2291 qb_list_init(&trackers_list);
2292 qdevice = NULL;
2293 us = NULL;
2294 memset(cluster_nodes, 0, sizeof(cluster_nodes));
2295
2296 /*
2297 * Allocate a cluster_node for qdevice
2298 */
2299 qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
2300 if (!qdevice) {
2301 LEAVE();
2302 return ((char *)"Could not allocate node.");
2303 }
2304 qdevice->votes = 0;
2305 memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2306
2307 /*
2308 * Allocate a cluster_node for us
2309 */
2310 us = allocate_node(corosync_api->totem_nodeid_get());
2311 if (!us) {
2312 LEAVE();
2313 return ((char *)"Could not allocate node.");
2314 }
2315
2316 icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
2317
2318 us->state = NODESTATE_MEMBER;
2319 us->votes = 1;
2320 us->flags |= NODE_FLAGS_FIRST;
2321
2322 error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
2323 if (error) {
2324 return error;
2325 }
2326 recalculate_quorum(0, 0);
2327
2328 /*
2329 * Set RO keys in icmap
2330 */
2331 votequorum_set_icmap_ro_keys();
2332
2333 /*
2334 * Listen for changes
2335 */
2336 votequorum_exec_add_config_notification();
2337
2338 /*
2339 * Start us off with one node
2340 */
2341 votequorum_exec_send_nodeinfo(us->node_id);
2342
2343 LEAVE();
2344
2345 return (NULL);
2346}
2347
2348/*
2349 * votequorum service core
2350 */
2351
2352static void votequorum_last_man_standing_timer_fn(void *arg)
2353{
2354 ENTER();
2355
2356 last_man_standing_timer_set = 0;
2357 if (cluster_is_quorate) {
2358 recalculate_quorum(1,1);
2359 }
2360
2361 LEAVE();
2362}
2363
/*
 * Sync-init callback: takes over the new membership and ring id.
 *
 * Visible behaviour, in order:
 *  - flags a sync round as in progress and clears the per-round state;
 *  - clears NODE_FLAGS_FIRST on the local node once member_list has more
 *    than one entry;
 *  - every id in quorum_members that is absent from member_list is counted
 *    as having left and, if a node entry exists, set to NODESTATE_DEAD;
 *  - with last_man_standing enabled and the condition below satisfied, the
 *    last-man-standing timer is (re)armed for last_man_standing_window;
 *  - the old membership is saved in previous_quorum_members, then
 *    member_list and ring_id are copied into quorum_members / quorum_ringid;
 *  - the qdevice timer is (re)armed with qdevice_sync_timeout and
 *    sync_wait_for_poll_or_timeout is set.
 *
 * NOTE(review): original line 2428 is missing from this listing. The
 * closing brace at 2442 has no visible opener, so the qdevice timer section
 * is presumably guarded by a condition (likely "a qdevice is registered")
 * -- confirm against the real source.
 * NOTE(review): trans_list / trans_list_entries are not used in the visible
 * code; i and j are int while the entry counts they are compared with are
 * size_t (member_list_entries) -- verify that this is intentional.
 */
2364static void votequorum_sync_init (
2365 const unsigned int *trans_list, size_t trans_list_entries,
2366 const unsigned int *member_list, size_t member_list_entries,
2367 const struct memb_ring_id *ring_id)
2368{
2369 int i, j;
2370 int found;
2371 int left_nodes;
2372 struct cluster_node *node;
2373
2374 ENTER();
2375
2376 sync_in_progress = 1;
2377 sync_nodeinfo_sent = 0;
2378 sync_wait_for_poll_or_timeout = 0;
2379
2380 if (member_list_entries > 1) {
2381 us->flags &= ~NODE_FLAGS_FIRST;
2382 }
2383
2384 /*
2385 * we don't need to track which nodes have left directly,
2386 * since that info is in the node db, but we need to know
2387 * if somebody has left for last_man_standing
2388 */
2389 left_nodes = 0;
2390 for (i = 0; i < quorum_members_entries; i++) {
2391 found = 0;
2392 for (j = 0; j < member_list_entries; j++) {
2393 if (quorum_members[i] == member_list[j]) {
2394 found = 1;
2395 break;
2396 }
2397 }
2398 if (found == 0) {
2399 left_nodes = 1;
2400 node = find_node_by_nodeid(quorum_members[i]);
2401 if (node) {
2402 node->state = NODESTATE_DEAD;
2403 }
2404 }
2405 }
2406
2407 if (last_man_standing) {
2408 if (((member_list_entries >= quorum) && (left_nodes)) ||
2409 ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) {
2410 if (last_man_standing_timer_set) {
2411 corosync_api->timer_delete(last_man_standing_timer);
2412 last_man_standing_timer_set = 0;
2413 }
2414 corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
2415 NULL, votequorum_last_man_standing_timer_fn,
2416 &last_man_standing_timer);
2417 last_man_standing_timer_set = 1;
2418 }
2419 }
2420
2421 memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries);
2422 previous_quorum_members_entries = quorum_members_entries;
2423
2424 memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
2425 quorum_members_entries = member_list_entries;
2426 memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
2427
2429 /*
2430 * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout
2431 */
2432 if (qdevice_timer_set) {
2433 corosync_api->timer_delete(qdevice_timer);
2434 }
2435 corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2436 qdevice_timer_fn, &qdevice_timer);
2437 qdevice_timer_set = 1;
2438 sync_wait_for_poll_or_timeout = 1;
2439
2440 log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)",
2441 qdevice_name, qdevice_sync_timeout);
2442 }
2443
2444 LEAVE();
2445}
2446
2447static int votequorum_sync_process (void)
2448{
2449 if (!sync_nodeinfo_sent) {
2450 votequorum_exec_send_nodeinfo(us->node_id);
2451 votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2452 if (strlen(qdevice_name)) {
2453 votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2454 qdevice_name);
2455 }
2456 votequorum_exec_send_nodelist_notification(NULL, 0LL);
2457 sync_nodeinfo_sent = 1;
2458 }
2459
2460 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2461 /*
2462 * Waiting for qdevice to poll with new ringid or timeout
2463 */
2464
2465 return (-1);
2466 }
2467
2468 return 0;
2469}
2470
2471static void votequorum_sync_activate (void)
2472{
2473 recalculate_quorum(0, 0);
2474 quorum_callback(quorum_members, quorum_members_entries,
2475 cluster_is_quorate, &quorum_ringid);
2476 votequorum_exec_send_quorum_notification(NULL, 0L);
2477
2478 sync_in_progress = 0;
2479}
2480
/*
 * Sync-abort callback: intentionally does nothing.
 */
static void votequorum_sync_abort (void)
{
	/* nothing to undo */
}
2485
2487 quorum_set_quorate_fn_t q_set_quorate_fn)
2488{
/*
 * Entry point that wires votequorum into corosync: stores the API table and
 * the quorate callback in file-scope variables, then links and initialises
 * the votequorum service. Returns NULL on success or an error string.
 *
 * NOTE(review): the first line of the signature (original line 2486) is
 * missing from this listing.
 * NOTE(review): both error returns below leave the function without calling
 * LEAVE(), unlike the success path.
 */
2489 char *error;
2490
2491 ENTER();
2492
2493 if (q_set_quorate_fn == NULL) {
2494 return ((char *)"Quorate function not set");
2495 }
2496
2497 corosync_api = api;
2498 quorum_callback = q_set_quorate_fn;
2499
2500 error = corosync_service_link_and_init(corosync_api,
2501 &votequorum_service[0]);
2502 if (error) {
2503 return (error);
2504 }
2505
2506 LEAVE();
2507
2508 return (NULL);
2509}
2510
2511/*
2512 * Library Handler init/fini
2513 */
2514
2515static int quorum_lib_init_fn (void *conn)
2516{
2517 struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2518
2519 ENTER();
2520
2521 qb_list_init (&pd->list);
2522 pd->conn = conn;
2523
2524 LEAVE();
2525 return (0);
2526}
2527
/*
 * Library connection exit hook.
 *
 * Unlinks the connection's private data from the list it is on and
 * re-initialises its list head. Always returns 0.
 *
 * NOTE(review): original line 2534 (the condition opening the block closed
 * at 2537) is missing from this listing; presumably the unlink only happens
 * when tracking is enabled for this connection -- confirm.
 */
2528static int quorum_lib_exit_fn (void *conn)
2529{
2530 struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2531
2532 ENTER();
2533
2535 qb_list_del (&quorum_pd->list);
2536 qb_list_init (&quorum_pd->list);
2537 }
2538
2539 LEAVE();
2540
2541 return (0);
2542}
2543
2544/*
2545 * library internal functions
2546 */
2547
2548static void qdevice_timer_fn(void *arg)
2549{
2550 ENTER();
2551
2552 if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2553 (!qdevice_timer_set)) {
2554 LEAVE();
2555 return;
2556 }
2557
2558 us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2559 us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
2560 log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
2561 votequorum_exec_send_nodeinfo(us->node_id);
2562
2563 qdevice_timer_set = 0;
2564 sync_wait_for_poll_or_timeout = 0;
2565
2566 LEAVE();
2567}
2568
2569/*
2570 * Library Handler Functions
2571 */
2572
/*
 * Library handler: getinfo request.
 *
 * Looks up the requested node and, when it exists, walks
 * cluster_members_list to sum the votes and take the highest expected_votes
 * over all nodes in NODESTATE_MEMBER state; the qdevice votes are added when
 * the node has NODE_FLAGS_QDEVICE_CAST_VOTE set. An unknown node yields
 * CS_ERR_NOT_EXIST in the response header.
 *
 * NOTE(review): this listing has lost many original lines (the request and
 * response declarations, the nodeid selection condition, every response
 * field assignment inside the switch / flag checks, and the response send).
 * Do not edit the function based on this excerpt alone.
 * NOTE(review): qdevice_name is copied with strcpy(); the destination size
 * is not visible here -- confirm both buffers share the same bound.
 */
2573static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
2574{
2577 struct cluster_node *node;
2578 unsigned int highest_expected = 0;
2579 unsigned int total_votes = 0;
2580 cs_error_t error = CS_OK;
2582
2583 ENTER();
2584
2585 log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node " CS_PRI_NODE_ID, conn, req_lib_votequorum_getinfo->nodeid);
2586
2588 nodeid = us->node_id;
2589 }
2590
2591 node = find_node_by_nodeid(nodeid);
2592 if (node) {
2593 struct cluster_node *iternode;
2594 struct qb_list_head *nodelist;
2595
2596 qb_list_for_each(nodelist, &cluster_members_list) {
2597 iternode = qb_list_entry(nodelist, struct cluster_node, list);
2598
2599 if (iternode->state == NODESTATE_MEMBER) {
2600 highest_expected =
2601 max(highest_expected, iternode->expected_votes);
2602 total_votes += iternode->votes;
2603 }
2604 }
2605
2606 if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2607 total_votes += qdevice->votes;
2608 }
2609
2610 switch(node->state) {
2611 case NODESTATE_MEMBER:
2613 break;
2614 case NODESTATE_DEAD:
2616 break;
2617 case NODESTATE_LEAVING:
2619 break;
2620 default:
2622 break;
2623 }
2628
2633
2634 if (two_node) {
2636 }
2637 if (cluster_is_quorate) {
2639 }
2640 if (wait_for_all) {
2642 }
2643 if (last_man_standing) {
2645 }
2646 if (auto_tie_breaker != ATB_NONE) {
2648 }
2649 if (allow_downscale) {
2651 }
2652
2654 strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
2656
2659 }
2660 if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2662 }
2663 if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2665 }
2668 }
2669 } else {
2670 error = CS_ERR_NOT_EXIST;
2671 }
2672
2675 res_lib_votequorum_getinfo.header.error = error;
2677 log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
2678
2679 LEAVE();
2680}
2681
/*
 * Library handler: set expected votes.
 *
 * Computes the quorum that the requested expected_votes would produce, with
 * allow_downscale temporarily forced to 0 around the calculation and then
 * restored. An invalid request is answered with CS_ERR_INVALID_PARAM;
 * otherwise the local expected votes are updated and a reconfigure message
 * is sent to the cluster, with CS_ERR_NO_RESOURCES reported when that send
 * fails.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, the first half of the validation condition, the tail of the
 * send call and the response send) -- consult the real source before
 * editing.
 */
2682static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
2683{
2686 cs_error_t error = CS_OK;
2687 unsigned int newquorum;
2688 unsigned int total_votes;
2689 uint8_t allow_downscale_status = 0;
2690
2691 ENTER();
2692
2693 allow_downscale_status = allow_downscale;
2694 allow_downscale = 0;
2695
2696 /*
2697 * Validate new expected votes
2698 */
2699 newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
2700 allow_downscale = allow_downscale_status;
2701 /*
2702 * Setting expected_votes < total_votes doesn't make sense.
2703 * For quorate cluster prevent cluster to become unquorate.
2704 */
2706 (cluster_is_quorate && (newquorum > total_votes))) {
2707 error = CS_ERR_INVALID_PARAM;
2708 goto error_exit;
2709 }
2710 update_node_expected_votes(req_lib_votequorum_setexpected->expected_votes);
2711
2712 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
2714 error = CS_ERR_NO_RESOURCES;
2715 }
2716
2717error_exit:
2720 res_lib_votequorum_status.header.error = error;
2722
2723 LEAVE();
2724}
2725
/*
 * Library handler: set the votes of a node.
 *
 * An unknown node yields CS_ERR_NAME_NOT_FOUND. Otherwise the node's
 * current votes are saved, the quorum is recalculated, and the change is
 * rejected with CS_ERR_INVALID_PARAM (votes restored) when the new quorum
 * is below total_votes / 2 or above total_votes. An accepted change is
 * sent to the cluster as a reconfigure message; a failed send is reported
 * as CS_ERR_NO_RESOURCES.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, the assignment of nodeid, the line that applies the
 * requested votes before the recalculation, the tail of the send call and
 * the response send) -- consult the real source before editing.
 */
2726static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
2727{
2730 struct cluster_node *node;
2731 unsigned int newquorum;
2732 unsigned int total_votes;
2733 unsigned int saved_votes;
2734 cs_error_t error = CS_OK;
2735 unsigned int nodeid;
2736
2737 ENTER();
2738
2740 node = find_node_by_nodeid(nodeid);
2741 if (!node) {
2742 error = CS_ERR_NAME_NOT_FOUND;
2743 goto error_exit;
2744 }
2745
2746 /*
2747 * Check votes is valid
2748 */
2749 saved_votes = node->votes;
2751
2752 newquorum = calculate_quorum(1, 0, &total_votes);
2753
2754 if (newquorum < total_votes / 2 ||
2755 newquorum > total_votes) {
2756 node->votes = saved_votes;
2757 error = CS_ERR_INVALID_PARAM;
2758 goto error_exit;
2759 }
2760
2761 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
2763 error = CS_ERR_NO_RESOURCES;
2764 }
2765
2766error_exit:
2769 res_lib_votequorum_status.header.error = error;
2771
2772 LEAVE();
2773}
2774
/*
 * Library handler: start tracking quorum changes for a connection.
 *
 * Optionally sends the current nodelist and quorum state to the caller
 * right away, reports CS_ERR_EXIST in one case whose condition is not
 * visible here, and otherwise links the connection's private data into
 * trackers_list.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, all three if-conditions, the assignments recording the
 * tracking request and the response send) -- consult the real source before
 * editing.
 */
2775static void message_handler_req_lib_votequorum_trackstart (void *conn,
2776 const void *message)
2777{
2780 struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2781 cs_error_t error = CS_OK;
2782
2783 ENTER();
2784
2785 /*
2786 * If an immediate listing of the current cluster membership
2787 * is requested, generate membership list
2788 */
2791 log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
2792 votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context);
2793 votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
2794 }
2795
2797 error = CS_ERR_EXIST;
2798 goto response_send;
2799 }
2800
2801 /*
2802 * Record requests for tracking
2803 */
2806
2810
2811 qb_list_add (&quorum_pd->list, &trackers_list);
2812 }
2813
2814response_send:
2817 res_lib_votequorum_status.header.error = error;
2819
2820 LEAVE();
2821}
2822
/*
 * Library handler: stop tracking quorum changes for a connection.
 *
 * In the success branch the connection's private data is unlinked from its
 * list and its list head re-initialised; otherwise CS_ERR_NOT_EXIST is
 * reported.
 *
 * NOTE(review): lines are missing from this listing (response declaration,
 * the if-condition, the line at 2834 and the response send) -- consult the
 * real source before editing.
 * NOTE(review): error is declared as int here while the sibling handlers
 * use cs_error_t.
 */
2823static void message_handler_req_lib_votequorum_trackstop (void *conn,
2824 const void *message)
2825{
2827 struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2828 int error = CS_OK;
2829
2830 ENTER();
2831
2833 error = CS_OK;
2835 qb_list_del (&quorum_pd->list);
2836 qb_list_init (&quorum_pd->list);
2837 } else {
2838 error = CS_ERR_NOT_EXIST;
2839 }
2840
2843 res_lib_votequorum_status.header.error = error;
2845
2846 LEAVE();
2847}
2848
/*
 * Library handler: register a quorum device.
 *
 * Refuses with CS_ERR_ACCESS when qdevice_can_operate is not set. In the
 * first visible branch a name comparison against qdevice_name either
 * succeeds silently or reports CS_ERR_EXIST for a different name. In the
 * other branch a second registration while one is pending gets
 * CS_ERR_TRY_AGAIN; otherwise the connection is remembered in
 * qdevice_reg_conn and a registration request is sent to the cluster. When
 * that send succeeds the function returns without going through the "out"
 * label; when it fails the pending connection is cleared and
 * CS_ERR_TRY_AGAIN is reported.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, the outer if-condition, the start of the name comparison,
 * the log_printf heads and the response send) -- consult the real source
 * before editing.
 */
2849static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
2850 const void *message)
2851{
2854 cs_error_t error = CS_OK;
2855
2856 ENTER();
2857
2858 if (!qdevice_can_operate) {
2859 log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2860 error = CS_ERR_ACCESS;
2861 goto out;
2862 }
2863
2866 qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2867 goto out;
2868 } else {
2870 "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2872 error = CS_ERR_EXIST;
2873 goto out;
2874 }
2875 } else {
2876 if (qdevice_reg_conn != NULL) {
2878 "Registration request already in progress");
2879 error = CS_ERR_TRY_AGAIN;
2880 goto out;
2881 }
2882 qdevice_reg_conn = conn;
2883 if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2886 "Unable to send qdevice registration request to cluster");
2887 error = CS_ERR_TRY_AGAIN;
2888 qdevice_reg_conn = NULL;
2889 } else {
2890 LEAVE();
2891 return;
2892 }
2893 }
2894
2895out:
2896
2899 res_lib_votequorum_status.header.error = error;
2901
2902 LEAVE();
2903}
2904
/*
 * Library handler: unregister the quorum device.
 *
 * On the success path the qdevice timer is cancelled if armed (also
 * clearing sync_wait_for_poll_or_timeout), all four qdevice flags are
 * dropped from the local node, nodeinfo is sent and an unregister request
 * is sent to the cluster. CS_ERR_INVALID_PARAM and CS_ERR_NOT_EXIST are
 * reported in the two failure branches.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, both if-conditions, the tail of the unregister send and the
 * response send) -- consult the real source before editing.
 */
2905static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
2906 const void *message)
2907{
2910 cs_error_t error = CS_OK;
2911
2912 ENTER();
2913
2916 error = CS_ERR_INVALID_PARAM;
2917 goto out;
2918 }
2919 if (qdevice_timer_set) {
2920 corosync_api->timer_delete(qdevice_timer);
2921 qdevice_timer_set = 0;
2922 sync_wait_for_poll_or_timeout = 0;
2923 }
2924 us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED;
2925 us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2926 us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
2927 us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS;
2928 votequorum_exec_send_nodeinfo(us->node_id);
2929 votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
2931 } else {
2932 error = CS_ERR_NOT_EXIST;
2933 }
2934
2935out:
2938 res_lib_votequorum_status.header.error = error;
2940
2941 LEAVE();
2942}
2943
/*
 * Library handler: rename the quorum device.
 *
 * On the success path a qdevice reconfigure message carrying the old name
 * is sent to the cluster. CS_ERR_INVALID_PARAM and CS_ERR_NOT_EXIST are
 * reported in the two failure branches.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, both if-conditions, the tail of the reconfigure send and
 * the response send) -- consult the real source before editing.
 */
2944static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
2945 const void *message)
2946{
2949 cs_error_t error = CS_OK;
2950
2951 ENTER();
2952
2955 error = CS_ERR_INVALID_PARAM;
2956 goto out;
2957 }
2958 votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
2960 } else {
2961 error = CS_ERR_NOT_EXIST;
2962 }
2963
2964out:
2967 res_lib_votequorum_status.header.error = error;
2969
2970 LEAVE();
2971}
2972
/*
 * Library handler: poll from the quorum device.
 *
 * Refuses with CS_ERR_ACCESS when qdevice_can_operate is not set. A poll
 * whose ring id (nodeid and seq) differs from quorum_ringid is logged and
 * answered with CS_ERR_MESSAGE_ERROR. On the success path the pending
 * qdevice timer is cancelled, the local flags are updated, nodeinfo is sent
 * only when the flags actually changed, and the timer is re-armed with
 * qdevice_timeout; sync_wait_for_poll_or_timeout is cleared.
 * CS_ERR_INVALID_PARAM and CS_ERR_NOT_EXIST are reported in the remaining
 * failure branches.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, several if-conditions, the flag-setting lines before the
 * "else" at 3014, part of the log arguments and the response send) --
 * consult the real source before editing.
 */
2973static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
2974 const void *message)
2975{
2978 cs_error_t error = CS_OK;
2979 uint32_t oldflags;
2980
2981 ENTER();
2982
2983 if (!qdevice_can_operate) {
2984 error = CS_ERR_ACCESS;
2985 goto out;
2986 }
2987
2989 if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.nodeid &&
2990 req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) {
2991 log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (" CS_PRI_RING_ID ") != last sync "
2992 "ring id (" CS_PRI_RING_ID "). Ignoring poll call.",
2994 quorum_ringid.nodeid, quorum_ringid.seq);
2995 error = CS_ERR_MESSAGE_ERROR;
2996 goto out;
2997 }
2999 error = CS_ERR_INVALID_PARAM;
3000 goto out;
3001 }
3002
3003 if (qdevice_timer_set) {
3004 corosync_api->timer_delete(qdevice_timer);
3005 qdevice_timer_set = 0;
3006 }
3007
3008 oldflags = us->flags;
3009
3011
3014 } else {
3015 us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
3016 }
3017
3018 if (us->flags != oldflags) {
3019 votequorum_exec_send_nodeinfo(us->node_id);
3020 }
3021
3022 corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
3023 qdevice_timer_fn, &qdevice_timer);
3024 qdevice_timer_set = 1;
3025 sync_wait_for_poll_or_timeout = 0;
3026 } else {
3027 error = CS_ERR_NOT_EXIST;
3028 }
3029
3030out:
3033 res_lib_votequorum_status.header.error = error;
3035
3036 LEAVE();
3037}
3038
/*
 * Library handler: set or clear the qdevice "master wins" mode.
 *
 * Refuses with CS_ERR_ACCESS when qdevice_can_operate is not set. On the
 * success path the local NODE_FLAGS_QDEVICE_MASTER_WINS flag is updated,
 * nodeinfo is sent only when the flags actually changed, and
 * update_qdevice_master_wins() is called with the requested "allow" value.
 * CS_ERR_INVALID_PARAM and CS_ERR_NOT_EXIST are reported in the remaining
 * failure branches.
 *
 * NOTE(review): lines are missing from this listing (request/response
 * declarations, several if-conditions, the flag-setting line before the
 * "else" at 3062 and the response send) -- consult the real source before
 * editing.
 * NOTE(review): oldflags is initialised from us->flags before ENTER() and
 * before any validation.
 */
3039static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
3040 const void *message)
3041{
3044 cs_error_t error = CS_OK;
3045 uint32_t oldflags = us->flags;
3046
3047 ENTER();
3048
3049 if (!qdevice_can_operate) {
3050 error = CS_ERR_ACCESS;
3051 goto out;
3052 }
3053
3056 error = CS_ERR_INVALID_PARAM;
3057 goto out;
3058 }
3059
3062 } else {
3063 us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS;
3064 }
3065
3066 if (us->flags != oldflags) {
3067 votequorum_exec_send_nodeinfo(us->node_id);
3068 }
3069
3070 update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
3071 } else {
3072 error = CS_ERR_NOT_EXIST;
3073 }
3074
3075out:
3078 res_lib_votequorum_status.header.error = error;
3080
3081 LEAVE();
3082}
#define SERVICE_ID_MAKE(a, b)
Definition: coroapi.h:458
@ CS_LIB_ALLOW_INQUORATE
Definition: coroapi.h:164
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
#define TOTEM_AGREED
Definition: coroapi.h:102
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:157
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED
Definition: coroapi.h:156
#define PROCESSOR_COUNT_MAX
Definition: coroapi.h:96
@ VOTEQUORUM_SERVICE
Definition: corodefs.h:49
#define CS_TRACK_CURRENT
Definition: corotypes.h:91
#define CS_FALSE
Definition: corotypes.h:53
#define CS_PRI_NODE_ID
Definition: corotypes.h:59
#define CS_TRACK_CHANGES
Definition: corotypes.h:92
#define CS_TRUE
Definition: corotypes.h:54
#define CS_TRACK_CHANGES_ONLY
Definition: corotypes.h:93
cs_error_t
The cs_error_t enum.
Definition: corotypes.h:98
@ CS_ERR_MESSAGE_ERROR
Definition: corotypes.h:120
@ CS_ERR_NO_RESOURCES
Definition: corotypes.h:116
@ CS_ERR_ACCESS
Definition: corotypes.h:109
@ CS_ERR_TRY_AGAIN
Definition: corotypes.h:104
@ CS_OK
Definition: corotypes.h:99
@ CS_ERR_INVALID_PARAM
Definition: corotypes.h:105
@ CS_ERR_NAME_NOT_FOUND
Definition: corotypes.h:115
@ CS_ERR_NOT_EXIST
Definition: corotypes.h:110
@ CS_ERR_EXIST
Definition: corotypes.h:112
#define CS_PRI_RING_ID
Definition: corotypes.h:62
void(* quorum_set_quorate_fn_t)(const unsigned int *view_list, size_t view_list_entries, int quorate, struct memb_ring_id *)
Definition: exec/quorum.h:42
struct corosync_service_engine * votequorum_get_service_engine_ver0(void)
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA
#define NODE_FLAGS_QUORATE
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES
char * votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn)
#define DEFAULT_LMS_WIN
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES
uint32_t operation
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER
uint8_t param
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define NODE_FLAGS_FIRST
uint32_t nodeid
#define NODE_FLAGS_QDEVICE_REGISTERED
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE
uint32_t flags
nodestate_t
@ NODESTATE_LEAVING
@ NODESTATE_DEAD
@ NODESTATE_MEMBER
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
uint32_t votes
#define NODE_FLAGS_QDEVICE_CAST_VOTE
typedef __attribute__
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE
uint32_t expected_votes
#define NODE_FLAGS_QDEVICE_MASTER_WINS
#define VOTEQUORUM_READCONFIG_STARTUP
#define NODE_FLAGS_LEAVING
#define NODE_FLAGS_QDEVICE_ALIVE
#define NODE_FLAGS_WFASTATUS
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG
@ ATB_LIST
@ ATB_LOWEST
@ ATB_HIGHEST
@ ATB_NONE
LOGSYS_DECLARE_SUBSYS("VOTEQ")
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO
uint32_t value
#define VOTEQUORUM_READCONFIG_RUNTIME
#define max(a, b)
cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8)
Definition: icmap.c:868
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:892
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
Definition: icmap.c:573
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for the given key (key_name), or for the whole prefix if prefix is set.
Definition: icmap.c:1225
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1159
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem....
Definition: icmap.h:85
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1089
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1095
qb_map_iter_t * icmap_iter_t
Iterator type.
Definition: icmap.h:123
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1116
cs_error_t icmap_get_int32(const char *key_name, int32_t *i32)
Definition: icmap.c:886
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:597
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:856
#define VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER
#define VOTEQUORUM_INFO_QDEVICE_ALIVE
#define VOTEQUORUM_NODESTATE_MEMBER
#define VOTEQUORUM_NODESTATE_DEAD
#define VOTEQUORUM_INFO_ALLOW_DOWNSCALE
@ MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION
@ MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION
@ MESSAGE_RES_VOTEQUORUM_STATUS
@ MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION
@ MESSAGE_RES_VOTEQUORUM_GETINFO
#define VOTEQUORUM_INFO_QDEVICE_MASTER_WINS
#define VOTEQUORUM_NODESTATE_LEAVING
#define VOTEQUORUM_INFO_TWONODE
#define VOTEQUORUM_INFO_WAIT_FOR_ALL
#define VOTEQUORUM_INFO_QUORATE
#define VOTEQUORUM_INFO_QDEVICE_REGISTERED
#define VOTEQUORUM_INFO_LAST_MAN_STANDING
#define VOTEQUORUM_QDEVICE_MAX_NAME_LEN
#define VOTEQUORUM_QDEVICE_NODEID
#define VOTEQUORUM_INFO_QDEVICE_CAST_VOTE
#define VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:72
#define LEAVE
Definition: logsys.h:334
#define log_printf(level, format, args...)
Definition: logsys.h:332
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:75
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:71
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:74
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:73
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:76
#define ENTER
Definition: logsys.h:333
void * user_data
Definition: sam.c:127
uint32_t quorate
Definition: sam.c:134
char * corosync_service_link_and_init(struct corosync_api_v1 *corosync_api, struct default_service *service)
Link and initialize a service.
Definition: service.c:117
nodestate_t state
uint32_t expected_votes
struct qb_list_head list
The corosync_api_v1 struct.
Definition: coroapi.h:225
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
int(* totem_mcast)(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
Definition: coroapi.h:279
void *(* ipc_private_data_get)(void *conn)
Definition: coroapi.h:256
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
unsigned int(* totem_nodeid_get)(void)
Definition: coroapi.h:275
int(* ipc_dispatch_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:263
int(* ipc_response_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:258
void(* error_memory_failure)(void) __attribute__((noreturn))
Definition: coroapi.h:422
The corosync_exec_handler struct.
Definition: coroapi.h:475
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
Definition: coroapi.h:476
The corosync_lib_handler struct.
Definition: coroapi.h:467
void(* lib_handler_fn)(void *conn, const void *msg)
Definition: coroapi.h:468
The corosync_service_engine struct.
Definition: coroapi.h:490
const char * name
Definition: coroapi.h:491
const char * name
Definition: service.h:43
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
The memb_ring_id struct.
Definition: coroapi.h:122
unsigned long long seq
Definition: coroapi.h:124
unsigned int nodeid
Definition: coroapi.h:123
unsigned char track_flags
uint64_t tracking_context
int tracking_enabled
struct qb_list_head list
struct qb_ipc_request_header header __attribute__((aligned(8)))
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct qb_ipc_request_header header __attribute__((aligned(8)))
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct qb_ipc_request_header header __attribute__((aligned(8)))
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct qb_ipc_request_header header __attribute__((aligned(8)))
The req_lib_votequorum_getinfo struct.
The req_lib_votequorum_qdevice_master_wins struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_poll struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_register struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_unregister struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_update struct.
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_setexpected struct.
The req_lib_votequorum_setvotes struct.
The req_lib_votequorum_trackstart struct.
The res_lib_votequorum_expectedvotes_notification struct.
The res_lib_votequorum_getinfo struct.
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The res_lib_votequorum_quorum_notification struct.
The res_lib_votequorum_status struct.
The votequorum_node struct.
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
struct memb_ring_id ring_id
Definition: totemsrp.c:4
struct totem_message_header header
Definition: totemsrp.c:0
const char * get_state_dir(void)
Definition: util.c:174