Fixing a problem with pcp_detach_node when a graceful node detach is requested
author Muhammad Usama <m.usama@gmail.com>
Tue, 7 Apr 2015 12:48:44 +0000 (17:48 +0500)
committer Muhammad Usama <m.usama@gmail.com>
Tue, 7 Apr 2015 12:53:27 +0000 (17:53 +0500)
Gracefully detaching a node with pcp_detach_node should first check whether the
detach_node command is allowed on that particular node, before blocking incoming
connections and closing the existing connections.

Conflicts:
main.c

main.c
pcp_child.c
pool.h

diff --git a/main.c b/main.c
index ed9b50ebecbaf00d270431650021d801dcd93dfe..98b394d07b12d4691d93cbc3bfd1e81310cc1aa2 100644 (file)
--- a/main.c
+++ b/main.c
@@ -1317,8 +1317,21 @@ void notice_backend_error(int node_id)
        degenerate_backend_set(&n, 1);
 }
 
-/* notice backend connection error using SIGUSR1 */
-void degenerate_backend_set(int *node_id_set, int count)
+/*
+ * degenerate_backend_set_ex:
+ *
+ * The function signals/verifies the node down request.
+ * The request is then processed by failover function.
+ *
+ * node_id_set: array of node ids to be registered for NODE DOWN operation
+ * count:       number of elements in node_id_set array
+ * test_only:   When set, function only checks if NODE DOWN operation can be
+ *              executed on provided node ids and never registers the operation
+ *              request.
+ *              In the test_only case the function returns false as
+ *              soon as the first non-compliant node in node_id_set is found
+ */
+bool degenerate_backend_set_ex(int *node_id_set, int count, bool test_only)
 {
        pid_t parent = getppid();
        int i;
@@ -1331,37 +1344,64 @@ void degenerate_backend_set(int *node_id_set, int count)
 
        if (pool_config->parallel_mode)
        {
-               return;
+               return false;
+       }
+       if (test_only == false)
+       {
+               POOL_SETMASK2(&BlockSig, &oldmask);
+               pool_semaphore_lock(REQUEST_INFO_SEM);
+               Req_info->kind = NODE_DOWN_REQUEST;
        }
 
-       POOL_SETMASK2(&BlockSig, &oldmask);
-       pool_semaphore_lock(REQUEST_INFO_SEM);
-       Req_info->kind = NODE_DOWN_REQUEST;
        for (i = 0; i < count; i++)
        {
                if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS ||
                        !VALID_BACKEND(node_id_set[i]))
                {
                        pool_log("degenerate_backend_set: node %d is not valid backend.", i);
+                       if (test_only)
+                               return false;
                        continue;
                }
 
                if (POOL_DISALLOW_TO_FAILOVER(BACKEND_INFO(node_id_set[i]).flag))
                {
                        pool_log("degenerate_backend_set: %d failover request from pid %d is canceld because failover is disallowed", node_id_set[i], getpid());
+                       if (test_only)
+                               return false;
                        continue;
                }
 
-               pool_log("degenerate_backend_set: %d fail over request from pid %d", node_id_set[i], getpid());
-               Req_info->node_id[i] = node_id_set[i];
+               if (test_only == false)
+               {
+                       pool_log("degenerate_backend_set: %d fail over request from pid %d", node_id_set[i], getpid());
+                       Req_info->node_id[i] = node_id_set[i];
+               }
                need_signal = true;
        }
 
        if (need_signal)
+       {
+               if (test_only)
+                       return true;
                kill(parent, SIGUSR1);
+       }
+       else if (test_only)
+               return false;
 
        pool_semaphore_unlock(REQUEST_INFO_SEM);
        POOL_SETMASK(&oldmask);
+
+       return need_signal;
+}
+
+/*
+ * Wrapper over degenerate_backend_set_ex() that always passes
+ * test_only = false; the boolean result of that call is discarded.
+ */
+void degenerate_backend_set(int *node_id_set, int count)
+{
+       degenerate_backend_set_ex(node_id_set, count, false);
 }
 
 /* send promote node request using SIGUSR1 */
diff --git a/pcp_child.c b/pcp_child.c
index 1cf9d3f09418536727460db9bc187dd2ea99dba0..68f1ed85674c8fafaafef80c22935f025c245dac 100644 (file)
--- a/pcp_child.c
+++ b/pcp_child.c
@@ -752,7 +752,7 @@ pcp_do_child(int unix_fd, int inet_fd, char *pcp_conf_file)
                        {
                                int node_id;
                                int wsize;
-                               char code[] = "CommandComplete";
+                               char *code;
                                bool gracefully;
 
                                if (tos == 'D')
@@ -762,12 +762,15 @@ pcp_do_child(int unix_fd, int inet_fd, char *pcp_conf_file)
 
                                node_id = atoi(buf);
                                pool_debug("pcp_child: detaching Node ID %d", node_id);
-                               pool_detach_node(node_id, gracefully);
+                               if (pool_detach_node(node_id, gracefully) == 0)
+                                       code = "CommandComplete";
+                               else
+                                       code = "CommandFailed";
 
                                pcp_write(frontend, "d", 1);
-                               wsize = htonl(sizeof(code) + sizeof(int));
+                               wsize = htonl(strlen(code) + 1 + sizeof(int));
                                pcp_write(frontend, &wsize, sizeof(int));
-                               pcp_write(frontend, code, sizeof(code));
+                               pcp_write(frontend, code, strlen(code) + 1);
                                if (pcp_flush(frontend) < 0)
                                {
                                        pool_error("pcp_child: pcp_flush() failed. reason: %s", strerror(errno));
@@ -1319,12 +1322,26 @@ static RETSIGTYPE reload_config_handler(int sig)
 /* Dedatch a node */
 static int pool_detach_node(int node_id, bool gracefully)
 {
+       int nRet = 0;
        if (!gracefully)
        {
-               notice_backend_error(node_id);  /* send failover request */
+               if (degenerate_backend_set_ex(&node_id, 1, false) == false)
+               {
+                       pool_error("pcp_child: processing detach node failed");
+                       return -1;
+               }
                return 0;
        }
-               
+
+       /* Check if the NODE DOWN can be executed on
+        * the given node id.
+        */
+       if (degenerate_backend_set_ex(&node_id, 1, true) == false)
+       {
+               pool_error("pcp_child: processing graceful detach node failed");
+               return -1;
+       }
+
        /*
         * Wait until all frontends exit
         */
@@ -1341,12 +1358,19 @@ static int pool_detach_node(int node_id, bool gracefully)
        /*
         * Now all frontends have gone. Let's do failover.
         */
-       notice_backend_error(node_id);          /* send failover request */
-
-       /*
-        * Wait for failover completed.
-        */
-       pcp_wakeup_request = 0;
+       if (degenerate_backend_set_ex(&node_id, 1, false) == false)
+       {
+               nRet = -1;
+               pcp_wakeup_request = 1;
+               pool_error("pcp_child: processing graceful detach node failed");
+       }
+       else
+       {
+               /*
+                * Wait for failover completed.
+                */
+               pcp_wakeup_request = 0;
+       }
 
        while (!pcp_wakeup_request)
        {
@@ -1361,7 +1385,7 @@ static int pool_detach_node(int node_id, bool gracefully)
         */
        finish_recovery();
 
-       return 0;
+       return nRet;
 }
 
 /* Promote a node */
diff --git a/pool.h b/pool.h
index e772d24543697ad268e2ea9a0c345f310c9fb4d0..c0242263fb4d1967dc5b09edcc4ee2ca5f4e8c6d 100644 (file)
--- a/pool.h
+++ b/pool.h
@@ -455,6 +455,7 @@ extern POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend,
 
 extern void notice_backend_error(int node_id);
 extern void degenerate_backend_set(int *node_id_set, int count);
+extern bool degenerate_backend_set_ex(int *node_id_set, int count, bool test_only);
 extern void promote_backend(int node_id);
 extern void send_failback_request(int node_id);