Fix for [pgpool-hackers: 1501] kind does not match error
authorMuhammad Usama <m.usama@gmail.com>
Thu, 14 Jul 2016 13:50:51 +0000 (18:50 +0500)
committerMuhammad Usama <m.usama@gmail.com>
Fri, 15 Jul 2016 19:34:06 +0000 (00:34 +0500)
pgpool throws ".. kind does not match.." error message when all the attached
backend nodes do not return the same response to the query. Although this error
message can be a symptom of the backend node sync issue in most cases, but in
case when the message kind of backend nodes differs because one of the backend
returned the notice response while the other returned some other kind then that
case should not be considered as an error case.

Consider the scenario where a pgpool is connected to multiple backend nodes and
is expecting to receive "[C] command complete" message from all nodes.
But while processing the query one of the backend also produced an extra warning message.
... WARNING: database "testdb" must be vacuum within 11000000 transaction

Although that the query was eventually successful on all backends, but as soon
as pgpool reads this WARNING message from one backend it will throw an error
ERROR: read_kind_from_backend: 1 th kind N does not match with master or majority connection kind C

But if pgpool would have kept reading from the backend, It would have also got
the expected command complete message after that warning notice

The solution for this is to keep forwarding the notice/log messages to the
frontend while reading the backend response in read_kind_from_backend() function
until the ERROR or expected message is received.

Though there is also one slight drawback of this approach, If the backends are
configured to forward all log messages to the client application, then the client
applications will get multiple log messages for the queries that are sent to
more than one node by pgpool.

test=# begin;
LOG:  statement: begin;
LOG:  statement: begin;
BEGIN

But since it is a very rare scenario, and just a minor inconvenience so it's not
holding us back to commit this solution until a better one arrives.

pool_process_query.c

index d4507e5a2c9597b9aded03a63e13165b05b0654b..f8780001918ed46fdbafa0302e81fbab7c140df0 100644 (file)
@@ -80,7 +80,7 @@ static bool has_lock_target(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *bac
 static POOL_STATUS insert_oid_into_insert_lock(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, char* table);
 static POOL_STATUS read_packets_and_process(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int reset_request, int *state, short *num_fields, bool *cont);
 static bool is_all_slaves_command_complete(unsigned char *kind_list, int num_backends, int master);
-
+static POOL_STATUS pool_process_notice_message_from_one_backend(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int backend_idx, char kind);
 /* timeout sec for pool_check_fd */
 static int timeoutsec;
 
@@ -3843,31 +3843,35 @@ POOL_STATUS read_kind_from_backend(POOL_CONNECTION *frontend, POOL_CONNECTION_PO
                                pool_debug("read_kind_from_backend: kind: %c from %d th backend", kind, i);
 
                                /*
-                                * Read and discard parameter status
+                                * Read and discard parameter status and notice messages
                                 */
-                               if (kind != 'S')
-                               {
-                                       break;
-                               }
-
-                               if (pool_read(CONNECTION(backend, i), &len, sizeof(len)) < 0)
+                               if (kind == 'N')
                                {
-                                       pool_error("read_kind_from_backend: failed to read parameter status packet length from %d th backend", i);
-                                       return POOL_ERROR;
+                                       pool_debug("read_kind_from_backend: received log message from backend %d while reading packet kind",i);
+                                       if (pool_process_notice_message_from_one_backend(frontend, backend, i, kind) != POOL_CONTINUE)
+                                               return POOL_ERROR;
                                }
-                               len = htonl(len) - 4;
-                               p = pool_read2(CONNECTION(backend, i), len);
-                               if (p)
+                               else if (kind == 'S')
                                {
-                                       value = p + strlen(p) + 1;
-                                       pool_debug("read_kind_from_backend: parameter name: %s value: %s", p, value);
-                                       if (IS_MASTER_NODE_ID(i))
-                                               pool_add_param(&CONNECTION(backend, i)->params, p, value);
+                                       if (pool_read(CONNECTION(backend, i), &len, sizeof(len)) < 0)
+                                       {
+                                               pool_error("read_kind_from_backend: failed to read parameter status packet length from %d th backend", i);
+                                               return POOL_ERROR;
+                                       }
+                                       len = htonl(len) - 4;
+                                       p = pool_read2(CONNECTION(backend, i), len);
+                                       if (p)
+                                       {
+                                               value = p + strlen(p) + 1;
+                                               pool_debug("read_kind_from_backend: parameter name: %s value: %s", p, value);
+                                               if (IS_MASTER_NODE_ID(i))
+                                                       pool_add_param(&CONNECTION(backend, i)->params, p, value);
+                                       }
+                                       else
+                                               pool_error("read_kind_from_backend: failed to read parameter status packet from %d th backend", i);
                                }
-                               else
-                                       pool_error("read_kind_from_backend: failed to read parameter status packet from %d th backend", i);
 
-                       } while (kind == 'S');
+                       } while (kind == 'S' || kind == 'N' );
 
 #ifdef DEALLOCATE_ERROR_TEST
                        /*
@@ -4641,6 +4645,102 @@ static int detect_error(POOL_CONNECTION *backend, char *error_code, int major, c
        return is_error;
 }
 
+/*
+ * The function forwards the NOTICE mesaage received from one backend
+ * to the frontend and also puts the human readable message to the
+ * pgpool log
+ */
+
+static POOL_STATUS pool_process_notice_message_from_one_backend(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int backend_idx, char kind)
+{
+       int major = MAJOR(backend);
+       POOL_CONNECTION *backend_conn = CONNECTION(backend, backend_idx);
+
+       if (kind != 'N')
+               return POOL_ERROR;
+
+       /* read actual message */
+       if (major == PROTO_MAJOR_V3)
+       {
+               char *e;
+               int len, datalen;
+               char *buff;
+               char *errorSev = NULL;
+               char *errorMsg = NULL;
+
+               if (pool_read(backend_conn, &datalen, sizeof(datalen)) < 0)
+               {
+                       pool_error("pool_process_notice_message_from_one_backend: failed to read data length from %d th backend", backend_idx);
+                       return POOL_ERROR;
+               }
+
+               len = ntohl(datalen) - 4;
+
+               if (len <= 0 )
+                       return POOL_ERROR;
+
+               buff = malloc(len);
+               if (!buff)
+               {
+                       pool_error("pool_process_notice_message_from_one_backend: malloc failed");
+                       return POOL_ERROR;
+               }
+
+               if (pool_read(backend_conn, buff, len) < 0)
+               {
+                       pool_error("pool_process_notice_message_from_one_backend: failed to read data from %d th backend", backend_idx);
+                       return POOL_ERROR;
+               }
+
+               e = buff;
+
+               while (*e)
+               {
+                       char tok = *e;
+                       e++;
+                       if(*e == 0)
+                               break;
+                       if (tok == 'M')
+                               errorMsg = e;
+                       else if(tok == 'S')
+                               errorSev = e;
+                       else
+                               e += strlen(e) + 1;
+
+                       if(errorSev && errorMsg) /* we have all what we need */
+                               break;
+               }
+
+               /* produce a pgpool log entry */
+               pool_log("backend [%d]: %s: %s",backend_idx,errorSev,errorMsg);
+               /* forward it to the frontend */
+               pool_write(frontend, &kind, 1);
+               pool_write(frontend, &datalen, sizeof(datalen));
+               if (pool_write_and_flush(frontend, buff, len) < 0)
+               {
+                       free(buff);
+                       return POOL_END;
+               }
+               free(buff);
+       }
+       else /* Old Protocol */
+       {
+               int len = 0;
+               char *str = pool_read_string(backend_conn, &len, 0);
+
+               if (str == NULL || len <= 0)
+                       return POOL_END;
+
+               /* produce a pgpool log entry */
+               pool_log("backend [%d]: NOTICE: %s",backend_idx,str);
+               /* forward it to the frontend */
+               pool_write(frontend, &kind, 1);
+               if (pool_write_and_flush(frontend, str, len) < 0)
+                       return POOL_END;
+       }
+       return POOL_CONTINUE;
+}
+
 /*
  * pool_extract_error_message: Extract human readable message from
  * ERROR/NOTICE reponse packet and return it. If read_kind is true,