From 0564864e7bb8505d2408ef47a9e8e112b937afe3 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Thu, 4 Apr 2024 13:54:34 +0900 Subject: [PATCH] Fix assorted causes of segmentation fault. It is reported that pgpool and its child process segfault in certain cases when failover involved. In pgpool main get_query_result (called from find_primary_node) crashed. do_query(slots[backend_id]->con, query, res, PROTO_MAJOR_V3); It seems slots[0] is NULL here. slots[0] is created by make_persistent_db_connection_noerror() but it failed with log message: "find_primary_node: make_persistent_db_connection_noerror failed on node 0". Note that at the time when make_persistent_db_connection_noerror() is called, VALID_BACKEND reported that node 0 is up. This means that failover is ongoing and the node status used by VALID_BACKEND did not catch up. As a result get_query_user is called with slots[0] = NULL, which caused the segfault. Fix is, check slots entry before calling get_query_result. Also health check has an issue with connection "slot" memory. It is managed by HealthCheckMemoryContext. slot is the pointer to the memory. When elog(ERROR) is raised, pgpool long jumps and resets the memory context. Thus, slot remains as a pointer to freed memory. To fix this, always set NULL to slot right after the HealthCheckMemoryContext call. Similar issue is found with streaming replication check too and is also fixed in this commit. Problem reported and analyzed: Emond Papegaaij Backpatch-through: v4.4 Discussion: [pgpool-general: 9070] Re: Segmentation after switchover https://www.pgpool.net/pipermail/pgpool-general/2024-April/009131.html --- src/main/health_check.c | 8 +++++++- src/main/pgpool_main.c | 4 ++-- src/streaming_replication/pool_worker_child.c | 6 ++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/health_check.c b/src/main/health_check.c index 17a94d86f..fec0e6dbc 100644 --- a/src/main/health_check.c +++ b/src/main/health_check.c @@ -5,7 +5,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2022 PgPool Global Development Group + * Copyright (c) 2003-2024 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -172,6 +172,12 @@ do_health_check_child(int *node_id) { MemoryContextSwitchTo(HealthCheckMemoryContext); MemoryContextResetAndDeleteChildren(HealthCheckMemoryContext); + /* + * Since HealthCheckMemoryContext is used for "slot", we need to clear it + * so that new slot is allocated later on. + */ + slot = NULL; + bool skipped = false; CHECK_REQUEST; diff --git a/src/main/pgpool_main.c b/src/main/pgpool_main.c index edd8a3094..ea4c489e5 100644 --- a/src/main/pgpool_main.c +++ b/src/main/pgpool_main.c @@ -5,7 +5,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2023 PgPool Global Development Group + * Copyright (c) 2003-2024 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -2601,7 +2601,7 @@ verify_backend_node_status(POOL_CONNECTION_POOL_SLOT * *slots) for (j = 0; j < NUM_BACKENDS; j++) { - if (pool_node_status[j] == POOL_NODE_STATUS_STANDBY) + if (pool_node_status[j] == POOL_NODE_STATUS_STANDBY && slots[j]) { ereport(DEBUG1, (errmsg("verify_backend_node_status: %d is standby", j))); diff --git a/src/streaming_replication/pool_worker_child.c b/src/streaming_replication/pool_worker_child.c index 3b1d9cff6..a9695fb83 100644 --- a/src/streaming_replication/pool_worker_child.c +++ b/src/streaming_replication/pool_worker_child.c @@ -162,6 +162,12 @@ do_worker_child(void) { MemoryContextSwitchTo(WorkerMemoryContext); MemoryContextResetAndDeleteChildren(WorkerMemoryContext); + /* + * Since WorkerMemoryContext is used for "slots", we need to clear it + * so that new slots are allocated later on. + */ + memset(slots, 0, sizeof(slots)); + bool watchdog_leader; /* true if I am the watchdog leader */ -- 2.39.5