From bab1aeb321ce3fa8d5ad927ba74b5413242fd904 Mon Sep 17 00:00:00 2001 From: Steve Singer Date: Tue, 29 Oct 2013 16:22:37 -0400 Subject: [PATCH] Additional changes for bug322. cloneNodePrepare() and many other places need to grab the central config lock. They heavily rely on checking things via "not found", which inherently has a race condition. Conflicts: src/slon/remote_worker.c note: removed src/slon/remote_worker.c changes from the cherry-picked patch they aren't actually part of 322 --- src/backend/slony1_funcs.sql | 289 +++++++++++++++++++++++++++++++---- 1 file changed, 257 insertions(+), 32 deletions(-) diff --git a/src/backend/slony1_funcs.sql b/src/backend/slony1_funcs.sql index 7588f003..9e6ebf67 100644 --- a/src/backend/slony1_funcs.sql +++ b/src/backend/slony1_funcs.sql @@ -808,6 +808,11 @@ as $$ declare v_old_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check if the node exists -- ---- @@ -854,6 +859,11 @@ declare v_local_node_id int4; v_node_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that we are the node to activate and that we are -- currently disabled. @@ -897,6 +907,11 @@ declare v_node_row record; v_sub_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that the node is inactive -- ---- @@ -1001,6 +1016,11 @@ declare v_node_row record; v_idx integer; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that this got called on a different node -- ---- @@ -1066,6 +1086,11 @@ as $$ declare v_tab_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- If the dropped node is a remote node, clean the configuration -- from all traces for it. @@ -1129,6 +1154,11 @@ declare v_row2 record; v_n int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- All consistency checks first @@ -1211,6 +1241,11 @@ declare v_restart_required boolean; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + v_restart_required:=false; -- -- any nodes other than the backup receiving @@ -1289,6 +1324,11 @@ declare v_row record; v_new_event bigint; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + select * into v_row from @NAMESPACE@.sl_event where ev_origin = p_failed_node @@ -1331,8 +1371,13 @@ as $$ declare begin - perform @NAMESPACE@.failoverSet_int(p_failed_node, - p_backup_node,p_seq_no); + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + + perform @NAMESPACE@.failoverSet_int(p_failed_node, + p_backup_node,p_seq_no); notify "_@CLUSTERNAME@_Restart"; return 0; @@ -1352,6 +1397,11 @@ declare v_last_sync int8; v_set int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + SELECT max(ev_seqno) into v_last_sync FROM @NAMESPACE@.sl_event where ev_origin=p_failed_node; if v_last_sync > p_last_seqno then @@ -1529,41 +1579,42 @@ as $$ declare v_dummy int4; begin - select 1 into v_dummy from @NAMESPACE@.sl_node where - no_id = p_no_id; + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + + update @NAMESPACE@.sl_node set + no_active = np.no_active, + no_comment = np.no_comment, + no_failed = np.no_failed + from @NAMESPACE@.sl_node np + where np.no_id = p_no_provider + and sl_node.no_id = p_no_id; if not found then insert into @NAMESPACE@.sl_node (no_id, no_active, no_comment,no_failed) - select p_no_id, no_active, p_no_comment,no_failed + select p_no_id, no_active, p_no_comment, no_failed from @NAMESPACE@.sl_node where no_id = p_no_provider; - else - update @NAMESPACE@.sl_node set - no_active= np.no_active - ,no_comment=np.no_comment - ,no_failed=np.no_failed - from @NAMESPACE@.sl_node np - where np.no_id=p_no_provider - and sl_node.no_id=p_no_id; - end if; - select 1 into v_dummy from @NAMESPACE@.sl_path where - pa_server=p_no_provider and pa_client=p_no_id; - if not found then - insert into @NAMESPACE@.sl_path - (pa_server, pa_client, pa_conninfo, pa_connretry) - select pa_server, p_no_id, '', pa_connretry - from @NAMESPACE@.sl_path - where pa_client = p_no_provider; - end if; - select 1 into v_dummy from @NAMESPACE@.sl_path where - pa_server=p_no_id and pa_client=p_no_provider; - if not found then - insert into @NAMESPACE@.sl_path - (pa_server, pa_client, pa_conninfo, pa_connretry) - select p_no_id, pa_client, '', pa_connretry - from @NAMESPACE@.sl_path - where pa_server = p_no_provider; end if; + + insert into @NAMESPACE@.sl_path + (pa_server, pa_client, pa_conninfo, pa_connretry) + select pa_server, p_no_id, '', pa_connretry + from @NAMESPACE@.sl_path + where pa_client = p_no_provider + and (pa_server, p_no_id) not in (select pa_server, pa_client + from @NAMESPACE@.sl_path); + + insert into @NAMESPACE@.sl_path + (pa_server, pa_client, pa_conninfo, pa_connretry) + select p_no_id, pa_client, '', pa_connretry + from @NAMESPACE@.sl_path + where pa_server = p_no_provider + and (p_no_id, pa_client) not in (select pa_server, pa_client + from @NAMESPACE@.sl_path); + insert into @NAMESPACE@.sl_subscribe (sub_set, sub_provider, sub_receiver, sub_forward, sub_active) select sub_set, sub_provider, p_no_id, sub_forward, sub_active @@ -1596,6 +1647,11 @@ as $$ declare v_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + perform "pg_catalog".setval('@NAMESPACE@.sl_local_node_id', p_no_id); perform @NAMESPACE@.resetSession(); for v_row in select sub_set from @NAMESPACE@.sl_subscribe @@ -1661,6 +1717,11 @@ as $$ declare v_dummy int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check if the path already exists -- ---- @@ -1722,6 +1783,11 @@ as $$ declare v_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- There should be no existing subscriptions. Auto unsubscribing -- is considered too dangerous. @@ -1773,6 +1839,11 @@ create or replace function @NAMESPACE@.dropPath_int (p_pa_server int4, p_pa_clie returns int4 as $$ begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Remove any dangling sl_listen entries with the server -- as provider and the client as receiver. This must have @@ -1837,6 +1908,11 @@ as $$ declare v_exists int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + select 1 into v_exists from @NAMESPACE@.sl_listen where li_origin = p_li_origin @@ -1909,6 +1985,11 @@ create or replace function @NAMESPACE@.dropListen_int (p_li_origin int4, p_li_pr returns int4 as $$ begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + delete from @NAMESPACE@.sl_listen where li_origin = p_li_origin and li_provider = p_li_provider @@ -1938,6 +2019,11 @@ as $$ declare v_local_node_id int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + v_local_node_id := @NAMESPACE@.getLocalNodeId('_@CLUSTERNAME@'); insert into @NAMESPACE@.sl_set @@ -1962,6 +2048,11 @@ as $$ declare v_dummy int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + select 1 into v_dummy from @NAMESPACE@.sl_set where set_id = p_set_id @@ -2007,6 +2098,11 @@ declare v_set_row record; v_tab_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that the set exists and that we are the origin -- and that it is not already locked. @@ -2076,6 +2172,11 @@ declare v_set_row record; v_tab_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that the set exists and that we are the origin -- and that it is not already locked. @@ -2140,6 +2241,11 @@ declare v_sync_seqno int8; v_lv_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that the set is locked and that this locking -- happened long enough ago. @@ -2232,6 +2338,11 @@ declare v_sub_next int4; v_last_sync int8; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Get our local node ID -- ---- @@ -2405,6 +2516,11 @@ as $$ declare v_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that the set exists and originates here -- ---- @@ -2440,6 +2556,11 @@ as $$ declare v_tab_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Restore all tables original triggers and rules and remove -- our replication stuff. @@ -2491,6 +2612,11 @@ declare v_origin int4; in_progress boolean; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that both sets exist and originate here -- ---- @@ -2596,6 +2722,11 @@ create or replace function @NAMESPACE@.mergeSet_int (p_set_id int4, p_add_id int returns int4 as $$ begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + update @NAMESPACE@.sl_sequence set seq_set = p_set_id where seq_set = p_add_id; @@ -2626,6 +2757,11 @@ as $$ declare v_set_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that we are the origin of the set -- ---- @@ -2682,6 +2818,11 @@ declare v_pkcand_nn boolean; v_prec record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- For sets with a remote origin, check that we are subscribed -- to that set. Otherwise we ignore the table because it might @@ -2790,6 +2931,11 @@ declare v_set_id int4; v_set_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Determine the set_id -- ---- @@ -2843,6 +2989,11 @@ declare v_sub_provider int4; v_tab_reloid oid; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Determine the set_id -- ---- @@ -2902,6 +3053,11 @@ as $$ declare v_set_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that we are the origin of the set -- ---- @@ -2955,6 +3111,11 @@ declare v_seq_nspname name; v_sync_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- For sets with a remote origin, check that we are subscribed -- to that set. Otherwise we ignore the sequence because it might @@ -3047,6 +3208,11 @@ declare v_set_id int4; v_set_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Determine set id for this sequence -- ---- @@ -3102,6 +3268,11 @@ declare v_relkind char; v_sync_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Determine set id for this sequence -- ---- @@ -3166,6 +3337,11 @@ declare v_old_set_id int4; v_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Get the tables current set -- ---- @@ -3249,6 +3425,11 @@ create or replace function @NAMESPACE@.setMoveTable_int (p_tab_id int4, p_new_se returns int4 as $$ begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Move the table to the new set -- ---- @@ -3275,6 +3456,11 @@ declare v_old_set_id int4; v_origin int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Get the sequences current set -- ---- @@ -3357,6 +3543,11 @@ create or replace function @NAMESPACE@.setMoveSequence_int (p_seq_id int4, p_new returns int4 as $$ begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Move the sequence to the new set -- ---- @@ -3382,7 +3573,6 @@ declare v_fqname text; v_found integer; begin - -- ---- -- Get the sequences fully qualified name -- ---- @@ -3823,6 +4013,11 @@ declare v_missing_sets text; v_ev_seqno bigint; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- -- Check that the receiver exists -- @@ -3913,6 +4108,11 @@ declare v_ev_seqno2 int8; v_rec record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- -- Check that the receiver exists -- @@ -4054,6 +4254,11 @@ declare v_sub_row record; v_seq_id bigint; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Lookup the set origin -- ---- @@ -4188,6 +4393,11 @@ as $$ declare v_tab_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- Check that this is called on the receiver node -- ---- @@ -4281,6 +4491,11 @@ returns int4 as $$ declare begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- All the real work is done before event generation on the -- subscriber. @@ -4343,6 +4558,11 @@ as $$ declare v_n int4; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- ---- -- The real work is done in the replication engine. All -- we have to do here is remembering that it happened. @@ -4748,6 +4968,11 @@ as $$ declare v_row record; begin + -- ---- + -- Grab the central configuration lock + -- ---- + lock table @NAMESPACE@.sl_config_lock; + -- First remove the entire configuration delete from @NAMESPACE@.sl_listen; -- 2.39.5