bdr: Make waitforstart reliably notice when BDR has come up

author Craig Ringer <craig@2ndquadrant.com>

Thu, 18 Dec 2014 10:20:22 +0000 (18:20 +0800)

committer Craig Ringer <craig@2ndquadrant.com>

Wed, 4 Feb 2015 12:24:27 +0000 (23:24 +1100)
author Craig Ringer <craig@2ndquadrant.com>
Thu, 18 Dec 2014 10:20:22 +0000 (18:20 +0800)
committer Craig Ringer <craig@2ndquadrant.com>
Wed, 4 Feb 2015 12:24:27 +0000 (23:24 +1100)
diff --git a/expected/isolation/waitforstart.out b/expected/isolation/waitforstart.out

index 6979d65db889acdf11c3c9bc5598b1bc4080b0fd..8fcfe0210bb7044ac6809e6d0f6b0af3597c4c1a 100644 (file)
--- a/expected/isolation/waitforstart.out
+++ b/expected/isolation/waitforstart.out
@@ -2,10 +2,6 @@ Parsed test spec with 1 sessions
  
  starting permutation: wait
  step wait: 
-   -- pg_xlog_wait_remote_apply isn't good enough alone
-   -- as it doesn't permit us to say how many nodes must be present.
-   -- It'll succeed if there are zero nodes. So we first have to wait
-   -- for enough replication connections.
     DO $$
     DECLARE
         nodecount integer := 0;
@@ -14,6 +10,8 @@ step wait:
         WHILE nodecount <> 6
         LOOP
             PERFORM pg_sleep(1);
+           PERFORM pg_stat_clear_snapshot();
+           -- Now find out how many walsenders are running
             nodecount := (SELECT count(*)
                           FROM pg_catalog.pg_stat_replication);
             RAISE NOTICE 'Found % nodes',nodecount;
diff --git a/specs/isolation/waitforstart.spec b/specs/isolation/waitforstart.spec

index 539df8f0083ba80c594659ffdf5345eece2b2588..b5ef85c75ee285e39413ff7e84189146845a37be 100644 (file)
--- a/specs/isolation/waitforstart.spec
+++ b/specs/isolation/waitforstart.spec
@@ -3,12 +3,19 @@ conninfo "node2" "dbname=node2"
  conninfo "node3" "dbname=node3"
  
  session "snode1"
+
+# pg_xlog_wait_remote_apply isn't good enough alone as it doesn't permit us to
+# say how many nodes must be present.  It'll succeed if there are zero nodes.
+# So we first have to wait for enough replication connections.
+#
+# The reason why we call pg_stat_clear_snapshot() is that pg_stat_activity is
+# cached when first accessed so repeat access within the same transaction sees
+# unchanging results. As pg_stat_replication joins pg_stat_get_wal_senders() on
+# pg_stat_activity, new walsenders are filtered out by the join unless we force
+# a refresh of pg_stat_activity.
+
  step "wait"
  {
-   -- pg_xlog_wait_remote_apply isn't good enough alone
-   -- as it doesn't permit us to say how many nodes must be present.
-   -- It'll succeed if there are zero nodes. So we first have to wait
-   -- for enough replication connections.
     DO $$
     DECLARE
         nodecount integer := 0;
@@ -17,6 +24,8 @@ step "wait"
         WHILE nodecount <> 6
         LOOP
             PERFORM pg_sleep(1);
+           PERFORM pg_stat_clear_snapshot();
+           -- Now find out how many walsenders are running
             nodecount := (SELECT count(*)
                           FROM pg_catalog.pg_stat_replication);
             RAISE NOTICE 'Found % nodes',nodecount;
author	Craig Ringer <craig@2ndquadrant.com>
	Thu, 18 Dec 2014 10:20:22 +0000 (18:20 +0800)
committer	Craig Ringer <craig@2ndquadrant.com>
	Wed, 4 Feb 2015 12:24:27 +0000 (23:24 +1100)
expected/isolation/waitforstart.out		patch \| blob \| blame \| history
specs/isolation/waitforstart.spec		patch \| blob \| blame \| history