--- /dev/null
+
+coordinator.includeFile('disorder/tests/FailNodeTest.js');
+
+SiteFailover = function(coordinator, testResults) {
+ Failover.call(this, coordinator, testResults);
+ this.testDescription='Test the FAILOVER command. This test will try FAILOVER'
+ +' with an entire site failing';
+ this.compareQueryList.push(['select i_id,comments from disorder.do_item_review order by i_id','i_id']);
+
+}
+SiteFailover.prototype = new Failover();
+SiteFailover.prototype.constructor = SiteFailover;
+
+SiteFailover.prototype.getNodeCount = function() {
+ return 6;
+}
+
+SiteFailover.prototype.runTest = function() {
+ this.coordinator.log("SiteFailover.prototype.runTest - begin");
+ this.testResults.newGroup("Multinode Fail Over Test");
+ basicTest.prepareDb(['db6']);
+ this.setupReplication();
+
+ /**
+ *
+ * set 1
+ *
+ * 1 --------> 2
+ * /\ /\
+ * 3 4 5 6
+ *
+ * set 2
+ * 3-----------> 5
+ * /\ /\
+ * 1 4 2 6
+ *
+ *
+ * 3/2 has a path because 3 is the local failover target for node 1
+ *
+ * There is a path between 1/5 because 1 is the failover target for set 2 if node 3 fails.
+ *
+ * There is no path between 4/6 , or 1/6 or even 3/6.
+ *
+ */
+
+ /**
+ * Start the slons.
+ */
+ this.slonArray = [];
+ for ( var idx = 1; idx <= this.getNodeCount(); idx++) {
+ this.slonArray[idx - 1] = this.coordinator.createSlonLauncher('db' + idx);
+ this.slonArray[idx - 1].run();
+ }
+ var slonikScript='';
+ /**
+ * add in paths between nodes 1-->4 and 2-->5
+ * also add in node 6
+ */
+ slonikScript += 'store path(server=1,client=4,conninfo=@CONNINFO1 );\n';
+ slonikScript += 'store path(server=4,client=1,conninfo=@CONNINFO4 );\n';
+ slonikScript += 'store path(server=2,client=5,conninfo=@CONNINFO2 );\n';
+ slonikScript += 'store path(server=5,client=2,conninfo=@CONNINFO5 );\n';
+
+
+ slonikScript += 'store path(server=2,client=6,conninfo=@CONNINFO2 );\n';
+ slonikScript += 'store path(server=6,client=2,conninfo=@CONNINFO6 );\n';
+ slonikScript += 'store path(server=5,client=6,conninfo=@CONNINFO5 );\n';
+ slonikScript += 'store path(server=6,client=5,conninfo=@CONNINFO6 );\n';
+ var slonikPreamble = this.getSlonikPreamble();
+ var slonik=this.coordinator.createSlonik('failover',slonikPreamble,slonikScript);
+ slonik.run();
+ this.coordinator.join(slonik);
+ this.testResults.assertCheck('add extra paths passes',slonik.getReturnCode(),0);
+
+ /**
+ * Add some tables to replication.
+ *
+ */
+ this.addTables();
+
+
+ /**
+ * create the second set.
+ */
+ this.createSecondSet(3);
+
+ /**
+ * Subscribe the first set.
+ */
+ this.subscribeSet(1,1, 1, [ 2, 3 ,4]);
+ this.subscribeSet(1,1, 2, [ 5,6 ]);
+ /**
+ * subscribe the second set.
+ */
+ this.subscribeSet(2,3,3,[1,4,5]);
+ this.subscribeSet(2,3,5,[2,6]);
+ this.slonikSync(1,1);
+ var load = this.generateLoad(1);
+ java.lang.Thread.sleep(10*1000);
+ this.slonikSync(1,1);
+ this.populateReviewTable(2);
+ /**
+ * make sure the _review data makes it to
+ * all slaves, then let some SYNC events get
+ * genereated. Next we FAILOVER.
+ */
+ this.slonikSync(2,2);
+ java.lang.Thread.sleep(10*1000);
+ this.failover(1,2,3,5,4);
+ load.stop();
+ this.coordinator.join(load);
+
+ /**
+ * Now drop the nodes
+ */
+ var slonikPreamble = this.getSlonikPreamble();
+ var slonikScript = 'echo \'SiteFailover.drop nodes\';\n';
+ slonikScript+= 'drop node(id=\'1,3,4\',event node = 2);\nuninstall node(id=1);\nuninstall node(id=3);\uninstall node(id=3);\n';
+
+ var slonik=this.coordinator.createSlonik('drop node',slonikPreamble,slonikScript);
+ slonik.run();
+ this.coordinator.join(slonik);
+ this.testResults.assertCheck('drop 3 nodes passes',slonik.getReturnCode(),0);
+
+ load = this.generateLoad(2);
+ java.lang.Thread.sleep(10*1000);
+ this.slonikSync(1,2);
+ this.slonikSync(2,5);
+
+
+ for ( var idx = 1; idx <= this.getNodeCount(); idx++) {
+ this.slonArray[idx - 1].stop();
+ this.coordinator.join(this.slonArray[idx - 1]);
+ }
+ this.compareDb('db2','db5');
+ this.compareDb('db2','db6');
+ this.dropDb(['db6']);
+
+}
+
+SiteFailover.prototype.failover=function(originA,backupA,originB,backupB,receiverC)
+{
+ var slonikPreamble = 'cluster name=disorder_replica;\n';
+ var idx=2
+ slonikPreamble += 'node ' + idx + ' admin conninfo=\'dbname=$database.db'
+ + idx + '.dbname host=$database.db' + idx
+ + '.host '
+ + ' port=$database.db' + idx + '.port'
+ +' user=$database.db' + idx
+ + '.user.slony password=$database.db' + idx + '.password.slony\';\n';
+
+ var idx=5
+ slonikPreamble += 'node ' + idx + ' admin conninfo=\'dbname=$database.db'
+ + idx + '.dbname host=$database.db' + idx
+ + '.host '
+ + ' port=$database.db' + idx + '.port'
+ +' user=$database.db' + idx
+ + '.user.slony password=$database.db' + idx + '.password.slony\';\n';
+
+ var idx=6
+ slonikPreamble += 'node ' + idx + ' admin conninfo=\'dbname=$database.db'
+ + idx + '.dbname host=$database.db' + idx
+ + '.host '
+ + ' port=$database.db' + idx + '.port'
+ +' user=$database.db' + idx
+ + '.user.slony password=$database.db' + idx + '.password.slony\';\n';
+
+
+ /**
+ * first try failing over when we don't mention node 4.
+ * This should fail.
+ */
+ var slonikScript = 'echo \'SiteFailover.prototype.failover\';\n';
+ slonikScript += 'FAILOVER( node=(id=' + originA + ',backup node=' + backupA +')'
+ + ', node=(id=' + originB + ',backup node=' + backupB + '));\n';
+ var slonik=this.coordinator.createSlonik('failover',slonikPreamble,slonikScript);
+ slonik.run();
+ this.coordinator.join(slonik);
+ this.testResults.assertCheck('failover (should not work)',slonik.getReturnCode()==0,false);
+
+
+ /**
+ * try again, everything should work
+ */
+
+ slonikScript = 'echo \'SiteFailover.prototype.failover\';\n';
+ slonikScript += 'FAILOVER( node=(id=' + originA + ',backup node=' + backupA +')'
+ + ', node=(id=' + originB + ',backup node=' + backupB
+ + '), node=(id='+receiverC + ', backup node=' + backupA + ') );\n';
+ var slonik=this.coordinator.createSlonik('failover',slonikPreamble,slonikScript);
+ slonik.run();
+ this.coordinator.join(slonik);
+ this.testResults.assertCheck('failover should work',slonik.getReturnCode(),0);
+
+
+}
+
+
-- Initiate a failover. This function must be called on all nodes
-- and then waited for the restart of all node daemons.
-- ----------------------------------------------------------------------
-create or replace function @NAMESPACE@.failedNode(p_failed_node int4, p_backup_node int4)
+create or replace function @NAMESPACE@.failedNode(p_failed_node int4, p_backup_node int4,p_failed_nodes integer[])
returns int4
as $$
declare
update @NAMESPACE@.sl_subscribe set
sub_provider=p_backup_node
where sub_provider=p_failed_node
- and sub_receiver<>p_backup_node;
+ and sub_receiver<>p_backup_node
+ and sub_receiver <> ALL (p_failed_nodes);
-- ----
-- Terminate all connections of the failed node the hard way
if not v_failed then
- update @NAMESPACE@.sl_node set no_failed=true where no_id=p_failed_node
+ update @NAMESPACE@.sl_node set no_failed=true where no_id = ANY (p_failed_nodes)
and no_failed=false;
-- Rewrite sl_listen table
perform @NAMESPACE@.RebuildListenEntries();
return p_failed_node;
end;
$$ language plpgsql;
-comment on function @NAMESPACE@.failedNode(p_failed_node int4, p_backup_node int4) is
+comment on function @NAMESPACE@.failedNode(p_failed_node int4, p_backup_node int4,p_failed_nodes integer[]) is
'Initiate failover from failed_node to backup_node. This function must be called on all nodes,
and then waited for the restart of all node daemons.';
-- On the node that has the highest sequence number of the failed node,
-- fake the FAILED_NODE event.
-- ----------------------------------------------------------------------
-create or replace function @NAMESPACE@.failedNode2 (p_failed_node int4, p_backup_node int4, p_ev_seqno int8)
+create or replace function @NAMESPACE@.failedNode2 (p_failed_node int4, p_backup_node int4, p_ev_seqno int8, p_failed_nodes integer[])
returns bigint
as $$
declare
p_failed_node, p_ev_seqno;
end if;
- update @NAMESPACE@.sl_node set no_failed=true where no_id=p_failed_node
- and no_failed=false;
+ update @NAMESPACE@.sl_node set no_failed=true where no_id = ANY
+ (p_failed_nodes) and no_failed=false;
-- Rewrite sl_listen table
perform @NAMESPACE@.RebuildListenEntries();
-- ----
notify "_@CLUSTERNAME@_Restart";
select @NAMESPACE@.createEvent('_@CLUSTERNAME@','FAILOVER_NODE',
- p_failed_node::text,p_ev_seqno::text)
+ p_failed_node::text,p_ev_seqno::text,
+ array_to_string(p_failed_nodes,','))
into v_new_event;
end;
$$ language plpgsql;
-comment on function @NAMESPACE@.failedNode2 (p_failed_node int4, p_backup_node int4, p_ev_seqno int8) is
-'FUNCTION failedNode2 (failed_node, backup_node, set_id, ev_seqno, ev_seqfake)
+comment on function @NAMESPACE@.failedNode2 (p_failed_node int4, p_backup_node int4, p_ev_seqno int8,p_failed_nodes integer[] ) is
+'FUNCTION failedNode2 (failed_node, backup_node, set_id, ev_seqno, ev_seqfake,p_failed_nodes)
On the node that has the highest sequence number of the failed node,
fake the FAILOVER_SET event.';
v_row record;
v_last_sync int8;
v_set int4;
+ v_missing_path int4;
begin
SELECT max(ev_seqno) into v_last_sync FROM @NAMESPACE@.sl_event where
ev_origin=p_failed_node;
and sub_receiver = p_backup_node;
update @NAMESPACE@.sl_set
set set_origin = p_backup_node
- where set_id = v_set;
+ where set_id = v_set;
update @NAMESPACE@.sl_subscribe
- set sub_provider=p_backup_node
- where sub_set = v_set;
+ set sub_provider=p_backup_node
+ FROM @NAMESPACE@.sl_node receive_node
+ where sub_set = v_set
+ and sub_provider=p_failed_node
+ and sub_receiver=receive_node.no_id
+ and receive_node.no_failed=false;
for v_row in select * from @NAMESPACE@.sl_table
where tab_set = v_set
delete from @NAMESPACE@.sl_subscribe
where sub_set = v_set
and sub_receiver = p_backup_node;
+
update @NAMESPACE@.sl_subscribe
set sub_provider=p_backup_node
- where sub_set = v_set;
+ FROM @NAMESPACE@.sl_node receive_node
+ where sub_set = v_set
+ and sub_provider=p_failed_node
+ and sub_provider=p_failed_node
+ and sub_receiver=receive_node.no_id
+ and receive_node.no_failed=false;
update @NAMESPACE@.sl_set
set set_origin = p_backup_node
where set_id = v_set;
-- Loop over every possible pair of receiver and event origin
for v_row in select N1.no_id as receiver, N2.no_id as origin,
- N2.no_failed as failed
+ N2.no_failed as origin_failed,
+ N1.no_failed as receiver_failed
from @NAMESPACE@.sl_node as N1, @NAMESPACE@.sl_node as N2
where N1.no_id <> N2.no_id
loop
end if;
end if;
- if v_row.failed then
+ if v_row.origin_failed then
--for every failed node we delete all sl_listen entries
--except via providers (listed in sl_subscribe).
{
int failed_node = (int) strtol(event->ev_data1, NULL, 10);
char *seq_no_c = event->ev_data2;
+ char *failed_node_list = event->ev_data3;
+
PGresult *res;
/**
* The most-ahead failover canidate is the node that
* created the FAILOVER_NODE event (node->id)
*/
- slon_mkquery(&query2, "select %s.failedNode(%d,%d);"
+ slon_mkquery(&query2, "select %s.failedNode(%d,%d,ARRAY[%s]);"
,rtcfg_namespace,
- failed_node, node->no_id);
+ failed_node, node->no_id,failed_node_list);
res = PQexec(local_dbconn, dstring_data(&query2));
if (PQresultStatus(res) != PGRES_TUPLES_OK)
fail_node_promote(SlonikStmt_failed_node * stmt,
failed_node_entry * node_entry,
failnode_node * nodeinfo
- ,int *fail_node_ids);
+ ,int *fail_node_ids,
+ SlonDString * failed_node_list);
/* ----------
* main
}
}
+#if 0
if (slonik_is_slony_installed((SlonikStmt *) stmt, curAdmInfo) > 0)
{
rc = slonik_wait_config_caughtup(curAdmInfo, (SlonikStmt *) stmt,
if (rc < 0)
return rc;
}
+#endif
}
}
failnode_node **max_node_total = NULL;
failed_node_entry *node_entry = stmt->nodes;
int *fail_node_ids = NULL;
-
+ bool missing_paths=false;
int rc = 0;
/**
* The failover procedure (at a high level) is as follows
*
- * 1. Get a list of failover candidates for each failed node.
- * 2. validate that we have conninfo to all of them
- * 3. blank their paths to the failed nodes
- * 4. Wait for slons to restart
- * 5. for each failed node get the highest xid for each candidate
- * 6. execute FAILOVER on the highest canidate
- * 7. MOVE SET to the backup node.
+ * 1. Check to see if the failover will leave orphan nodes
+ * 2. Get a list of failover candidates for each failed node.
+ * 3. validate that we have conninfo to all of them
+ * 4. blank their paths to the failed nodes
+ * 5. Wait for slons to restart
+ * 6. for each failed node get the highest xid for each candidate
+ * 7. execute FAILOVER on the highest canidate
+ * 8. MOVE SET to the backup node.
*/
memset(set_list, 0, sizeof(int *) * num_origins);
+ /**
+ * validate that the list of failed nodes is complete.
+ * This means that for any providers in sl_subscribe
+ * that have failed there muts be a path between
+ * the receiver node and the specified backup node.
+ *
+ * If this isn't the case then the user must also include
+ * the receiver node as a failed node, or add in the path
+ * before calling FAILOVER.
+ *
+ * We only check sl_subscribe on arbitrary backup node
+ */
+ for (node_entry = stmt->nodes; node_entry != NULL;
+ node_entry = node_entry->next, cur_origin_idx++)
+ {
+ int num_orphans=0;
+ int res_idx;
+ slon_mkquery(&query,
+ " select sub_receiver FROM \"_%s\".sl_subscribe left join "
+ "\"_%s\".sl_path on (pa_server="
+ " %d and pa_client=sub_receiver) where sub_receiver not "
+ "in (%s) and sub_provider=%d and sub_receiver <> %d and pa_client is null"
+ , stmt->hdr.script->clustername
+ , stmt->hdr.script->clustername
+ , node_entry->backup_node
+ , dstring_data(&failed_node_list)
+ , node_entry->no_id,node_entry->backup_node);
+ adminfo1 = get_active_adminfo((SlonikStmt *) stmt, node_entry->backup_node);
+ if (adminfo1 == NULL)
+ {
+ printf("%s:%d no admin conninfo for node %d\n",
+ stmt->hdr.stmt_filename, stmt->hdr.stmt_lno,
+ node_entry->backup_node);
+ rc = -1;
+ goto cleanup;
+ }
+ res1 = db_exec_select((SlonikStmt *) stmt, adminfo1, &query);
+ if (res1 == NULL)
+ {
+ rc = -1;
+ goto cleanup;
+ }
+ num_orphans = PQntuples(res1);
+ for(res_idx=0; res_idx < num_orphans; res_idx++)
+ {
+ char * receiver=PQgetvalue(res1,res_idx,0);
+ printf("%s:%d can't failover node %s does not have a path to %d\n",
+ stmt->hdr.stmt_filename, stmt->hdr.stmt_lno,receiver,
+ node_entry->backup_node);
+ missing_paths=true;
+
+
+ }
+ PQclear(res1);
+ }
+ if ( missing_paths )
+ {
+ rc=-1;
+ goto cleanup;
+
+ }
+
/**
* get the list of failover candidates for each of the
* Connect to all these nodes and determine if there is a node daemon
* running on that node.
*/
- has_candidate = true;
+
for (i = 0; i < node_entry->num_nodes; i++)
{
+ has_candidate = true;
const char * pidcolumn;
nodeinfo[i].no_id = (int) strtol(PQgetvalue(res1, i, 0), NULL, 10);
nodeinfo[i].adminfo = get_active_adminfo((SlonikStmt *) stmt,
}
PQclear(res1);
PQclear(res2);
- if (!has_candidate)
+ if (!has_candidate && node_entry->num_sets > 0 )
{
printf("%s:%d error no failover candidates for %d\n",
stmt->hdr.stmt_filename, stmt->hdr.stmt_lno
,node_entry->no_id);
- PQclear(res1);
rc = -1;
goto cleanup;
}
+
/*
* Execute the preFailover() procedure on all failover candidate nodes
node_entry = node_entry->next, cur_origin_idx++)
{
+
failnode_node *nodeinfo = (failnode_node *) failnodebuf[cur_origin_idx];
+
+ if ( node_entry->num_nodes == 0 )
+ continue;
rc = fail_node_promote(stmt, node_entry, nodeinfo,
- fail_node_ids);
+ fail_node_ids,&failed_node_list);
if (rc < 0)
{
goto cleanup;
fail_node_promote(SlonikStmt_failed_node * stmt,
failed_node_entry * node_entry,
failnode_node * nodeinfo,
- int *fail_node_ids)
+ int *fail_node_ids,
+ SlonDString * failed_node_list)
{
int64 max_seqno = 0;
int max_node_idx = 0;
dstring_init(&query);
+ if ( node_entry->num_nodes == 0 && node_entry->num_sets == 0 )
+ {
+ /**
+ * This node is the origin of no sets but we still need to
+ * let the rest of the cluster know that this node is considered
+ * failed.
+ */
+
+ }
+
/*
* For every node determine the one with the event , preferring the backup
* node.
}
adminfo1 = nodeinfo[max_node_idx].adminfo;
+
/*
* Now execute all FAILED_NODE events on the most ahead candidate
*/
sprintf(ev_seqno_c, INT64_FORMAT, max_seqno);
slon_mkquery(&query,
"lock table \"_%s\".sl_event_lock, \"_%s\".sl_config_lock;"
- "select \"_%s\".failedNode2(%d,%d,'%s'); ",
+ "select \"_%s\".failedNode2(%d,%d,'%s',ARRAY[%s]); ",
stmt->hdr.script->clustername,
stmt->hdr.script->clustername,
stmt->hdr.script->clustername,
node_entry->no_id, nodeinfo[max_node_idx].no_id
- ,ev_seqno_c);
+ ,ev_seqno_c,dstring_data(failed_node_list));
printf("NOTICE: executing \"_%s\".failedNode2 on node %d\n",
stmt->hdr.script->clustername,
nodeinfo[max_node_idx].no_id);