/* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

/**
 * Test step: call HugoTransactions::loadTable on the context's table with
 * ctx->getNumRecords() as the record count.
 *
 * @return NDBT_OK when loadTable returns 0, NDBT_FAILED otherwise.
 */
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
  int records = ctx->getNumRecords();
  HugoTransactions hugoTrans(*ctx->getTab());
  if (hugoTrans.loadTable(GETNDB(step), records) != 0){
    return NDBT_FAILED;
  }
  return NDBT_OK;
}

/**
 * Test step: call HugoTransactions::fillTable on the context's table.
 *
 * @return NDBT_OK when fillTable returns 0, NDBT_FAILED otherwise.
 */
int runFillTable(NDBT_Context* ctx, NDBT_Step* step){
  HugoTransactions hugoTrans(*ctx->getTab());
  if (hugoTrans.fillTable(GETNDB(step)) != 0){
    return NDBT_FAILED;
  }
  return NDBT_OK;
}

/**
 * Test step: call loadTable over and over until ctx->isTestStopped()
 * becomes true. The iteration counter is only written to g_info.
 *
 * @return NDBT_FAILED as soon as one loadTable call returns non-zero,
 *         otherwise NDBT_OK once the test has been stopped.
 */
int runInsertUntilStopped(NDBT_Context* ctx, NDBT_Step* step){
  int result = NDBT_OK;
  int records = ctx->getNumRecords();
  int i = 0;
  HugoTransactions hugoTrans(*ctx->getTab());
  while (ctx->isTestStopped() == false) {
    g_info << i << ": ";
    if (hugoTrans.loadTable(GETNDB(step), records) != 0){
      return NDBT_FAILED;
    }
    i++;
  }
  return result;
}

/**
 * Test step: call UtilTransactions::clearTable on the context's table,
 * passing ctx->getNumRecords() as its second argument.
 *
 * @return NDBT_OK when clearTable returns 0, NDBT_FAILED otherwise.
 */
int runClearTable(NDBT_Context* ctx, NDBT_Step* step){
  int records = ctx->getNumRecords();
  UtilTransactions utilTrans(*ctx->getTab());
  if (utilTrans.clearTable(GETNDB(step), records) != 0){
    return NDBT_FAILED;
  }
  return NDBT_OK;
}

/**
 * Test step: call clearTable over and over until the test is stopped.
 * Returns NDBT_FAILED on the first non-zero clearTable result.
 */
int runClearTableUntilStopped(NDBT_Context* ctx, NDBT_Step* step){
  int records = ctx->getNumRecords();
  int i = 0;
  UtilTransactions utilTrans(*ctx->getTab());
while (ctx->isTestStopped() == false) { g_info << i << ": "; if (utilTrans.clearTable(GETNDB(step), records) != 0){ return NDBT_FAILED; } i++; } return NDBT_OK; } int runScanReadUntilStopped(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int records = ctx->getNumRecords(); int i = 0; HugoTransactions hugoTrans(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i << ": "; if (hugoTrans.scanReadRecords(GETNDB(step), records) != 0){ return NDBT_FAILED; } i++; } return result; } int runPkReadUntilStopped(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int records = ctx->getNumRecords(); NdbOperation::LockMode lm = (NdbOperation::LockMode)ctx->getProperty("ReadLockMode", (Uint32)NdbOperation::LM_Read); int i = 0; HugoTransactions hugoTrans(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i << ": "; int rows = (rand()%records)+1; int batch = (rand()%rows)+1; if (hugoTrans.pkReadRecords(GETNDB(step), rows, batch, lm) != 0){ return NDBT_FAILED; } i++; } return result; } static int start_transaction_on_specific_place(Vector op_array, Uint32 index, Ndb *pNdb, NodeId node_id, Uint32 instance_id) { if (op_array[index]->startTransaction(pNdb, node_id, instance_id) != NDBT_OK) { return NDBT_FAILED; } NdbConnection* pCon = op_array[index]->getTransaction(); Uint32 transNode= pCon->getConnectedNodeId(); if (transNode == node_id) { return NDBT_OK; } op_array[index]->closeTransaction(pNdb); return NDBT_FAILED; } static void cleanup_op_array(Vector &op_array, Ndb *pNdb, int num_instances) { for (int instance_id = 0; instance_id < num_instances; instance_id++) { op_array[instance_id]->closeTransaction(pNdb); } } /** * This test case is about stress testing our TC failover code. * We always run this with a special config with 4 data nodes * where node 2 has more transaction records than node 1 and * node 3. Node 4 has 4 TC instances and has more operation * records than node 1 and node 3. 
* * So in order to test we fill up all transaction records with * small transactions in node 2 and instance 1. This is done * by runManyTransactions. * * We also fill up all operation records in instance 1 through * 4. This is done by runLargeTransactions since we execute * this by fairly large transactions, few transactions enough to * be able to handle all transactions, but too many operations to * handle. This will ensure that each TC failover step will make * progress. * * We don't commit the transactions, instead we crash the node * 2 and 4 (we do this by a special error insert that crashes * node 4 when node 2 fails. This ensures that both the nodes * have to handle TC failover in the same failover batch. This * is important to ensure that we also test the failed node * queue handling in DBTC. */ int run_multiTCtakeover(NDBT_Context* ctx, NDBT_Step* step) { int records = ctx->getNumRecords(); HugoTransactions hugoTrans(*ctx->getTab()); if (hugoTrans.loadTable(GETNDB(step), records, 12) != 0) { ndbout << "Failed to load table for multiTC takeover test" << endl; return NDBT_FAILED; } ctx->setProperty("runLargeDone", (Uint32)0); ctx->setProperty("restartsDone", (Uint32)0); return NDBT_OK; } int runLargeTransactions(NDBT_Context* ctx, NDBT_Step* step) { int multiop = 50; int trans_per_instance = 10; int num_instances = 4; int op_instances = num_instances * trans_per_instance; Vector op_array; int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); for (int i = 0; i < op_instances; i++) { op_array.push_back(new HugoOperations(*ctx->getTab())); if (op_array[i] == NULL) { ndbout << "Failed to allocate HugoOperations instance " << i << endl; cleanup_op_array(op_array, pNdb, i); return NDBT_FAILED; } } for (int instance_id = 1; instance_id <= num_instances; instance_id++) { for (int i = 0; i < trans_per_instance; i++) { Uint32 index = (instance_id - 1) * trans_per_instance + i; if (start_transaction_on_specific_place(op_array, index, pNdb, 4, /* node id */ 
instance_id) != NDBT_OK) { ndbout << "Failed to start transaction, index = " << index << endl; cleanup_op_array(op_array, pNdb, op_instances); return NDBT_FAILED; } for (int j = 0; j < multiop; j++) { int record_no = records + (index * multiop) + j; if (op_array[index]->pkInsertRecord(pNdb, record_no, 1, rand())) { ndbout << "Failed to insert record number = " << record_no << endl; cleanup_op_array(op_array, pNdb, op_instances); return NDBT_FAILED; } } if (op_array[index]->execute_NoCommit(pNdb) != 0) { ndbout << "Failed to execute no commit, index = " << index << endl; cleanup_op_array(op_array, pNdb, op_instances); return NDBT_FAILED; } } } /** * Wait until all preparations are complete until we restart node 4 that * holds those transactions. */ ndbout << "runLargeTransactions prepare done" << endl; ctx->setProperty("runLargeDone", (Uint32)1); while (ctx->getProperty("restartsDone", (Uint32)0) != 1) { ndbout << "Waiting for restarts to complete" << endl; NdbSleep_SecSleep(10); } cleanup_op_array(op_array, pNdb, op_instances); return NDBT_OK; } int runManyTransactions(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; int multi_trans = 400; int result = NDBT_OK; int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); Vector op_array; if (restarter.getNumDbNodes() != 4) { ndbout << "Need to have exactly 4 DB nodes for this test" << endl; ctx->stopTest(); return NDBT_FAILED; } for (int i = 0; i < multi_trans; i++) { op_array.push_back(new HugoOperations(*ctx->getTab())); if (op_array[i] == NULL) { ndbout << "Failed to allocate HugoOperations instance " << i << endl; cleanup_op_array(op_array, pNdb, i); return NDBT_FAILED; } } for (int i = 0; i < multi_trans; i++) { if (start_transaction_on_specific_place(op_array, i, pNdb, 2, /* node id */ 1) != NDBT_OK) { ndbout << "Failed to start transaction, i = " << i << endl; cleanup_op_array(op_array, pNdb, multi_trans); return NDBT_FAILED; } int record_no = records + (50 * 4 * 10) + i; if 
(op_array[i]->pkInsertRecord(pNdb, record_no, 1, rand())) { ndbout << "Failed to insert record no = " << record_no << endl; cleanup_op_array(op_array, pNdb, multi_trans); return NDBT_FAILED; } if (op_array[i]->execute_NoCommit(pNdb) != 0) { ndbout << "Failed to execute transaction " << i << endl; cleanup_op_array(op_array, pNdb, multi_trans); return NDBT_FAILED; } } /** * Wait until all preparations are complete until we restart node 2 that * holds those transactions. */ ndbout << "Run many transactions done" << endl; while (ctx->getProperty("runLargeDone", (Uint32)0) != 1) { NdbSleep_SecSleep(1); } /** * We ensure that node 2 and 4 fail together by inserting * error number 941 that fails in PREP_FAILREQ handling */ if (restarter.insertErrorInNode(4, 941)) { ndbout << "Failed to insert error 941" << endl; result = NDBT_FAILED; goto end; } ndbout << "Restart node " << "2" << endl; if (restarter.restartOneDbNode(2, false, false, true) != 0) { g_err << "Failed to restart Node 2" << endl; result = NDBT_FAILED; goto end; } ndbout << "Wait for node 2 and 4 to restart" << endl; if (restarter.waitClusterStarted() != 0) { g_err << "Cluster failed to start" << endl; result = NDBT_FAILED; goto end; } ndbout << "Cluster restarted" << endl; end: ctx->setProperty("restartsDone", (Uint32)1); cleanup_op_array(op_array, pNdb, multi_trans); return result; } int runPkUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int records = ctx->getNumRecords(); int multiop = ctx->getProperty("MULTI_OP", 1); Ndb* pNdb = GETNDB(step); int i = 0; HugoOperations hugoOps(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i << ": "; int batch = (rand()%records)+1; int row = rand() % records; if (batch > 25) batch = 25; if(row + batch > records) batch = records - row; if(hugoOps.startTransaction(pNdb) != 0) goto err; if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0) goto err; for (int j = 1; jgetNdbError(); hugoOps.closeTransaction(pNdb); if 
(error.status == NdbError::TemporaryError){ NdbSleep_MilliSleep(50); continue; } return NDBT_FAILED; i++; } return result; } int runPkReadPkUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step) { int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); int i = 0; HugoOperations hugoOps(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i++ << ": "; int rows = (rand()%records)+1; int batch = (rand()%rows)+1; int row = (records - rows) ? rand() % (records - rows) : 0; int j,k; for(j = 0; j rows) k = rows - j; if(hugoOps.startTransaction(pNdb) != 0) goto err; if(hugoOps.pkReadRecord(pNdb, row+j, k, NdbOperation::LM_Exclusive) != 0) goto err; if(hugoOps.execute_NoCommit(pNdb) != 0) goto err; if(hugoOps.pkUpdateRecord(pNdb, row+j, k, rand()) != 0) goto err; if(hugoOps.execute_Commit(pNdb) != 0) goto err; if(hugoOps.closeTransaction(pNdb) != 0) return NDBT_FAILED; } continue; err: NdbConnection* pCon = hugoOps.getTransaction(); if(pCon == 0) continue; NdbError error = pCon->getNdbError(); hugoOps.closeTransaction(pNdb); if (error.status == NdbError::TemporaryError){ NdbSleep_MilliSleep(50); continue; } return NDBT_FAILED; } return NDBT_OK; } int runPkReadPkUpdatePkUnlockUntilStopped(NDBT_Context* ctx, NDBT_Step* step) { int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); int i = 0; HugoOperations hugoOps(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i++ << ": "; int rows = (rand()%records)+1; int batch = (rand()%rows)+1; int row = (records - rows) ? 
rand() % (records - rows) : 0; int j,k; for(j = 0; j rows) k = rows - j; Vector lockHandles; if(hugoOps.startTransaction(pNdb) != 0) goto err; if(hugoOps.pkReadRecordLockHandle(pNdb, lockHandles, row+j, k, NdbOperation::LM_Exclusive) != 0) goto err; if(hugoOps.execute_NoCommit(pNdb) != 0) goto err; if(hugoOps.pkUpdateRecord(pNdb, row+j, k, rand()) != 0) goto err; if(hugoOps.execute_NoCommit(pNdb) != 0) goto err; if(hugoOps.pkUnlockRecord(pNdb, lockHandles) != 0) goto err; if(hugoOps.execute_Commit(pNdb) != 0) goto err; if(hugoOps.closeTransaction(pNdb) != 0) return NDBT_FAILED; } continue; err: NdbConnection* pCon = hugoOps.getTransaction(); if(pCon == 0) continue; NdbError error = pCon->getNdbError(); hugoOps.closeTransaction(pNdb); if (error.status == NdbError::TemporaryError){ NdbSleep_MilliSleep(50); continue; } return NDBT_FAILED; } return NDBT_OK; } int runDeleteInsertUntilStopped(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int records = ctx->getNumRecords(); int i = 0; HugoTransactions hugoTrans(*ctx->getTab()); UtilTransactions utilTrans(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i << ": "; if (utilTrans.clearTable(GETNDB(step), records) != 0){ result = NDBT_FAILED; break; } if (hugoTrans.loadTable(GETNDB(step), records, 1) != 0){ result = NDBT_FAILED; break; } i++; } return result; } int runScanUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int records = ctx->getNumRecords(); int parallelism = ctx->getProperty("Parallelism", 1); int abort = ctx->getProperty("AbortProb", (Uint32)0); int check = ctx->getProperty("ScanUpdateNoRowCountCheck", (Uint32)0); if (check) records = 0; int i = 0; HugoTransactions hugoTrans(*ctx->getTab()); while (ctx->isTestStopped() == false) { g_info << i << ": "; if (hugoTrans.scanUpdateRecords(GETNDB(step), records, abort, parallelism) == NDBT_FAILED){ return NDBT_FAILED; } i++; } return result; } int runScanReadVerify(NDBT_Context* ctx, NDBT_Step* step){ int 
records = ctx->getNumRecords(); HugoTransactions hugoTrans(*ctx->getTab()); if (hugoTrans.scanReadRecords(GETNDB(step), records, 0, 64) != 0){ return NDBT_FAILED; } return NDBT_OK; } int runRestarter(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int loops = ctx->getNumLoops(); int sync_threads = ctx->getProperty("SyncThreads", (unsigned)0); int sleep0 = ctx->getProperty("Sleep0", (unsigned)0); int sleep1 = ctx->getProperty("Sleep1", (unsigned)0); int randnode = ctx->getProperty("RandNode", (unsigned)0); NdbRestarter restarter; int i = 0; int lastId = 0; if (restarter.getNumDbNodes() < 2){ ctx->stopTest(); return NDBT_OK; } if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; return NDBT_FAILED; } loops *= (restarter.getNumDbNodes() > 2 ? 2 : restarter.getNumDbNodes()); if (loops < restarter.getNumDbNodes()) loops = restarter.getNumDbNodes(); while(iisTestStopped()){ int id = lastId % restarter.getNumDbNodes(); if (randnode == 1) { id = rand() % restarter.getNumDbNodes(); } int nodeId = restarter.getDbNodeId(id); ndbout << "Restart node " << nodeId << endl; if(restarter.restartOneDbNode(nodeId, false, true, true) != 0){ g_err << "Failed to restartNextDbNode" << endl; result = NDBT_FAILED; break; } if (restarter.waitNodesNoStart(&nodeId, 1)) { g_err << "Failed to waitNodesNoStart" << endl; result = NDBT_FAILED; break; } if (sleep1) NdbSleep_MilliSleep(sleep1); if (restarter.startNodes(&nodeId, 1)) { g_err << "Failed to start node" << endl; result = NDBT_FAILED; break; } if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; result = NDBT_FAILED; break; } if (sleep0) NdbSleep_MilliSleep(sleep0); ctx->sync_up_and_wait("PauseThreads", sync_threads); lastId++; i++; } ctx->stopTest(); return result; } int runCheckAllNodesStarted(NDBT_Context* ctx, NDBT_Step* step){ NdbRestarter restarter; if(restarter.waitClusterStarted(1) != 0){ g_err << "All nodes was not started " << endl; return NDBT_FAILED; } 
return NDBT_OK; } int runRestarts(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int loops = ctx->getNumLoops(); NDBT_TestCase* pCase = ctx->getCase(); NdbRestarts restarts; int i = 0; int timeout = 240; while (iisTestStopped()) { int safety = 0; if (i > 0) safety = 15; if (ctx->closeToTimeout(safety)) break; if(restarts.executeRestart(ctx, pCase->getName(), timeout, safety) != 0){ g_err << "Failed to executeRestart(" <getName() <<")" << endl; result = NDBT_FAILED; break; } i++; } ctx->stopTest(); return result; } int runDirtyRead(NDBT_Context* ctx, NDBT_Step* step){ int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); NdbRestarter restarter; HugoOperations hugoOps(*ctx->getTab()); Ndb* pNdb = GETNDB(step); int i = 0; while(iisTestStopped()){ g_info << i << ": "; int id = i % restarter.getNumDbNodes(); int nodeId = restarter.getDbNodeId(id); ndbout << "Restart node " << nodeId << endl; restarter.insertErrorInNode(nodeId, 5041); restarter.insertErrorInAllNodes(8048 + (i & 1)); for(int j = 0; jgetNumLoops(); NdbRestarter restarter; HugoOperations hugoOps(*ctx->getTab()); Ndb* pNdb = GETNDB(step); int i = 0; while(iisTestStopped()){ g_info << i << ": "; if(hugoOps.startTransaction(pNdb) != 0) return NDBT_FAILED; if(hugoOps.pkUpdateRecord(pNdb, 1, 128) != 0) return NDBT_FAILED; if(hugoOps.execute_NoCommit(pNdb) != 0) return NDBT_FAILED; Uint32 transNode= hugoOps.getTransaction()->getConnectedNodeId(); int id = i % restarter.getNumDbNodes(); int nodeId; while((nodeId = restarter.getDbNodeId(id)) == (int)transNode) id = (id + 1) % restarter.getNumDbNodes(); ndbout << "Restart node " << nodeId << endl; restarter.restartOneDbNode(nodeId, /** initial */ false, /** nostart */ true, /** abort */ true); restarter.waitNodesNoStart(&nodeId, 1); int res; if(i & 1) res= hugoOps.execute_Commit(pNdb); else res= hugoOps.execute_Rollback(pNdb); ndbout_c("res= %d", res); hugoOps.closeTransaction(pNdb); restarter.startNodes(&nodeId, 1); 
restarter.waitNodesStarted(&nodeId, 1); if(i & 1) { if(res != 286) return NDBT_FAILED; } else { if(res != 0) return NDBT_FAILED; } i++; } return NDBT_OK; } int runBug15587(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; Uint32 tableId = ctx->getTab()->getTableId(); int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 }; dump[1] = tableId; int nodeId = restarter.getDbNodeId(1); ndbout << "Restart node " << nodeId << endl; if (restarter.restartOneDbNode(nodeId, /** initial */ false, /** nostart */ true, /** abort */ true)) return NDBT_FAILED; if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateOneNode(nodeId, val2, 2)) return NDBT_FAILED; if (restarter.dumpStateOneNode(nodeId, dump, 2)) return NDBT_FAILED; if (restarter.startNodes(&nodeId, 1)) return NDBT_FAILED; restarter.waitNodesStartPhase(&nodeId, 1, 3); if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; if (restarter.dumpStateOneNode(nodeId, val2, 1)) return NDBT_FAILED; if (restarter.startNodes(&nodeId, 1)) return NDBT_FAILED; if (restarter.waitNodesStarted(&nodeId, 1)) return NDBT_FAILED; ctx->stopTest(); return NDBT_OK; } int runBug15632(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; int nodeId = restarter.getDbNodeId(1); ndbout << "Restart node " << nodeId << endl; if (restarter.restartOneDbNode(nodeId, /** initial */ false, /** nostart */ true, /** abort */ true)) return NDBT_FAILED; if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; if (restarter.insertErrorInNode(nodeId, 7165)) return NDBT_FAILED; if (restarter.startNodes(&nodeId, 1)) return NDBT_FAILED; if (restarter.waitNodesStarted(&nodeId, 1)) return NDBT_FAILED; if (restarter.restartOneDbNode(nodeId, /** initial */ false, /** nostart */ true, /** abort */ true)) return NDBT_FAILED; if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; if (restarter.insertErrorInNode(nodeId, 7171)) return 
NDBT_FAILED;
  if (restarter.startNodes(&nodeId, 1))
    return NDBT_FAILED;
  if (restarter.waitNodesStarted(&nodeId, 1))
    return NDBT_FAILED;
  ctx->stopTest();
  return NDBT_OK;
}

/**
 * Test step for bug 15685.
 *
 * Loads 10 rows, opens a transaction that updates record 0 and executes it
 * without committing, inserts error 5100 in all nodes and then rolls the
 * transaction back. Afterwards it waits for the cluster to report started
 * and inserts error 0 in all nodes (presumably clearing the error insert).
 *
 * The test is stopped on the success path and on the "err" path. The two
 * early returns after a failed insertErrorInAllNodes, and the one after a
 * failed loadTable, leave without stopping it.
 */
int runBug15685(NDBT_Context* ctx, NDBT_Step* step){
  Ndb* pNdb = GETNDB(step);
  HugoOperations hugoOps(*ctx->getTab());
  NdbRestarter restarter;
  HugoTransactions hugoTrans(*ctx->getTab());
  if (hugoTrans.loadTable(GETNDB(step), 10) != 0){
    return NDBT_FAILED;
  }
  if(hugoOps.startTransaction(pNdb) != 0)
    goto err;
  if(hugoOps.pkUpdateRecord(pNdb, 0, 1, rand()) != 0)
    goto err;
  if(hugoOps.execute_NoCommit(pNdb) != 0)
    goto err;
  if (restarter.insertErrorInAllNodes(5100))
    return NDBT_FAILED;
  // The rollback result is not inspected.
  hugoOps.execute_Rollback(pNdb);
  if (restarter.waitClusterStarted() != 0)
    goto err;
  if (restarter.insertErrorInAllNodes(0))
    return NDBT_FAILED;
  ctx->stopTest();
  return NDBT_OK;
err:
  ctx->stopTest();
  return NDBT_FAILED;
}

/**
 * Test step for bug 16772.
 *
 * Picks a random non-master node ("alive") and a different random node
 * ("dead"), inserts error 930 in the alive node and restarts the dead node
 * into no-start. The first waitNodesNoStart is expected to fail; only then
 * is the dump 7020/323/252/<deadNodeId> sent to the alive node, after which
 * the dead node must reach no-start and is started again.
 *
 * @return NDBT_OK only when the first waitNodesNoStart failed as expected
 *         and every later step succeeded. With fewer than two data nodes
 *         the test is stopped and NDBT_OK is returned.
 */
int runBug16772(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }
  int aliveNodeId = restarter.getRandomNotMasterNodeId(rand());
  int deadNodeId = aliveNodeId;
  // Keep drawing until the victim differs from the alive node.
  while (deadNodeId == aliveNodeId)
    deadNodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());

  // Suppress NDB_FAILCONF; simulates that it arrives late,
  // or out of order, relative to node restart.
  if (restarter.insertErrorInNode(aliveNodeId, 930))
    return NDBT_FAILED;

  ndbout << "Restart node " << deadNodeId << endl;
  if (restarter.restartOneDbNode(deadNodeId,
                                 /** initial */ false,
                                 /** nostart */ true,
                                 /** abort */ true))
    return NDBT_FAILED;

  // It should now be hanging since we throw away NDB_FAILCONF
  const int ret = restarter.waitNodesNoStart(&deadNodeId, 1);

  // So this should fail...i.e node should not restart (yet)
  if (ret)
  {
    // Now send a NDB_FAILCONF for deadNo
    int dump[] = { 7020, 323, 252, 0 };
    dump[3] = deadNodeId;
    if (restarter.dumpStateOneNode(aliveNodeId, dump, 4))
      return NDBT_FAILED;

    // Got (the delayed) NDB_NODECONF, and should now start.
    if (restarter.waitNodesNoStart(&deadNodeId, 1))
      return NDBT_FAILED;
  }

  if (restarter.startNodes(&deadNodeId, 1))
    return NDBT_FAILED;
  if (restarter.waitNodesStarted(&deadNodeId, 1))
    return NDBT_FAILED;

  // A zero ret means the first wait unexpectedly succeeded: report failure.
  return ret ? NDBT_OK : NDBT_FAILED;
}

/**
 * Test step for bug 18414.
 *
 * Runs up to five rounds. Each round updates 128 records in an uncommitted
 * transaction, takes node1 (the transaction's connected node) and node2 (a
 * random node in node1's node group), arms node2 with
 * CmvmiSetRestartOnErrorInsert and error 5003, and then rolls back. On odd
 * rounds error 8080 is inserted in node1 first. node2 must reach no-start;
 * errors are then reset with insertErrorInAllNodes(0), node2 is started and
 * a scan update of 128 records must succeed.
 *
 * @return NDBT_OK when all rounds pass, or when the loop is left early
 *         because node1 or node2 is -1. NDBT_FAILED via "err", which closes
 *         the open transaction first.
 */
int runBug18414(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }
  Ndb* pNdb = GETNDB(step);
  HugoOperations hugoOps(*ctx->getTab());
  HugoTransactions hugoTrans(*ctx->getTab());
  int loop = 0;
  do
  {
    if(hugoOps.startTransaction(pNdb) != 0)
      goto err;
    if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
      goto err;
    if(hugoOps.execute_NoCommit(pNdb) != 0)
      goto err;

    int node1 = hugoOps.getTransaction()->getConnectedNodeId();
    int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());

    // NOTE(review): this break skips the closeTransaction at the end of
    // the round; the transaction is still open when NDBT_OK is returned.
    if (node1 == -1 || node2 == -1)
      break;

    if (loop & 1)
    {
      if (restarter.insertErrorInNode(node1, 8080))
        goto err;
    }

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
    if (restarter.dumpStateOneNode(node2, val2, 2))
      goto err;
    if (restarter.insertErrorInNode(node2, 5003))
      goto err;

    /** int res= */ hugoOps.execute_Rollback(pNdb);

    if (restarter.waitNodesNoStart(&node2, 1) != 0)
      goto err;
    if (restarter.insertErrorInAllNodes(0))
      goto err;
    if (restarter.startNodes(&node2, 1) != 0)
      goto err;
    if (restarter.waitClusterStarted() != 0)
      goto err;
    if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
      goto err;

    hugoOps.closeTransaction(pNdb);
  } while(++loop < 5);

  return NDBT_OK;

err:
  hugoOps.closeTransaction(pNdb);
  return NDBT_FAILED;
}

/**
 * Test step for bug 18612. With fewer than two data nodes the test is
 * stopped and NDBT_OK is returned.
 */
int runBug18612(NDBT_Context* ctx, NDBT_Step* step){
  // Assume two replicas
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }
  Uint32 cnt = restarter.getNumDbNodes();
  for(int loop = 0; loop < ctx->getNumLoops(); loop++)
  {
    // Two zero-initialised node id arrays, one per partition.
    int partition0[256];
    int partition1[256];
    memset(partition0, 0, sizeof(partition0));
    memset(partition1, 0, sizeof(partition1));
    Bitmask<4> nodesmask;
    Uint32 node1 =
restarter.getDbNodeId(rand()%cnt); for (Uint32 i = 0; istopTest(); return NDBT_OK; } node1 = tmp; } while(nodesmask.get(node1)); partition0[i] = node1; partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand()); ndbout_c("nodes %d %d", node1, partition1[i]); require(!nodesmask.get(node1)); require(!nodesmask.get(partition1[i])); nodesmask.set(node1); nodesmask.set(partition1[i]); } ndbout_c("done"); int dump[255]; dump[0] = DumpStateOrd::NdbcntrStopNodes; memcpy(dump + 1, partition0, sizeof(int)*cnt/2); Uint32 master = restarter.getMasterNodeId(); if (restarter.dumpStateOneNode(master, dump, 1+cnt/2)) return NDBT_FAILED; if (restarter.waitNodesNoStart(partition0, cnt/2)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateAllNodes(val2, 2)) return NDBT_FAILED; if (restarter.insertErrorInAllNodes(932)) return NDBT_FAILED; dump[0] = 9000; memcpy(dump + 1, partition0, sizeof(int)*cnt/2); for (Uint32 i = 0; istopTest(); return NDBT_OK; } Uint32 cnt = restarter.getNumDbNodes(); for(int loop = 0; loop < ctx->getNumLoops(); loop++) { int partition0[256]; int partition1[256]; memset(partition0, 0, sizeof(partition0)); memset(partition1, 0, sizeof(partition1)); Bitmask<4> nodesmask; Uint32 node1 = restarter.getDbNodeId(rand()%cnt); for (Uint32 i = 0; igetTab()); Ndb* pNdb = GETNDB(step); const int masterNode = restarter.getMasterNodeId(); int dump[] = { 7090, 20 } ; if (restarter.dumpStateAllNodes(dump, 2)) return NDBT_FAILED; NdbSleep_MilliSleep(3000); Vector nodes; for (int i = 0; igetConnectedNodeId(); if (node != masterNode) { hugoOps.closeTransaction(pNdb); goto retry; } int nodeId; do { nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); } while (nodeId == node); ndbout_c("7031 to %d", nodeId); if (restarter.insertErrorInNode(nodeId, 7031)) return NDBT_FAILED; for (Uint32 i = 0; igetNumLoops(); NdbRestarter restarter; Ndb* pNdb = GETNDB(step); HugoTransactions hugoTrans(*ctx->getTab()); 
int dump[] = { 9002, 0 } ; Uint32 ownNode = refToNode(pNdb->getReference()); dump[1] = ownNode; for (; loops; loops --) { int nodeId = restarter.getRandomNotMasterNodeId(rand()); restarter.restartOneDbNode(nodeId, false, true, true); restarter.waitNodesNoStart(&nodeId, 1); if (restarter.dumpStateOneNode(nodeId, dump, 2)) return NDBT_FAILED; restarter.startNodes(&nodeId, 1); do { for (Uint32 i = 0; i < 100; i++) { hugoTrans.pkReadRecords(pNdb, 100, 1, NdbOperation::LM_CommittedRead); } } while (restarter.waitClusterStarted(5) != 0); } return NDBT_OK; } int runBug29364(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter restarter; Ndb* pNdb = GETNDB(step); HugoTransactions hugoTrans(*ctx->getTab()); if (restarter.getNumDbNodes() < 4) return NDBT_OK; int dump0[] = { 9000, 0 } ; int dump1[] = { 9001, 0 } ; Uint32 ownNode = refToNode(pNdb->getReference()); dump0[1] = ownNode; for (; loops; loops --) { int node0 = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); int node1 = restarter.getRandomNodeOtherNodeGroup(node0, rand()); restarter.restartOneDbNode(node0, false, true, true); restarter.waitNodesNoStart(&node0, 1); restarter.startNodes(&node0, 1); restarter.waitClusterStarted(); restarter.restartOneDbNode(node1, false, true, true); restarter.waitNodesNoStart(&node1, 1); if (restarter.dumpStateOneNode(node1, dump0, 2)) return NDBT_FAILED; restarter.startNodes(&node1, 1); do { for (Uint32 i = 0; i < 100; i++) { hugoTrans.pkReadRecords(pNdb, 100, 1, NdbOperation::LM_CommittedRead); } } while (restarter.waitClusterStarted(5) != 0); if (restarter.dumpStateOneNode(node1, dump1, 1)) return NDBT_FAILED; } return NDBT_OK; } int runBug25364(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; int loops = ctx->getNumLoops(); if (restarter.getNumDbNodes() < 4) return NDBT_OK; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; for (; loops; loops --) { int master = restarter.getMasterNodeId(); int victim = 
restarter.getRandomNodeOtherNodeGroup(master, rand()); int second = restarter.getRandomNodeSameNodeGroup(victim, rand()); int dump[] = { 935, victim } ; if (restarter.dumpStateOneNode(master, dump, 2)) return NDBT_FAILED; if (restarter.dumpStateOneNode(master, val2, 2)) return NDBT_FAILED; if (restarter.restartOneDbNode(second, false, true, true)) return NDBT_FAILED; int nodes[2] = { master, second }; if (restarter.waitNodesNoStart(nodes, 2)) return NDBT_FAILED; restarter.startNodes(nodes, 2); if (restarter.waitNodesStarted(nodes, 2)) return NDBT_FAILED; } return NDBT_OK; } int runBug21271(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; HugoOperations hugoOps(*ctx->getTab()); const int masterNode = restarter.getMasterNodeId(); const int nodeId = restarter.getRandomNodeSameNodeGroup(masterNode, rand()); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateOneNode(nodeId, val2, 2)) return NDBT_FAILED; Uint32 tableId = ctx->getTab()->getTableId(); int dump[] = { DumpStateOrd::LqhErrorInsert5042, 0, 5044 }; dump[1] = tableId; if (restarter.dumpStateOneNode(nodeId, dump, 3)) return NDBT_FAILED; restarter.waitNodesNoStart(&nodeId, 1); ctx->stopTest(); restarter.startNodes(&nodeId, 1); if (restarter.waitClusterStarted() != 0) return NDBT_FAILED; return NDBT_OK; return NDBT_OK; } int runBug24543(NDBT_Context* ctx, NDBT_Step* step){ NdbRestarter restarter; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateAllNodes(val2, 2)) return NDBT_FAILED; int nodes[2]; nodes[0] = restarter.getMasterNodeId(); restarter.insertErrorInNode(nodes[0], 934); nodes[1] = restarter.getRandomNodeOtherNodeGroup(nodes[0], rand()); if (nodes[1] == -1) { nodes[1] = restarter.getRandomNodeSameNodeGroup(nodes[0], rand()); } restarter.restartOneDbNode(nodes[1], false, true, true); if (restarter.waitNodesNoStart(nodes, 2)) return NDBT_FAILED; restarter.startNodes(nodes, 2); if (restarter.waitNodesStarted(nodes, 2)) { 
return NDBT_FAILED; } return NDBT_OK; } int runBug25468(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter restarter; for (int i = 0; igetNumLoops(); NdbRestarter restarter; if (restarter.getNumDbNodes() < 4) return NDBT_OK; for (int i = 0; igetTab(); NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); if (restarter.getNumDbNodes() < 2) return NDBT_OK; pDict->dropTable(tab.getName()); if (restarter.restartAll(true, true, true)) return NDBT_FAILED; if (restarter.waitClusterNoStart()) return NDBT_FAILED; if (restarter.startAll()) return NDBT_FAILED; if (restarter.waitClusterStarted()) return NDBT_FAILED; int res = pDict->createTable(tab); if (res) { return NDBT_FAILED; } HugoTransactions trans(* pDict->getTable(tab.getName())); trans.loadTable(pNdb, ctx->getNumRecords()); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; int master = restarter.getMasterNodeId(); int victim = restarter.getRandomNodeOtherNodeGroup(master, rand()); if (victim == -1) victim = restarter.getRandomNodeSameNodeGroup(master, rand()); restarter.restartOneDbNode(victim, false, true, true); for (Uint32 i = 0; i<10; i++) { ndbout_c("Loop: %d", i); if (restarter.waitNodesNoStart(&victim, 1)) return NDBT_FAILED; if (restarter.dumpStateOneNode(victim, val2, 2)) return NDBT_FAILED; if (restarter.insertErrorInNode(victim, 7191)) return NDBT_FAILED; trans.scanUpdateRecords(pNdb, ctx->getNumRecords()); if (restarter.startNodes(&victim, 1)) return NDBT_FAILED; NdbSleep_SecSleep(3); } if (restarter.waitNodesNoStart(&victim, 1)) return NDBT_FAILED; if (restarter.restartAll(false, false, true)) return NDBT_FAILED; if (restarter.waitClusterStarted()) return NDBT_FAILED; trans.scanUpdateRecords(pNdb, ctx->getNumRecords()); restarter.restartOneDbNode(victim, false, true, true); for (Uint32 i = 0; i<1; i++) { ndbout_c("Loop: %d", i); if (restarter.waitNodesNoStart(&victim, 1)) return NDBT_FAILED; if (restarter.dumpStateOneNode(victim, val2, 2)) return 
NDBT_FAILED; if (restarter.insertErrorInNode(victim, 7016)) return NDBT_FAILED; trans.scanUpdateRecords(pNdb, ctx->getNumRecords()); if (restarter.startNodes(&victim, 1)) return NDBT_FAILED; NdbSleep_SecSleep(3); } if (restarter.waitNodesNoStart(&victim, 1)) return NDBT_FAILED; if (restarter.startNodes(&victim, 1)) return NDBT_FAILED; if (restarter.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug26457(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; if (res.getNumDbNodes() < 4) return NDBT_OK; int loops = ctx->getNumLoops(); while (loops --) { retry: int master = res.getMasterNodeId(); int next = res.getNextMasterNodeId(master); ndbout_c("master: %d next: %d", master, next); if (res.getNodeGroup(master) == res.getNodeGroup(next)) { res.restartOneDbNode(next, false, false, true); if (res.waitClusterStarted()) return NDBT_FAILED; goto retry; } int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 }; if (res.dumpStateOneNode(next, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(next, 7180)) return NDBT_FAILED; res.restartOneDbNode(master, false, false, true); if (res.waitClusterStarted()) return NDBT_FAILED; } return NDBT_OK; } int runBug26481(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; int node = res.getRandomNotMasterNodeId(rand()); ndbout_c("node: %d", node); if (res.restartOneDbNode(node, true, true, true)) return NDBT_FAILED; if (res.waitNodesNoStart(&node, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(node, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(node, 7018)) return NDBT_FAILED; if (res.startNodes(&node, 1)) return NDBT_FAILED; res.waitNodesStartPhase(&node, 1, 3); if (res.waitNodesNoStart(&node, 1)) return NDBT_FAILED; res.startNodes(&node, 1); if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug26450(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; Ndb* pNdb = GETNDB(step); int node = 
res.getRandomNotMasterNodeId(rand()); Vector nodes; for (int i = 0; i 0)) return NDBT_FAILED; if (res.waitClusterNoStart()) return NDBT_FAILED; if (res.startNodes(nodes.getBase(), nodes.size())) return NDBT_FAILED; if (res.waitNodesStarted(nodes.getBase(), nodes.size())) return NDBT_FAILED; } if (res.startNodes(&node, 1)) return NDBT_FAILED; if (res.waitNodesStarted(&node, 1)) return NDBT_FAILED; HugoTransactions trans (* ctx->getTab()); if (trans.selectCount(pNdb) != 0) return NDBT_FAILED; return NDBT_OK; } int runBug27003(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter res; static const int errnos[] = { 4025, 4026, 4027, 4028, 0 }; int node = res.getRandomNotMasterNodeId(rand()); ndbout_c("node: %d", node); if (res.restartOneDbNode(node, true, true, true)) return NDBT_FAILED; Uint32 pos = 0; for (int i = 0; igetNumLoops(); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } static const int errnos[] = { 7181, 7182, 0 }; Uint32 pos = 0; for (Uint32 i = 0; i<(Uint32)loops; i++) { while (errnos[pos] != 0) { int master = res.getMasterNodeId(); int next = res.getNextMasterNodeId(master); //int next2 = res.getNextMasterNodeId(next); //int node = (i & 1) ? 
next : next2; ndbout_c("Testing err: %d", errnos[pos]); if (res.insertErrorInNode(next, errnos[pos])) return NDBT_FAILED; NdbSleep_SecSleep(3); if (res.waitClusterStarted()) return NDBT_FAILED; pos++; } pos = 0; } return NDBT_OK; } int runBug27466(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } for (Uint32 i = 0; i<(Uint32)loops; i++) { int node1 = res.getDbNodeId(rand() % res.getNumDbNodes()); int node2 = node1; while (node1 == node2) { node2 = res.getDbNodeId(rand() % res.getNumDbNodes()); } ndbout_c("nodes %u %u", node1, node2); if (res.restartOneDbNode(node1, false, true, true)) return NDBT_FAILED; if (res.waitNodesNoStart(&node1, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(node1, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(node2, 8039)) return NDBT_FAILED; res.startNodes(&node1, 1); NdbSleep_SecSleep(3); if (res.waitNodesNoStart(&node1, 1)) return NDBT_FAILED; NdbSleep_SecSleep(5); // Wait for delayed INCL_NODECONF to arrive res.startNodes(&node1, 1); if (res.waitClusterStarted()) return NDBT_FAILED; } return NDBT_OK; } int runBug28023(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } HugoTransactions hugoTrans(*ctx->getTab()); if (hugoTrans.loadTable(pNdb, records) != 0){ return NDBT_FAILED; } if (hugoTrans.clearTable(pNdb, records) != 0) { return NDBT_FAILED; } for (Uint32 i = 0; i<(Uint32)loops; i++) { int node1 = res.getDbNodeId(rand() % res.getNumDbNodes()); if (res.restartOneDbNode2(node1, NdbRestarter::NRRF_ABORT | NdbRestarter::NRRF_NOSTART)) return NDBT_FAILED; if (res.waitNodesNoStart(&node1, 1)) return NDBT_FAILED; if (hugoTrans.loadTable(pNdb, records) != 0){ return NDBT_FAILED; } if (hugoTrans.clearTable(pNdb, records) != 0) { return 
NDBT_FAILED; } res.startNodes(&node1, 1); if (res.waitClusterStarted()) return NDBT_FAILED; if (hugoTrans.loadTable(pNdb, records) != 0){ return NDBT_FAILED; } if (hugoTrans.scanUpdateRecords(pNdb, records) != 0) return NDBT_FAILED; if (hugoTrans.clearTable(pNdb, records) != 0) { return NDBT_FAILED; } } return NDBT_OK; } int runBug28717(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; if (res.getNumDbNodes() < 4) { return NDBT_OK; } int master = res.getMasterNodeId(); int node0 = res.getRandomNodeOtherNodeGroup(master, rand()); int node1 = res.getRandomNodeSameNodeGroup(node0, rand()); ndbout_c("master: %d node0: %d node1: %d", master, node0, node1); if (res.restartOneDbNode(node0, false, true, true)) { return NDBT_FAILED; } { int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; NdbLogEventHandle handle = ndb_mgm_create_logevent_handle(res.handle, filter); int dump[] = { DumpStateOrd::DihStartLcpImmediately }; struct ndb_logevent event; for (Uint32 i = 0; i<3; i++) { res.dumpStateOneNode(master, dump, 1); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointStarted); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointCompleted); } } if (res.waitNodesNoStart(&node0, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(node0, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(node0, 5010)) return NDBT_FAILED; if (res.insertErrorInNode(node1, 1001)) return NDBT_FAILED; if (res.startNodes(&node0, 1)) return NDBT_FAILED; NdbSleep_SecSleep(3); if (res.insertErrorInNode(node1, 0)) return NDBT_FAILED; if (res.waitNodesNoStart(&node0, 1)) return NDBT_FAILED; if (res.startNodes(&node0, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } static int f_master_failure [] = { 7000, 7001, 7002, 7003, 7004, 7186, 7187, 7188, 7189, 7190, 0 }; static int f_participant_failure [] = { 7005, 
7006, 7007, 7008, 5000, 7228, 0 }; int runerrors(NdbRestarter& res, NdbRestarter::NodeSelector sel, const int* errors) { for (Uint32 i = 0; errors[i]; i++) { int node = res.getNode(sel); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(node, val2, 2)) return NDBT_FAILED; ndbout << "node " << node << " err: " << errors[i]<< endl; if (res.insertErrorInNode(node, errors[i])) return NDBT_FAILED; if (res.waitNodesNoStart(&node, 1) != 0) return NDBT_FAILED; res.startNodes(&node, 1); if (res.waitClusterStarted() != 0) return NDBT_FAILED; } return NDBT_OK; } int runGCP(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; int loops = ctx->getNumLoops(); if (res.getNumDbNodes() < 2) { return NDBT_OK; } if (res.getNumDbNodes() < 4) { /** * 7186++ is only usable for 4 nodes and above */ Uint32 i; for (i = 0; f_master_failure[i] && f_master_failure[i] != 7186; i++); f_master_failure[i] = 0; } while (loops >= 0 && !ctx->isTestStopped()) { loops --; #if 0 if (runerrors(res, NdbRestarter::NS_NON_MASTER, f_participant_failure)) { return NDBT_FAILED; } if (runerrors(res, NdbRestarter::NS_MASTER, f_participant_failure)) { return NDBT_FAILED; } #endif if (runerrors(res, NdbRestarter::NS_RANDOM, f_participant_failure)) { return NDBT_FAILED; } if (runerrors(res, NdbRestarter::NS_MASTER, f_master_failure)) { return NDBT_FAILED; } } ctx->stopTest(); return NDBT_OK; } int runCommitAck(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); NdbRestarter restarter; Ndb* pNdb = GETNDB(step); if (records < 2) return NDBT_OK; if (restarter.getNumDbNodes() < 2) return NDBT_OK; int trans_type= -1; NdbConnection *pCon; int node; while (loops--) { trans_type++; if (trans_type > 2) trans_type= 0; HugoTransactions hugoTrans(*ctx->getTab()); switch (trans_type) { case 0: /* - load records less 1 */ g_info << "case 0\n"; if (hugoTrans.loadTable(GETNDB(step), records - 1)) { return NDBT_FAILED; } break; case 1: 
/* - load 1 record */ g_info << "case 1\n"; if (hugoTrans.loadTable(GETNDB(step), 1)) { return NDBT_FAILED; } break; case 2: /* - load 1 record in the end */ g_info << "case 2\n"; { HugoOperations hugoOps(*ctx->getTab()); if (hugoOps.startTransaction(pNdb)) abort(); if (hugoOps.pkInsertRecord(pNdb, records-1)) abort(); if (hugoOps.execute_Commit(pNdb)) abort(); if (hugoOps.closeTransaction(pNdb)) abort(); } break; default: abort(); } /* run transaction that should be tested */ HugoOperations hugoOps(*ctx->getTab()); if (hugoOps.startTransaction(pNdb)) return NDBT_FAILED; pCon= hugoOps.getTransaction(); node= pCon->getConnectedNodeId(); switch (trans_type) { case 0: case 1: /* insert records with ignore error - insert rows, some exist already */ for (int i= 0; i < records; i++) { if (hugoOps.pkInsertRecord(pNdb, i)) goto err; } break; case 2: /* insert records with ignore error - insert rows, some exist already */ for (int i= 0; i < records; i++) { if (hugoOps.pkInsertRecord(pNdb, i)) goto err; } break; default: abort(); } /* insert error in ndb kernel (TC) that throws away acknowledge of commit and then die 5 seconds later */ { if (restarter.insertErrorInNode(node, 8054)) goto err; } { int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateOneNode(node, val2, 2)) goto err; } /* execute transaction and verify return code */ g_info << " execute... 
hangs for 5 seconds\n"; { const NdbOperation *first= pCon->getFirstDefinedOperation(); int check= pCon->execute(Commit, AO_IgnoreError); const NdbError err = pCon->getNdbError(); while (first) { const NdbError &err= first->getNdbError(); g_info << " error " << err.code << endl; first= pCon->getNextCompletedOperation(first); } int expected_commit_res[3]= { 630, 630, 630 }; if (check == -1 || err.code != expected_commit_res[trans_type]) { g_err << "check == " << check << endl; g_err << "got error: " << err.code << " expected: " << expected_commit_res[trans_type] << endl; goto err; } } g_info << " wait node nostart\n"; if (restarter.waitNodesNoStart(&node, 1)) { g_err << " wait node nostart failed\n"; goto err; } /* close transaction */ if (hugoOps.closeTransaction(pNdb)) return NDBT_FAILED; /* commit ack marker pools should be empty */ g_info << " dump pool status\n"; { int dump[255]; dump[0] = 2552; if (restarter.dumpStateAllNodes(dump, 1)) return NDBT_FAILED; } /* wait for cluster to come up again */ g_info << " wait cluster started\n"; if (restarter.startNodes(&node, 1) || restarter.waitNodesStarted(&node, 1)) { g_err << "Cluster failed to start\n"; return NDBT_FAILED; } /* verify data */ g_info << " verifying\n"; switch (trans_type) { case 0: case 1: case 2: /* insert records with ignore error - should have all records */ if (hugoTrans.scanReadRecords(GETNDB(step), records, 0, 64) != 0){ return NDBT_FAILED; } break; default: abort(); } /* cleanup for next round in loop */ g_info << " cleaning\n"; if (hugoTrans.clearTable(GETNDB(step), records)) { return NDBT_FAILED; } continue; err: hugoOps.closeTransaction(pNdb); return NDBT_FAILED; } return NDBT_OK; } int max_cnt(int arr[], int cnt) { int res = 0; for (int i = 0; i res) { res = arr[i]; } } return res; } int runPnr(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter res(0, &ctx->m_cluster_connection); bool lcp = ctx->getProperty("LCP", (unsigned)0); int nodegroups[MAX_NDB_NODES]; 
bzero(nodegroups, sizeof(nodegroups)); for (int i = 0; istopTest(); return NDBT_OK; } } for (int i = 0; iisTestStopped() == false; i++) { if (lcp) { int lcpdump = DumpStateOrd::DihMinTimeBetweenLCP; res.dumpStateAllNodes(&lcpdump, 1); } int ng_copy[MAX_NDB_NODES]; memcpy(ng_copy, nodegroups, sizeof(ng_copy)); Vector nodes; printf("restarting "); while (max_cnt(ng_copy, MAX_NDB_NODES) > 1) { int node = res.getNode(NdbRestarter::NS_RANDOM); int ng = res.getNodeGroup(node); if (ng_copy[ng] > 1) { printf("%u ", node); nodes.push_back(node); ng_copy[ng]--; } } printf("\n"); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; for (Uint32 j = 0; jstopTest(); return NDBT_OK; } int runCreateBigTable(NDBT_Context* ctx, NDBT_Step* step) { const char * prefix = ctx->getProperty("PREFIX", ""); NdbDictionary::Table tab = *ctx->getTab(); BaseString tmp; tmp.assfmt("%s_%s", prefix, tab.getName()); tab.setName(tmp.c_str()); NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); int res = pDict->createTable(tab); if (res) { return NDBT_FAILED; } const NdbDictionary::Table* pTab = pDict->getTable(tmp.c_str()); if (pTab == 0) { return NDBT_FAILED; } int bytes = tab.getRowSizeInBytes(); int size = 50*1024*1024; // 50Mb int rows = size / bytes; if (rows > 1000000) rows = 1000000; ndbout_c("Loading %u rows into %s", rows, tmp.c_str()); Uint64 now = NdbTick_CurrentMillisecond(); HugoTransactions hugoTrans(*pTab); int cnt = 0; do { hugoTrans.loadTableStartFrom(GETNDB(step), cnt, 10000); cnt += 10000; } while (cnt < rows && (NdbTick_CurrentMillisecond() - now) < 30000); //30s ndbout_c("Loaded %u rows in %llums", cnt, NdbTick_CurrentMillisecond() - now); return NDBT_OK; } int runDropBigTable(NDBT_Context* ctx, NDBT_Step* step) { const char * prefix = ctx->getProperty("PREFIX", ""); NdbDictionary::Table tab = *ctx->getTab(); BaseString tmp; tmp.assfmt("%s_%s", prefix, tab.getName()); GETNDB(step)->getDictionary()->dropTable(tmp.c_str()); return NDBT_OK; } int 
runBug31525(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); //int records = ctx->getNumRecords(); //Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } int nodes[2]; nodes[0] = res.getMasterNodeId(); nodes[1] = res.getNextMasterNodeId(nodes[0]); while (res.getNodeGroup(nodes[0]) != res.getNodeGroup(nodes[1])) { ndbout_c("Restarting %u as it not in same node group as %u", nodes[1], nodes[0]); if (res.restartOneDbNode(nodes[1], false, true, true)) return NDBT_FAILED; if (res.waitNodesNoStart(nodes+1, 1)) return NDBT_FAILED; if (res.startNodes(nodes+1, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; nodes[1] = res.getNextMasterNodeId(nodes[0]); } ndbout_c("nodes[0]: %u nodes[1]: %u", nodes[0], nodes[1]); int val = DumpStateOrd::DihMinTimeBetweenLCP; if (res.dumpStateAllNodes(&val, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateAllNodes(val2, 2)) return NDBT_FAILED; if (res.insertErrorInAllNodes(932)) return NDBT_FAILED; if (res.insertErrorInNode(nodes[1], 7192)) return NDBT_FAILED; if (res.insertErrorInNode(nodes[0], 7191)) return NDBT_FAILED; if (res.waitClusterNoStart()) return NDBT_FAILED; if (res.startAll()) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; if (res.restartOneDbNode(nodes[1], false, false, true)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug31980(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); //int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } HugoOperations hugoOps (* ctx->getTab()); if(hugoOps.startTransaction(pNdb) != 0) return NDBT_FAILED; if(hugoOps.pkInsertRecord(pNdb, 1) != 0) return NDBT_FAILED; if(hugoOps.execute_NoCommit(pNdb) != 0) return NDBT_FAILED; int transNode= 
hugoOps.getTransaction()->getConnectedNodeId(); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(transNode, val2, 2)) { return NDBT_FAILED; } if (res.insertErrorInNode(transNode, 8055)) { return NDBT_FAILED; } hugoOps.execute_Commit(pNdb); // This should hang/fail if (res.waitNodesNoStart(&transNode, 1)) return NDBT_FAILED; if (res.startNodes(&transNode, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug32160(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); //int records = ctx->getNumRecords(); //Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } int master = res.getMasterNodeId(); int next = res.getNextMasterNodeId(master); if (res.insertErrorInNode(next, 7194)) { return NDBT_FAILED; } int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(master, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(master, 7193)) return NDBT_FAILED; int val3[] = { 7099 }; if (res.dumpStateOneNode(master, val3, 1)) return NDBT_FAILED; if (res.waitNodesNoStart(&master, 1)) return NDBT_FAILED; if (res.startNodes(&master, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug32922(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; int loops = ctx->getNumLoops(); //int records = ctx->getNumRecords(); //Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } while (loops--) { int master = res.getMasterNodeId(); int victim = 32768; for (Uint32 i = 0; i<(Uint32)res.getNumDbNodes(); i++) { int node = res.getDbNodeId(i); if (node != master && node < victim) victim = node; } int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(victim, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(master, 7200)) return NDBT_FAILED; if 
(res.waitNodesNoStart(&victim, 1)) return NDBT_FAILED; if (res.startNodes(&victim, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; } return NDBT_OK; } int runBug34216(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); NdbRestarter restarter; int i = 0; int lastId = 0; HugoOperations hugoOps(*ctx->getTab()); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); if (restarter.getNumDbNodes() < 2) { ctx->stopTest(); return NDBT_OK; } if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; return NDBT_FAILED; } #ifdef NDB_USE_GET_ENV char buf[100]; const char * off = NdbEnv_GetEnv("NDB_ERR_OFFSET", buf, sizeof(buf)); #else const char * off = NULL; #endif int offset = off ? atoi(off) : 0; while(iisTestStopped()) { if (i > 0 && ctx->closeToTimeout(100 / loops)) break; int id = lastId % restarter.getNumDbNodes(); int nodeId = restarter.getDbNodeId(id); int err = 5048 + ((i+offset) % 2); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if(hugoOps.startTransaction(pNdb) != 0) goto err; nodeId = hugoOps.getTransaction()->getConnectedNodeId(); ndbout << "Restart node " << nodeId << " " << err < records) rows = records; int batch = 1; int row = (records - rows) ? 
rand() % (records - rows) : 0; if (row + rows > records) row = records - row; /** * We should really somehow check that one of the 25 rows * resides in the node we're targeting */ for (int r = row; r < row + rows; r++) { if(hugoOps.pkUpdateRecord(pNdb, r, batch, rand()) != 0) goto err; for (int l = 1; l<5; l++) { if (hugoOps.execute_NoCommit(pNdb) != 0) goto err; if(hugoOps.pkUpdateRecord(pNdb, r, batch, rand()) != 0) goto err; } } hugoOps.execute_Commit(pNdb); hugoOps.closeTransaction(pNdb); if (restarter.waitNodesNoStart(&nodeId, 1)) { g_err << "Failed to waitNodesNoStart" << endl; result = NDBT_FAILED; break; } if (restarter.startNodes(&nodeId, 1)) { g_err << "Failed to startNodes" << endl; result = NDBT_FAILED; break; } if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; result = NDBT_FAILED; break; } lastId++; i++; } ctx->stopTest(); return result; err: return NDBT_FAILED; } int runNF_commit(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); NdbRestarter restarter(0, &ctx->m_cluster_connection); if (restarter.getNumDbNodes() < 2) { ctx->stopTest(); return NDBT_OK; } if(restarter.waitClusterStarted() != 0){ g_err << "Cluster failed to start" << endl; return NDBT_FAILED; } int i = 0; while(iisTestStopped()) { int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); int err = 5048; ndbout << "Restart node " << nodeId << " " << err <stopTest(); return result; } int runBug34702(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; int loops = ctx->getNumLoops(); //int records = ctx->getNumRecords(); //Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } /* Account for 3 tests per loop */ loops = (loops + 2) / 3; while (loops > 0) { loops--; for (Uint32 i = 0; i <= 2; i++) { int victim = res.getDbNodeId(rand()%res.getNumDbNodes()); res.restartOneDbNode(victim, /** initial */ true, /** nostart */ true, /** abort */ true); if 
(res.waitNodesNoStart(&victim, 1)) return NDBT_FAILED; if (i == 0) { res.insertErrorInAllNodes(7204); } else if (i == 1) { res.insertErrorInAllNodes(7245); } else if (i == 2) { res.insertErrorInAllNodes(7246); } res.insertErrorInNode(victim, 7203); res.startNodes(&victim, 1); if (res.waitClusterStarted()) return NDBT_FAILED; } } return NDBT_OK; } int runMNF(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } Vector part0; // One node per ng Vector part1; // All other nodes Bitmask<255> part0mask; Bitmask<255> part1mask; Bitmask<255> ngmask; for (int i = 0; igetNumLoops(); while (loops-- && !ctx->isTestStopped()) { int cnt, *nodes; int master = res.getMasterNodeId(); int nextMaster = res.getNextMasterNodeId(master); bool obsolete_error = false; bool cmf = false; // true if both master and nextMaster will crash if (part0mask.get(master) && part0mask.get(nextMaster)) { cmf = true; cnt = part0.size(); nodes = part0.getBase(); printf("restarting part0"); } else if(part1mask.get(master) && part1mask.get(nextMaster)) { cmf = true; cnt = part1.size(); nodes = part1.getBase(); printf("restarting part1"); } else { cmf = false; if (loops & 1) { cnt = part0.size(); nodes = part0.getBase(); printf("restarting part0"); } else { cnt = part0.size(); nodes = part0.getBase(); printf("restarting part0"); } } int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; for (int i = 0; i= 7.4.3, the EMPTY_LCP protocol * tested by case 5 & 6 above has become obsolete. * Thus, the error insert 7206 / 5008 in all nodes * has no effect in case 5 & 6 * (EMPTY_LCP code still kept for backward compat.) * -> Only master node is now killed by error 7193 insert, * and test below now verify that EMPTY_LCP not * being used. * * Test will fail if mixing versions with and * without EMPTY_LCP in use. 
*/ if (obsolete_error) // Error no longer in use, only master will crash { if (res.waitNodesNoStart(&master, 1)) return NDBT_FAILED; if (res.startNodes(&master, 1)) return NDBT_FAILED; } else { if (res.waitNodesNoStart(nodes, cnt)) return NDBT_FAILED; if (res.startNodes(nodes, cnt)) return NDBT_FAILED; } if (res.waitClusterStarted()) return NDBT_FAILED; if (obsolete_error) // Error never cleared nor node restarted { /* * For obsolete error inserts, error is never cleared nor node * restarted. Clearing those here after test case succeeded. */ for (int i = 0; istopTest(); return NDBT_OK; } int runBug36199(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); NdbRestarter res; if (res.getNumDbNodes() < 4) return NDBT_OK; int master = res.getMasterNodeId(); int nextMaster = res.getNextMasterNodeId(master); int victim = res.getRandomNodeSameNodeGroup(nextMaster, rand()); if (victim == master) { victim = res.getRandomNodeOtherNodeGroup(nextMaster, rand()); } ndbout_c("master: %u next master: %u victim: %u", master, nextMaster, victim); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(master, val2, 2); res.dumpStateOneNode(victim, val2, 2); res.insertErrorInNode(victim, 7205); res.insertErrorInNode(master, 7014); int lcp = 7099; res.dumpStateOneNode(master, &lcp, 1); int nodes[2]; nodes[0] = master; nodes[1] = victim; if (res.waitNodesNoStart(nodes, 2)) { return NDBT_FAILED; } if (res.startNodes(nodes, 2)) { return NDBT_FAILED; } if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug36246(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); NdbRestarter res; Ndb* pNdb = GETNDB(step); if (res.getNumDbNodes() < 4) return NDBT_OK; HugoOperations hugoOps(*ctx->getTab()); restartloop: int tryloop = 0; int master = res.getMasterNodeId(); int nextMaster = res.getNextMasterNodeId(master); loop: if(hugoOps.startTransaction(pNdb) != 0) 
return NDBT_FAILED; if(hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0) return NDBT_FAILED; if(hugoOps.execute_NoCommit(pNdb) != 0) return NDBT_FAILED; int victim = hugoOps.getTransaction()->getConnectedNodeId(); printf("master: %u nextMaster: %u victim: %u", master, nextMaster, victim); if (victim == master || victim == nextMaster || res.getNodeGroup(victim) == res.getNodeGroup(master) || res.getNodeGroup(victim) == res.getNodeGroup(nextMaster)) { hugoOps.execute_Rollback(pNdb); hugoOps.closeTransaction(pNdb); tryloop++; if (tryloop == 10) { ndbout_c(" -> restarting next master: %u", nextMaster); res.restartOneDbNode(nextMaster, /** initial */ false, /** nostart */ true, /** abort */ true); res.waitNodesNoStart(&nextMaster, 1); res.startNodes(&nextMaster, 1); if (res.waitClusterStarted()) return NDBT_FAILED; goto restartloop; } else { ndbout_c(" -> loop"); goto loop; } } ndbout_c(" -> go go gadget skates"); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(master, val2, 2); res.dumpStateOneNode(victim, val2, 2); res.insertErrorInNode(master, 8060); res.insertErrorInNode(victim, 9999); int nodes[2]; nodes[0] = master; nodes[1] = victim; if (res.waitNodesNoStart(nodes, 2)) { return NDBT_FAILED; } if (res.startNodes(nodes, 2)) { return NDBT_FAILED; } if (res.waitClusterStarted()) return NDBT_FAILED; hugoOps.execute_Rollback(pNdb); hugoOps.closeTransaction(pNdb); return NDBT_OK; } int runBug36247(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); NdbRestarter res; Ndb* pNdb = GETNDB(step); if (res.getNumDbNodes() < 4) return NDBT_OK; HugoOperations hugoOps(*ctx->getTab()); restartloop: int tryloop = 0; int master = res.getMasterNodeId(); int nextMaster = res.getNextMasterNodeId(master); loop: if(hugoOps.startTransaction(pNdb) != 0) return NDBT_FAILED; if(hugoOps.pkUpdateRecord(pNdb, 1, 100) != 0) return NDBT_FAILED; if(hugoOps.execute_NoCommit(pNdb) != 0) return NDBT_FAILED; int victim = 
hugoOps.getTransaction()->getConnectedNodeId(); printf("master: %u nextMaster: %u victim: %u", master, nextMaster, victim); if (victim == master || victim == nextMaster || res.getNodeGroup(victim) == res.getNodeGroup(master) || res.getNodeGroup(victim) == res.getNodeGroup(nextMaster)) { hugoOps.execute_Rollback(pNdb); hugoOps.closeTransaction(pNdb); tryloop++; if (tryloop == 10) { ndbout_c(" -> restarting next master: %u", nextMaster); res.restartOneDbNode(nextMaster, /** initial */ false, /** nostart */ true, /** abort */ true); res.waitNodesNoStart(&nextMaster, 1); res.startNodes(&nextMaster, 1); if (res.waitClusterStarted()) return NDBT_FAILED; goto restartloop; } else { ndbout_c(" -> loop"); goto loop; } } ndbout_c(" -> go go gadget skates"); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(master, val2, 2); res.dumpStateOneNode(victim, val2, 2); int err5050[] = { 5050 }; res.dumpStateAllNodes(err5050, 1); res.insertErrorInNode(victim, 9999); int nodes[2]; nodes[0] = master; nodes[1] = victim; if (res.waitNodesNoStart(nodes, 2)) { return NDBT_FAILED; } if (res.startNodes(nodes, 2)) { return NDBT_FAILED; } if (res.waitClusterStarted()) return NDBT_FAILED; hugoOps.execute_Rollback(pNdb); hugoOps.closeTransaction(pNdb); return NDBT_OK; } int runBug36276(NDBT_Context* ctx, NDBT_Step* step) { /** * This test case was introduced to test the EMPTY_LCP protocol. * This protocol was removed in 7.4, so now this function simply * tests shooting down the master node at the end phases of an LCP. 
*/ //int result = NDBT_OK; //int loops = ctx->getNumLoops(); NdbRestarter res; //Ndb* pNdb = GETNDB(step); if (res.getNumDbNodes() < 4) return NDBT_OK; int master = res.getMasterNodeId(); int nextMaster = res.getNextMasterNodeId(master); int victim = res.getRandomNodeSameNodeGroup(nextMaster, rand()); if (victim == master) { victim = res.getRandomNodeOtherNodeGroup(nextMaster, rand()); } ndbout_c("master: %u nextMaster: %u victim: %u", master, nextMaster, victim); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(master, val2, 2); res.insertErrorInNode(victim, 7209); int lcp = 7099; res.dumpStateOneNode(master, &lcp, 1); if (res.waitNodesNoStart(&master, 1)) { return NDBT_FAILED; } if (res.startNodes(&master, 1)) { return NDBT_FAILED; } if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } int runBug36245(NDBT_Context* ctx, NDBT_Step* step) { //int result = NDBT_OK; //int loops = ctx->getNumLoops(); NdbRestarter res; Ndb* pNdb = GETNDB(step); if (res.getNumDbNodes() < 4) return NDBT_OK; /** * Make sure master and nextMaster is in different node groups */ loop1: int master = res.getMasterNodeId(); int nextMaster = res.getNextMasterNodeId(master); printf("master: %u nextMaster: %u", master, nextMaster); if (res.getNodeGroup(master) == res.getNodeGroup(nextMaster)) { ndbout_c(" -> restarting next master: %u", nextMaster); res.restartOneDbNode(nextMaster, /** initial */ false, /** nostart */ true, /** abort */ true); res.waitNodesNoStart(&nextMaster, 1); res.startNodes(&nextMaster, 1); if (res.waitClusterStarted()) { ndbout_c("cluster didnt restart!!"); return NDBT_FAILED; } goto loop1; } ndbout_c(" -> go go gadget skates"); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(master, val2, 2); res.dumpStateOneNode(nextMaster, val2, 2); res.insertErrorInNode(master, 8063); res.insertErrorInNode(nextMaster, 936); int err = 0; HugoOperations hugoOps(*ctx->getTab()); loop2: if((err = 
hugoOps.startTransaction(pNdb)) != 0)
  {
    ndbout_c("failed to start transaction: %u", err);
    return NDBT_FAILED;
  }

  /* Retry until the transaction is coordinated by the master. */
  int victim = hugoOps.getTransaction()->getConnectedNodeId();
  if (victim != master)
  {
    ndbout_c("transnode: %u != master: %u -> loop",
             victim, master);
    hugoOps.closeTransaction(pNdb);
    goto loop2;
  }

  if((err = hugoOps.pkUpdateRecord(pNdb, 1)) != 0)
  {
    ndbout_c("failed to update: %u", err);
    return NDBT_FAILED;
  }

  /* The commit is expected to return 4010 rather than succeed. */
  if((err = hugoOps.execute_Commit(pNdb)) != 4010)
  {
    ndbout_c("incorrect error code: %u", err);
    return NDBT_FAILED;
  }
  hugoOps.closeTransaction(pNdb);

  int nodes[2];
  nodes[0] = master;
  nodes[1] = nextMaster;
  if (res.waitNodesNoStart(nodes, 2))
  {
    return NDBT_FAILED;
  }

  if (res.startNodes(nodes, 2))
  {
    return NDBT_FAILED;
  }

  if (res.waitClusterStarted())
    return NDBT_FAILED;

  return NDBT_OK;
}

/**
 * Background load step: until the test is stopped, run transactions that
 * update-or-write, delete, and insert-or-write one random record.
 *
 * Each transaction picks r = rand() % records.  The first two operations
 * are each followed by execute_NoCommit(); the transaction is then
 * committed in roughly 90% of the cases and rolled back otherwise.  Any
 * failing operation jumps to the rollback branch.  A failing
 * startTransaction() is silently retried.  The results of commit,
 * rollback and close are not checked.
 *
 * NOTE(review): `rand() % records` divides by zero when the record count
 * is 0 -- confirm callers always configure at least one record.
 *
 * @return always NDBT_OK.
 */
int
runHammer(NDBT_Context* ctx, NDBT_Step* step)
{
  int records = ctx->getNumRecords();
  Ndb* pNdb = GETNDB(step);
  HugoOperations hugoOps(*ctx->getTab());
  while (!ctx->isTestStopped())
  {
    int r = rand() % records;
    if (hugoOps.startTransaction(pNdb) != 0)
      continue;

    if ((rand() % 100) < 50)
    {
      if (hugoOps.pkUpdateRecord(pNdb, r, 1, rand()) != 0)
        goto err;
    }
    else
    {
      if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
        goto err;
    }

    if (hugoOps.execute_NoCommit(pNdb) != 0)
      goto err;

    if (hugoOps.pkDeleteRecord(pNdb, r, 1) != 0)
      goto err;

    if (hugoOps.execute_NoCommit(pNdb) != 0)
      goto err;

    if ((rand() % 100) < 50)
    {
      if (hugoOps.pkInsertRecord(pNdb, r, 1, rand()) != 0)
        goto err;
    }
    else
    {
      if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
        goto err;
    }

    if ((rand() % 100) < 90)
    {
      hugoOps.execute_Commit(pNdb);
    }
    else
    {
      /* Also the landing point for every failed operation above. */
  err:
      hugoOps.execute_Rollback(pNdb);
    }

    hugoOps.closeTransaction(pNdb);
  }
  return NDBT_OK;
}

/**
 * Background load step: until the test is stopped, run transactions of ten
 * random update-or-write operations, pausing while the "Pause" property is
 * set.
 *
 * While paused the step calls sync_down("WaitThreads") and polls every
 * 15 ms until "Pause" is cleared or the test is stopped.  Transactions are
 * committed in roughly 90% of the cases and rolled back otherwise.
 *
 * @return NDBT_OK when the test is stopped, or NDBT_FAILED as soon as
 *         startTransaction, commit or rollback returns 4009.
 */
int
runMixedLoad(NDBT_Context* ctx, NDBT_Step* step)
{
  int res = 0;
  int records = ctx->getNumRecords();
  Ndb* pNdb = GETNDB(step);
  HugoOperations hugoOps(*ctx->getTab());
  unsigned id = (unsigned)rand();
  while (!ctx->isTestStopped())
  {
    if (ctx->getProperty("Pause", (Uint32)0))
    {
ndbout_c("thread %u stopped", id);
      /* Tell the controlling step that this thread has parked. */
      ctx->sync_down("WaitThreads");
      while (ctx->getProperty("Pause", (Uint32)0) && !ctx->isTestStopped())
        NdbSleep_MilliSleep(15);

      if (ctx->isTestStopped())
        break;
      ndbout_c("thread %u continue", id);
    }

    if ((res = hugoOps.startTransaction(pNdb)) != 0)
    {
      /* 4009 aborts the step; any other error is simply retried. */
      if (res == 4009)
        return NDBT_FAILED;
      continue;
    }

    for (int i = 0; i < 10; i++)
    {
      int r = rand() % records;
      if ((rand() % 100) < 50)
      {
        if (hugoOps.pkUpdateRecord(pNdb, r, 1, rand()) != 0)
          goto err;
      }
      else
      {
        if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
          goto err;
      }
    }

    if ((rand() % 100) < 90)
    {
      res = hugoOps.execute_Commit(pNdb);
    }
    else
    {
      /* Also the landing point for every failed operation above. */
  err:
      res = hugoOps.execute_Rollback(pNdb);
    }

    hugoOps.closeTransaction(pNdb);

    if (res == 4009)
    {
      return NDBT_FAILED;
    }
  }
  return NDBT_OK;
}

/**
 * Test step runBug41295.
 *
 * FIXME(review): the start of the loop body is missing from the text (see
 * the marker below; `next` is used without a visible declaration).  The
 * surviving part pauses the load threads through the "Pause" property,
 * sends dump code 4002 to all nodes, resumes the threads, starts node
 * `next` and waits for the cluster, then pauses, dumps and resumes once
 * more.
 */
int
runBug41295(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;

  if (res.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  int leak = 4002;
  const int cases = 1;
  int loops = ctx->getNumLoops();
  if (loops <= cases)
    loops = cases + 1;

  /* FIXME(review): text missing between `i` and `setProperty(...)`. */
  for (int i = 0; isetProperty("Pause", 1);
    ctx->sync_up_and_wait("WaitThreads", ctx->getProperty("Threads", 1));
    ndbout_c("all threads paused");
    NdbSleep_MilliSleep(5000);
    res.dumpStateAllNodes(&leak, 1);
    NdbSleep_MilliSleep(1000);
    if (res.checkClusterAlive(&next, 1))
    {
      return NDBT_FAILED;
    }
    ndbout_c("restarting threads");
    ctx->setProperty("Pause", (Uint32)0);

    ndbout_c("starting %u", next);
    res.startNodes(&next, 1);
    ndbout_c("waiting for cluster started");
    if (res.waitClusterStarted())
    {
      return NDBT_FAILED;
    }

    ndbout_c("pausing all threads");
    ctx->setProperty("Pause", 1);
    ctx->sync_up_and_wait("WaitThreads", ctx->getProperty("Threads", 1));
    ndbout_c("all threads paused");
    NdbSleep_MilliSleep(5000);
    res.dumpStateAllNodes(&leak, 1);
    NdbSleep_MilliSleep(1000);
    ndbout_c("restarting threads");
    ctx->setProperty("Pause", (Uint32)0);
  }

  ctx->stopTest();
  return NDBT_OK;
}

/**
 * Test step runBug41469.
 *
 * FIXME(review): the whole loop body is missing from the text (see the
 * marker below), so only the set-up is visible.
 */
int
runBug41469(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;

  if (res.getNumDbNodes() < 4)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  int loops = ctx->getNumLoops();

  int val0[] = { 7216, 0 };
  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  /* FIXME(review): text missing between `i` and `stopTest()`. */
  for (int i = 0; istopTest();
  return NDBT_OK;
}

/**
 * Test step: start a node with error insert 937 after sending it dump
 * code 9000 with a node from another node group as argument.
 *
 * Stops the test and returns NDBT_OK with fewer than 4 data nodes.  Per
 * loop a random node from the master's node group is restarted into
 * no-start, gets the dump, CmvmiSetRestartOnErrorInsert=1 and error 937,
 * and is started.  After 3 seconds it is expected back in no-start and is
 * started again.  Only the dumps and the final cluster wait are checked.
 * Calls ctx->stopTest() before returning NDBT_OK.
 *
 * @return NDBT_OK, or NDBT_FAILED when a checked call fails.
 */
int
runBug42422(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;

  if (res.getNumDbNodes() < 4)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  int loops = ctx->getNumLoops();
  while (--loops >= 0)
  {
    int master = res.getMasterNodeId();
    ndbout_c("master: %u", master);
    int nodeId = res.getRandomNodeSameNodeGroup(master, rand());
    ndbout_c("target: %u", nodeId);
    int node2 = res.getRandomNodeOtherNodeGroup(nodeId, rand());
    ndbout_c("node 2: %u", node2);

    res.restartOneDbNode(nodeId,
                         /** initial */ false,
                         /** nostart */ true,
                         /** abort */ true);

    res.waitNodesNoStart(&nodeId, 1);

    int dump[] = { 9000, 0 };
    dump[1] = node2;

    if (res.dumpStateOneNode(nodeId, dump, 2))
      return NDBT_FAILED;

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
    if (res.dumpStateOneNode(nodeId, val2, 2))
      return NDBT_FAILED;

    res.insertErrorInNode(nodeId, 937);
    ndbout_c("%u : starting %u", __LINE__, nodeId);
    res.startNodes(&nodeId, 1);
    NdbSleep_SecSleep(3);
    ndbout_c("%u : waiting for %u to not get not-started", __LINE__, nodeId);
    res.waitNodesNoStart(&nodeId, 1);

    ndbout_c("%u : starting %u", __LINE__, nodeId);
    res.startNodes(&nodeId, 1);

    ndbout_c("%u : waiting for cluster started", __LINE__);
    if (res.waitClusterStarted())
    {
      return NDBT_FAILED;
    }
  }

  ctx->stopTest();
  return NDBT_OK;
}

/**
 * Test step: start a node twice with error insert 9994, then start it
 * normally.
 *
 * Stops the test and returns NDBT_OK with fewer than 2 data nodes.  Per
 * loop a random node is restarted into no-start; after a 10 second pause
 * it gets CmvmiSetRestartOnErrorInsert=1 and error 9994 and is started,
 * twice in a row, each time being awaited back in no-start.  After a
 * further pause it is started and the cluster awaited.  Calls
 * ctx->stopTest() before returning NDBT_OK.
 *
 * @return NDBT_OK, or NDBT_FAILED when a checked call fails.
 */
int
runBug43224(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;

  if (res.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  int loops = ctx->getNumLoops();
  while (--loops >= 0)
  {
    int nodeId = res.getNode(NdbRestarter::NS_RANDOM);
    res.restartOneDbNode(nodeId,
                         /** initial */ false,
                         /** nostart */ true,
                         /** abort */ true);

    res.waitNodesNoStart(&nodeId, 1);
    NdbSleep_SecSleep(10);

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
    if (res.dumpStateOneNode(nodeId, val2, 2))
      return NDBT_FAILED;
res.insertErrorInNode(nodeId, 9994); res.startNodes(&nodeId, 1); NdbSleep_SecSleep(3); ndbout_c("%u : waiting for %u to not get not-started", __LINE__, nodeId); res.waitNodesNoStart(&nodeId, 1); if (res.dumpStateOneNode(nodeId, val2, 2)) return NDBT_FAILED; res.insertErrorInNode(nodeId, 9994); res.startNodes(&nodeId, 1); NdbSleep_SecSleep(3); ndbout_c("%u : waiting for %u to not get not-started", __LINE__, nodeId); res.waitNodesNoStart(&nodeId, 1); NdbSleep_SecSleep(20); // Hardcoded in ndb_mgmd (alloc timeout) ndbout_c("%u : starting %u", __LINE__, nodeId); res.startNodes(&nodeId, 1); ndbout_c("%u : waiting for cluster started", __LINE__); if (res.waitClusterStarted()) { return NDBT_FAILED; } } ctx->stopTest(); return NDBT_OK; } int runBug43888(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; if (res.getNumDbNodes() < 2) { ctx->stopTest(); return NDBT_OK; } int loops = ctx->getNumLoops(); while (--loops >= 0) { int master = res.getMasterNodeId(); ndbout_c("master: %u", master); int nodeId = master; do { nodeId = res.getNode(NdbRestarter::NS_RANDOM); } while (nodeId == master); ndbout_c("target: %u", nodeId); res.restartOneDbNode(nodeId, /** initial */ false, /** nostart */ true, /** abort */ true); res.waitNodesNoStart(&nodeId, 1); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(nodeId, val2, 2)) return NDBT_FAILED; res.insertErrorInNode(master, 7217); res.startNodes(&nodeId, 1); NdbSleep_SecSleep(3); ndbout_c("%u : waiting for %u to not get not-started", __LINE__, nodeId); res.waitNodesNoStart(&nodeId, 1); ndbout_c("%u : starting %u", __LINE__, nodeId); res.startNodes(&nodeId, 1); ndbout_c("%u : waiting for cluster started", __LINE__); if (res.waitClusterStarted()) { return NDBT_FAILED; } } ctx->stopTest(); return NDBT_OK; } #define CHECK(b, m) { int _xx = b; if (!(_xx)) { \ ndbout << "ERR: "<< m \ << " " << "File: " << __FILE__ \ << " (Line: " << __LINE__ << ")" << "- " << _xx << endl; \ return NDBT_FAILED; } } int 
runBug44952(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); const int codes [] = { 5051, 5052, 5053, 0 }; (void)codes; //int randomId = myRandom48(res.getNumDbNodes()); //int nodeId = res.getDbNodeId(randomId); int loops = ctx->getNumLoops(); const int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ; for (int l = 0; l < loops; l++) { int randomId = myRandom48(res.getNumDbNodes()); int nodeId = res.getDbNodeId(randomId); ndbout_c("killing node %u error 5051 loop %u/%u", nodeId, l+1, loops); CHECK(res.dumpStateOneNode(nodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(res.insertErrorInNode(nodeId, 5051) == 0, "failed to insert error 5051"); while (res.waitNodesNoStart(&nodeId, 1, 1 /* seconds */) != 0) { pDict->forceGCPWait(); } ndbout_c("killing node %u during restart error 5052", nodeId); for (int j = 0; j < 3; j++) { ndbout_c("loop: %d - killing node %u during restart error 5052", j, nodeId); int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ; CHECK(res.dumpStateOneNode(nodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(res.insertErrorInNode(nodeId, 5052) == 0, "failed to set error insert"); NdbSleep_SecSleep(3); // ... 
CHECK(res.startNodes(&nodeId, 1) == 0, "failed to start node"); NdbSleep_SecSleep(3); CHECK(res.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); } CHECK(res.startNodes(&nodeId, 1) == 0, "failed to start node"); CHECK(res.waitNodesStarted(&nodeId, 1) == 0, "waitNodesStarted failed"); } ctx->stopTest(); return NDBT_OK; } static BaseString tab_48474; int initBug48474(NDBT_Context* ctx, NDBT_Step* step) { NdbDictionary::Table tab = * ctx->getTab(); NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); const NdbDictionary::Table * pTab = pDict->getTable(tab.getName()); if (pTab == 0) return NDBT_FAILED; /** * Create a table with tableid > ctx->getTab() */ Uint32 cnt = 0; Vector tables; do { BaseString tmp; tmp.assfmt("%s_%u", tab.getName(), cnt); tab.setName(tmp.c_str()); pDict->dropTable(tab.getName()); if (pDict->createTable(tab) != 0) return NDBT_FAILED; const NdbDictionary::Table * pTab2 = pDict->getTable(tab.getName()); if (pTab2->getObjectId() < pTab->getObjectId()) { tables.push_back(tmp); } else { tab_48474 = tmp; HugoTransactions hugoTrans(* pTab2); if (hugoTrans.loadTable(GETNDB(step), 1000) != 0) { return NDBT_FAILED; } break; } cnt++; } while(true); // Now delete the extra one... 
for (Uint32 i = 0; idropTable(tables[i].c_str()); } tables.clear(); return NDBT_OK; } int runBug48474(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); const NdbDictionary::Table * pTab = pDict->getTable(tab_48474.c_str()); Ndb* pNdb = GETNDB(step); HugoOperations hugoOps(* pTab); int nodeId = res.getNode(NdbRestarter::NS_RANDOM); ndbout_c("restarting %d", nodeId); res.restartOneDbNode(nodeId, false, true, true); res.waitNodesNoStart(&nodeId, 1); int minlcp[] = { 7017, 1 }; res.dumpStateAllNodes(minlcp, 1); // Set min time between LCP ndbout_c("starting %d", nodeId); res.startNodes(&nodeId, 1); Uint32 minutes = 5; ndbout_c("starting uncommitted transaction %u minutes", minutes); for (Uint32 m = 0; m < minutes; m++) { if (hugoOps.startTransaction(pNdb) != 0) return NDBT_FAILED; if (hugoOps.pkUpdateRecord(pNdb, 0, 50, rand()) != 0) return NDBT_FAILED; if (hugoOps.execute_NoCommit(pNdb) != 0) return NDBT_FAILED; ndbout_c("sleeping 60s"); for (Uint32 i = 0; i<600 && !ctx->isTestStopped(); i++) { hugoOps.getTransaction()->refresh(); NdbSleep_MilliSleep(100); } if (hugoOps.execute_Commit(pNdb) != 0) return NDBT_FAILED; hugoOps.closeTransaction(pNdb); if (ctx->isTestStopped()) break; } res.dumpStateAllNodes(minlcp, 2); // reset min time between LCP ctx->stopTest(); return NDBT_OK; } int cleanupBug48474(NDBT_Context* ctx, NDBT_Step* step) { NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); pDict->dropTable(tab_48474.c_str()); return NDBT_OK; } int runBug56044(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter res; if (res.getNumDbNodes() < 2) return NDBT_OK; for (int i = 0; igetTab()); hugoTrans.scanUpdateRecords(GETNDB(step), 0); res.insertErrorInNode(node1, 5060); res.startNodes(&node0, 1); NdbSleep_SecSleep(3); res.waitNodesNoStart(&node0, 1); res.insertErrorInNode(node1, 0); res.startNodes(&node0, 1); res.waitClusterStarted(); return NDBT_OK; } int 
runBug57522(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); NdbRestarter res; if (res.getNumDbNodes() < 4) return NDBT_OK; for (int i = 0; i group1; Vector group2; Bitmask<256/32> nodeGroupMap; for (int j = 0; jgetTab()); int loops = ctx->getNumLoops(); while (loops--) { if (hugoOps.startTransaction(pNdb) != 0) return NDBT_FAILED; if (hugoOps.pkInsertRecord(pNdb, 0, 128 /* records */) != 0) return NDBT_FAILED; int err = 5062; switch(loops & 1){ case 0: err = 5062; break; case 1: err = 5063; break; } int node = (int)hugoOps.getTransaction()->getConnectedNodeId(); int node0 = res.getRandomNodeOtherNodeGroup(node, rand()); int node1 = res.getRandomNodeSameNodeGroup(node0, rand()); ndbout_c("node %u err: %u, node: %u err: %u", node0, 5061, node1, err); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(node, val2, 2); res.insertErrorInNode(node0, 5061); res.insertErrorInNode(node1, err); hugoOps.execute_Commit(pNdb); hugoOps.closeTransaction(pNdb); res.waitNodesNoStart(&node, 1); res.startNodes(&node, 1); res.waitClusterStarted(); hugoOps.clearTable(pNdb); } return NDBT_OK; } int runRestartToDynamicOrder(NDBT_Context* ctx, NDBT_Step* step) { /* Here we perform node restarts to get the various node's * dynamic ids in a particular order * This affects which nodes heartbeat which (low -> high) * and which is the president (lowest). * Each restarting node gets a higher dynamic id, so the * first node restarted will eventually become president * Note that we're assuming NoOfReplicas == 2 here. 
*/ /* TODO : * Refactor into * 1) Get current cluster dynorder info * 2) Choose a dynorder info * 3) Restart to given dynorder if necessary */ Uint32 dynOrder = ctx->getProperty("DynamicOrder", Uint32(0)); NdbRestarter restarter; Uint32 numNodes = restarter.getNumDbNodes(); Vector currOrder; Vector newOrder; Vector odds; Vector evens; if (numNodes == 2) { ndbout_c("No Dynamic reordering possible with 2 nodes"); return NDBT_OK; } if (numNodes & 1) { ndbout_c("Non multiple-of-2 number of nodes. Not supported"); return NDBT_FAILED; } Uint32 master = restarter.getMasterNodeId(); for (Uint32 n=0; n < numNodes; n++) { currOrder.push_back(master); master = restarter.getNextMasterNodeId(master); } for (Uint32 n=0; n < numNodes; n++) { Uint32 nodeId = restarter.getDbNodeId(n); if (nodeId & 1) { odds.push_back(nodeId); } else { evens.push_back(nodeId); } } if (odds.size() != evens.size()) { ndbout_c("Failed - odds.size() (%u) != evens.size() (%u)", odds.size(), evens.size()); return NDBT_FAILED; } ndbout_c("Current dynamic ordering : "); for (Uint32 n=0; n4->6->3->5->7 * 2) Odd first, no interleave, no reverse * e.g. 3->5->7->2->4->6 * 3) Even first, interleave, no reverse * e.g. 2->3->4->5->6->7 * 9) Even first, no interleave, reverse B * e.g. 2->4->6->7->5->3 * * 'First' node becomes president. * Which node(s) monitor president affects when * arbitration may be required */ ndbout_c("Generating ordering with %s president, sides %sinterleaved", (oddPresident?"odd": "even"), (interleave?"":"not ")); if (reverseSideA) ndbout_c(" %s reversed", (oddPresident?"odds": "evens")); if (reverseSideB) ndbout_c(" %s reversed", (oddPresident?"evens": "odds")); Vector* sideA; Vector* sideB; if (oddPresident) { sideA = &odds; sideB = &evens; } else { sideA = &evens; sideB = &odds; } if (interleave) { for (Uint32 n=0; n < sideA->size(); n++) { Uint32 indexA = reverseSideA? (sideA->size() - (n+1)) : n; newOrder.push_back((*sideA)[indexA]); Uint32 indexB = reverseSideB? 
(sideB->size() - (n+1)) : n; newOrder.push_back((*sideB)[indexB]); } } else { for (Uint32 n=0; n < sideA->size(); n++) { Uint32 indexA = reverseSideA? (sideA->size() - (n+1)) : n; newOrder.push_back((*sideA)[indexA]); } for (Uint32 n=0; n < sideB->size(); n++) { Uint32 indexB = reverseSideB? (sideB->size() - (n+1)) : n; newOrder.push_back((*sideB)[indexB]); } } bool diff = false; for (Uint32 n=0; n < newOrder.size(); n++) { ndbout_c(" %u %s", newOrder[n], ((n==0)?"*":" ")); diff |= (newOrder[n] != currOrder[n]); } if (!diff) { ndbout_c("Cluster already in correct configuration"); return NDBT_OK; } for (Uint32 n=0; n < newOrder.size(); n++) { ndbout_c("Now restarting node %u", newOrder[n]); if (restarter.restartOneDbNode(newOrder[n], false, // initial true, // nostart true) // abort != NDBT_OK) { ndbout_c("Failed to restart node"); return NDBT_FAILED; } if (restarter.waitNodesNoStart((const int*) &newOrder[n], 1) != NDBT_OK) { ndbout_c("Failed waiting for node to enter NOSTART state"); return NDBT_FAILED; } if (restarter.startNodes((const int*) &newOrder[n], 1) != NDBT_OK) { ndbout_c("Failed to start node"); return NDBT_FAILED; } if (restarter.waitNodesStarted((const int*) &newOrder[n], 1) != NDBT_OK) { ndbout_c("Failed waiting for node to start"); return NDBT_FAILED; } ndbout_c(" Done."); } ndbout_c("All restarts completed. 
NdbRestarter says master is %u", restarter.getMasterNodeId()); if (restarter.getMasterNodeId() != (int) newOrder[0]) { ndbout_c(" Should be %u, failing", newOrder[0]); return NDBT_FAILED; } return NDBT_OK; } struct NodeGroupMembers { Uint32 ngid; Uint32 membCount; Uint32 members[4]; }; template class Vector; int analyseDynamicOrder(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; Uint32 numNodes = restarter.getNumDbNodes(); Uint32 master = restarter.getMasterNodeId(); Vector dynamicOrder; Vector nodeGroup; Vector monitorsNode; Vector monitoredByNode; Vector monitorsRemote; Vector remoteMonitored; Vector sameNGMonitored; Vector distanceToRemote; Vector nodeIdToDynamicIndex; Uint32 maxDistanceToRemoteLink = 0; /* TODO : * Refactor into : * 1) Determine dynorder from running cluster * 2) Analyse dynorder in general * 3) Analyse dynorder from point of view of latency split * * 4) Support splits other than odd/even total * - Partial split * - Some link failures */ /* Determine dynamic order from running cluster */ for (Uint32 n=0; n < numNodes; n++) { dynamicOrder.push_back(master); nodeGroup.push_back(restarter.getNodeGroup(master)); Uint32 zero=0; nodeIdToDynamicIndex.set(n, master, zero); master = restarter.getNextMasterNodeId(master); } /* Look at implied HB links */ for (Uint32 n=0; n < numNodes; n++) { Uint32 nodeId = dynamicOrder[n]; Uint32 monitoredByIndex = (n+1) % numNodes; Uint32 monitorsIndex = (n+ numNodes - 1) % numNodes; monitoredByNode.push_back(dynamicOrder[monitoredByIndex]); monitorsNode.push_back(dynamicOrder[monitorsIndex]); remoteMonitored.push_back((nodeId & 1) != (monitoredByNode[n] & 1)); monitorsRemote.push_back((nodeId & 1) != (monitorsNode[n] & 1)); sameNGMonitored.push_back(nodeGroup[n] == nodeGroup[monitoredByIndex]); } /* Look at split implications */ for (Uint32 n=0; n < numNodes; n++) { Uint32 distanceToRemoteHBLink = 0; for (Uint32 m=0; m < numNodes; m++) { if (remoteMonitored[n+m]) break; distanceToRemoteHBLink++; } 
distanceToRemote.push_back(distanceToRemoteHBLink); maxDistanceToRemoteLink = MAX(maxDistanceToRemoteLink, distanceToRemoteHBLink); } ndbout_c("Dynamic order analysis"); for (Uint32 n=0; n < numNodes; n++) { ndbout_c(" %u %s %u%s%u%s%u \t Monitored by %s nodegroup, Dist to remote link : %u", dynamicOrder[n], ((n==0)?"*":" "), monitorsNode[n], ((monitorsRemote[n])?" >":"-->"), dynamicOrder[n], ((remoteMonitored[n])?" >":"-->"), monitoredByNode[n], ((sameNGMonitored[n])?"same":"other"), distanceToRemote[n]); } ndbout_c("\n"); Vector nodeGroupMembers; for (Uint32 n=0; n < numNodes; n++) { Uint32 ng = nodeGroup[n]; bool ngfound = false; for (Uint32 m = 0; m < nodeGroupMembers.size(); m++) { if (nodeGroupMembers[m].ngid == ng) { NodeGroupMembers& ngInfo = nodeGroupMembers[m]; ngInfo.members[ngInfo.membCount++] = dynamicOrder[n]; ngfound = true; break; } } if (!ngfound) { NodeGroupMembers newGroupInfo; newGroupInfo.ngid = ng; newGroupInfo.membCount = 1; newGroupInfo.members[0] = dynamicOrder[n]; nodeGroupMembers.push_back(newGroupInfo); } } ndbout_c("Nodegroups"); for (Uint32 n=0; n < nodeGroupMembers.size(); n++) { ndbout << " " << nodeGroupMembers[n].ngid << " ("; bool allRemoteMonitored = true; for (Uint32 m=0; m < nodeGroupMembers[n].membCount; m++) { Uint32 nodeId = nodeGroupMembers[n].members[m]; ndbout << nodeId; if ((m+1) < nodeGroupMembers[n].membCount) ndbout << ","; Uint32 dynamicIndex = nodeIdToDynamicIndex[nodeId]; allRemoteMonitored &= remoteMonitored[dynamicIndex]; } ndbout << ") Entirely remote monitored NGs risk : " << (allRemoteMonitored?"Y":"N") << "\n"; } ndbout_c("\n"); ndbout_c("Cluster-split latency behaviour"); Uint32 oddPresident = dynamicOrder[0]; Uint32 evenPresident = dynamicOrder[0]; for (Uint32 n=0; n <= maxDistanceToRemoteLink; n++) { Vector failedNodeGroups; ndbout << " " << n <<" HB latency period(s), nodes ("; bool useComma = false; bool presidentFailed = false; for (Uint32 m=0; m < numNodes; m++) { if (distanceToRemote[m] == n) { Uint32 
failingNodeId = dynamicOrder[m]; if (useComma) ndbout << ","; useComma = true; ndbout << failingNodeId; if ((failingNodeId == evenPresident) || (failingNodeId == oddPresident)) { ndbout << "*"; presidentFailed = true; } { Uint32 ng = nodeGroup[m]; for (Uint32 i=0; i< nodeGroupMembers.size(); i++) { if (nodeGroupMembers[i].ngid == ng) { if ((--nodeGroupMembers[i].membCount) == 0) { failedNodeGroups.push_back(ng); } } } } } } ndbout << ") will be declared failed." << endl; if (failedNodeGroups.size() != 0) { ndbout << " NG failure risk on reconnect for nodegroups : "; for (Uint32 i=0; i< failedNodeGroups.size(); i++) { if (i > 0) ndbout << ","; ndbout << failedNodeGroups[i]; } ndbout << endl; } if (presidentFailed) { /* A president (even/odd/both) has failed, we should * calculate the new president(s) from the p.o.v. * of both sides */ Uint32 newOdd=0; Uint32 newEven=0; for (Uint32 i=0; i< numNodes; i++) { /* Each side finds either the first node on their * side, or the first node on the other side which * is still 'alive' from their point of view */ bool candidateIsOdd = dynamicOrder[i] & 1; if (!newOdd) { if (candidateIsOdd || (distanceToRemote[i] > n)) { newOdd = dynamicOrder[i]; } } if (!newEven) { if ((!candidateIsOdd) || (distanceToRemote[i] > n)) { newEven = dynamicOrder[i]; } } } bool oddPresidentFailed = (oddPresident != newOdd); bool evenPresidentFailed = (evenPresident != newEven); if (oddPresidentFailed) { ndbout_c(" Odd president (%u) failed, new odd president : %u", oddPresident, newOdd); oddPresident = newOdd; } if (evenPresidentFailed) { ndbout_c(" Even president (%u) failed, new even president : %u", evenPresident, newEven); evenPresident = newEven; } if (oddPresident != evenPresident) { ndbout_c(" President role duplicated, Odd (%u), Even (%u)", oddPresident, evenPresident); } } } ndbout << endl << endl; return NDBT_OK; } int runSplitLatency25PctFail(NDBT_Context* ctx, NDBT_Step* step) { /* Use dump commands to inject artificial inter-node latency * 
Use an error insert to cause latency to disappear when * a node observes > 25% of nodes failed. * This should trigger a race of FAIL_REQs from both sides * of the cluster, and can result in cluster failure */ NdbRestarter restarter; /* * First set the ConnectCheckIntervalDelay to 1500 */ { int dump[] = { 9994, 1500 }; restarter.dumpStateAllNodes(dump, 2); } { int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; restarter.dumpStateAllNodes(val2, 2); } /* First the error insert which will drop latency (QMGR) */ restarter.insertErrorInAllNodes(938); /* Now the dump code which causes the system to experience * latency along odd/even lines (CMVMI) * */ int dumpStateArgs[] = {9990, 1}; restarter.dumpStateAllNodes(dumpStateArgs, 2); /** * Now wait for half of cluster to die... */ const int node_count = restarter.getNumDbNodes(); ndbout_c("Waiting for half of cluster (%u/%u) to die", node_count/2, node_count); int not_started = 0; do { not_started = 0; for (int i = 0; i < node_count; i++) { int nodeId = restarter.getDbNodeId(i); int status = restarter.getNodeStatus(nodeId); ndbout_c("Node %u status %u", nodeId, status); if (status == NDB_MGM_NODE_STATUS_NOT_STARTED) not_started++; } NdbSleep_MilliSleep(2000); ndbout_c("%u / %u in state NDB_MGM_NODE_STATUS_NOT_STARTED(%u)", not_started, node_count, NDB_MGM_NODE_STATUS_NOT_STARTED); } while (2 * not_started != node_count); ndbout_c("Restarting cluster"); if (restarter.restartAll(false, true, true)) return NDBT_FAILED; ndbout_c("Waiting cluster not started"); if (restarter.waitClusterNoStart()) return NDBT_FAILED; ndbout_c("Starting"); if (restarter.startAll()) return NDBT_FAILED; if (restarter.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } /* The purpose of this test is to check that a node failure is not misdiagnosed as a GCP stop. In other words, the timeout set to detect GCP stop must not be set so low that they are triggered before a cascading node failure has been detected. 
The test isolates the master node. This causes the master node to wait for the heartbeat from each of the other nodes to time out. Note that this happens sequentially for each node. Finally, the master is forced to run an arbitration (by using an error insert). The total time needed to detect the node failures is thus: (no_of_nodes - 1) * heartbeat_failure_time + arbitration_time The test then verifies that the node failed due to detcting that is was isolated and not due to GCP stop. */ int runIsolateMaster(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; const int nodeCount = restarter.getNumDbNodes(); if (nodeCount < 4) { /* With just two nodes, the isolated master wins the arbitration and the test would behave very differently. This case is not covered. */ g_err << "At least four data nodes required to run test." << endl; return NDBT_OK; } const int masterId = restarter.getMasterNodeId(); g_err << "Inserting errors 943 and 7145 in node " << masterId << endl; /* There is a corresponding CRASH_INSERTION(943), so the node will be restarted if it crashes due to being isolated from other nodes. If it crashes due to GCP stop, however, it will remain down. In addition, the 943 error insert forces the master to run an arbitration that times out, even if it is isolated. */ restarter.insertErrorInNode(masterId, 943); /* This error inserts sets the GCP stop and micro GCP timeouts to their minimal value, i.e. only the maximal time needed to detect node failure. That way, the test verifies the latter value is not set to low. */ restarter.insertErrorInNode(masterId, 7145); /* Block signals between the master node and all other nodes. The master will wait for heartbeats from other nodes to time out, sequentially for each node. Finally, the master should decide that it cannot form a viable cluster and stop itself. */ for (int i = 0; i < nodeCount; i++) { if (restarter.getDbNodeId(i) != masterId) { // Block signals from master node. 
g_err << "Blocking node " << restarter.getDbNodeId(i) << " for signals from node " << masterId << endl; const int dumpStateArgs[] = {9992, masterId}; int res = restarter .dumpStateOneNode(restarter.getDbNodeId(i), dumpStateArgs, 2); (void) res; // Prevent compiler warning. assert(res == 0); // Block signals to master node. g_err << "Blocking node " << masterId << " for signals from node " << restarter.getDbNodeId(i) << endl; const int dumpStateArgs2[] = {9992, restarter.getDbNodeId(i)}; res = restarter.dumpStateOneNode(masterId, dumpStateArgs2, 2); (void) res; // Prevent compiler warning. assert(res == 0); } } g_err << "Waiting for node " << masterId << " to restart " << endl; g_info << "Subscribing to MGMD events..." << endl; NdbMgmd mgmd; if (!mgmd.connect()) { g_err << "Failed to connect to MGMD" << endl; return NDBT_FAILED; } if (!mgmd.subscribe_to_events()) { g_err << "Failed to subscribe to events" << endl; return NDBT_FAILED; } char restartEventMsg[200]; // This is the message we expect to see when the master restarts. sprintf(restartEventMsg, "Node %d: Node shutdown completed, restarting.", masterId); const NDB_TICKS start = NdbTick_getCurrentTicks(); while (true) { char buff[1000]; if (mgmd.get_next_event_line(buff, sizeof(buff), 5 * 1000) && strstr(buff, restartEventMsg) != NULL) { g_err << "Node " << masterId << " restarting." << endl; break; } g_info << "Mgmd event: " << buff << endl; if (NdbTick_Elapsed(start, NdbTick_getCurrentTicks()).seconds() > 100) { g_err << "Waited 100 seconds for master to restart." << endl; return NDBT_FAILED; } } /* Now unblock outgoing signals from the master. Signals to the master will be unblocked automatically as it restarts. 
*/ for (int i = 0; i < nodeCount; i++) { if (restarter.getDbNodeId(i) != masterId) { g_err << "Unblocking node " << restarter.getDbNodeId(i) << " for signals from node " << masterId << endl; const int dumpStateArgs[] = {9993, masterId}; int res = restarter .dumpStateOneNode(restarter.getDbNodeId(i),dumpStateArgs, 2); (void) res; // Prevent compiler warning. assert(res == 0); } } g_err << "Waiting for node " << masterId << " to come back up again." << endl; if(restarter.waitClusterStarted()==0) { // All nodes are up. return NDBT_OK; } else { g_err << "Failed to restart master node!" << endl; return NDBT_FAILED; } } int runMasterFailSlowLCP(NDBT_Context* ctx, NDBT_Step* step) { /* Motivated by bug# 13323589 */ NdbRestarter res; if (res.getNumDbNodes() < 4) { return NDBT_OK; } int master = res.getMasterNodeId(); int otherVictim = res.getRandomNodeOtherNodeGroup(master, rand()); int nextMaster = res.getNextMasterNodeId(master); nextMaster = (nextMaster == otherVictim) ? res.getNextMasterNodeId(otherVictim) : nextMaster; require(nextMaster != master); require(nextMaster != otherVictim); /* Get a node which is not current or next master */ int slowNode= nextMaster; while ((slowNode == nextMaster) || (slowNode == otherVictim) || (slowNode == master)) { slowNode = res.getRandomNotMasterNodeId(rand()); } ndbout_c("master: %d otherVictim : %d nextMaster: %d slowNode: %d", master, otherVictim, nextMaster, slowNode); /* Steps : * 1. Insert slow LCP frag error in slowNode * 2. Start LCP * 3. Wait for LCP to start * 4. Kill at least two nodes including Master * 5. Wait for killed nodes to attempt to rejoin * 6. Remove slow LCP error * 7. 
Allow system to stabilise + check no errors */ // 5073 = Delay on handling BACKUP_FRAGMENT_CONF in LQH if (res.insertErrorInNode(slowNode, 5073)) { return NDBT_FAILED; } { int req[1] = {DumpStateOrd::DihStartLcpImmediately}; if (res.dumpStateOneNode(master, req, 1)) { return NDBT_FAILED; } } ndbout_c("Giving LCP time to start..."); NdbSleep_SecSleep(10); ndbout_c("Killing other victim node (%u)...", otherVictim); if (res.restartOneDbNode(otherVictim, false, false, true)) { return NDBT_FAILED; } ndbout_c("Killing Master node (%u)...", master); if (res.restartOneDbNode(master, false, false, true)) { return NDBT_FAILED; } /* ndbout_c("Waiting for old Master node to enter NoStart state..."); if (res.waitNodesNoStart(&master, 1, 10)) return NDBT_FAILED; ndbout_c("Starting old Master..."); if (res.startNodes(&master, 1)) return NDBT_FAILED; */ ndbout_c("Waiting for some progress on old Master and other victim restart"); NdbSleep_SecSleep(15); ndbout_c("Now removing error insert on slow node (%u)", slowNode); if (res.insertErrorInNode(slowNode, 0)) { return NDBT_FAILED; } ndbout_c("Now wait a while to check stability..."); NdbSleep_SecSleep(30); if (res.getNodeStatus(master) == NDB_MGM_NODE_STATUS_NOT_STARTED) { ndbout_c("Old Master needs kick to restart"); if (res.startNodes(&master, 1)) { return NDBT_FAILED; } } ndbout_c("Wait for cluster recovery..."); if (res.waitClusterStarted()) { return NDBT_FAILED; } ndbout_c("Done"); return NDBT_OK; } /* Check that create big table and delete rows followed by node restart does not leak memory. See bugs, Bug #18683398 MEMORY LEAK DURING ROLLING RESTART Bug #18731008 NDB : AVOID MAPPING EMPTY PAGES DUE TO DELETES DURING NR */ int runDeleteRestart(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; NdbDictionary::Dictionary* pDict = GETNDB(step)->getDictionary(); if (runCreateBigTable(ctx, step) != NDBT_OK) { return NDBT_FAILED; } res.getNumDbNodes(); // will force it to connect... 
/** * Get memory usage */ struct ndb_mgm_events * time0 = ndb_mgm_dump_events(res.handle, NDB_LE_MemoryUsage, 0, 0); if (!time0) { ndbout_c("ERROR: failed to fetch report!"); return NDBT_FAILED;; } printf("memory usage:\n"); Uint32 t0_minpages = ~Uint32(0); Uint32 t0_maxpages = 0; for (int i = 0; i < time0->no_of_events; i++) { if (time0->events[i].MemoryUsage.block != DBTUP) continue; printf("node %u pages: %u\n", time0->events[i].source_nodeid, time0->events[i].MemoryUsage.pages_used); if (time0->events[i].MemoryUsage.pages_used < t0_minpages) t0_minpages = time0->events[i].MemoryUsage.pages_used; if (time0->events[i].MemoryUsage.pages_used > t0_maxpages) t0_maxpages = time0->events[i].MemoryUsage.pages_used; } /** * Stop one node */ int node = res.getNode(NdbRestarter::NS_RANDOM); ndbout_c("node: %d", node); if (res.restartOneDbNode(node, /** initial */ false, /** nostart */ true, /** abort */ true)) return NDBT_FAILED; if (res.waitNodesNoStart(&node, 1)) return NDBT_FAILED; /** * Then clear table it... */ { BaseString name; name.assfmt("_%s", ctx->getTab()->getName()); const NdbDictionary::Table * pTab = pDict->getTable(name.c_str()); UtilTransactions trans(* pTab); trans.clearTable(GETNDB(step)); } /** * Create a new big table... 
*/ ctx->setProperty("PREFIX", "2"); if (runCreateBigTable(ctx, step) != NDBT_OK) return NDBT_FAILED; /** * Then start node */ res.startNodes(&node, 1); res.waitClusterStarted(); /** * Get memory usage */ struct ndb_mgm_events * time1 = ndb_mgm_dump_events(res.handle, NDB_LE_MemoryUsage, 0, 0); if (!time1) { ndbout_c("ERROR: failed to fetch report!"); return NDBT_FAILED;; } printf("memory usage:\n"); Uint32 t1_minpages = ~Uint32(0); Uint32 t1_maxpages = 0; for (int i = 0; i < time1->no_of_events; i++) { if (time1->events[i].MemoryUsage.block != DBTUP) continue; printf("node %u pages: %u\n", time1->events[i].source_nodeid, time1->events[i].MemoryUsage.pages_used); if (time1->events[i].MemoryUsage.pages_used < t1_minpages) t1_minpages = time1->events[i].MemoryUsage.pages_used; if (time1->events[i].MemoryUsage.pages_used > t1_maxpages) t1_maxpages = time1->events[i].MemoryUsage.pages_used; } { // Drop table 1 BaseString name; name.assfmt("_%s", ctx->getTab()->getName()); pDict->dropTable(name.c_str()); } { // Drop table 2 BaseString name; name.assfmt("2_%s", ctx->getTab()->getName()); pDict->dropTable(name.c_str()); } /** * Verification... * each node should have roughly the same now as before */ bool ok = true; int maxpctdiff = 10; for (int i = 0; i < time0->no_of_events; i++) { if (time0->events[i].MemoryUsage.block != DBTUP) continue; unsigned node = time0->events[i].source_nodeid; for (int j = 0; j < time1->no_of_events; j++) { if (time1->events[j].MemoryUsage.block != DBTUP) continue; if (time1->events[j].source_nodeid != node) continue; int diff = time0->events[i].MemoryUsage.pages_used - time1->events[j].MemoryUsage.pages_used; if (diff < 0) diff = -diff; int diffpct = (100 * diff) / time0->events[i].MemoryUsage.pages_used; ndbout_c("node %u pages %u - %u => diff pct: %u%% (max: %u) => %s", node, time0->events[i].MemoryUsage.pages_used, time1->events[j].MemoryUsage.pages_used, diffpct, maxpctdiff, diffpct <= maxpctdiff ? 
"OK" : "FAIL"); if (diffpct > maxpctdiff) ok = false; break; } } free(time0); free(time1); return ok ? NDBT_OK : NDBT_FAILED; } int master_err[] = { 7025, // LCP_FRG_REP in DIH 5056, // LCP complete rep from LQH 7191, // execLCP_COMPLETE_REP in DIH 7015, // execSTART_LCP_CONF in DIH 0 }; static struct { int errnum; bool obsolete; } other_err[] = { {7205, false}, // execMASTER_LCPREQ {7206, true}, // execEMPTY_LCP_CONF (not in use since 7.4.3) {7230, false}, // sendMASTER_LCPCONF and die {7232, false}, // Die after sending MASTER_LCPCONF {0, false}, }; int runLCPTakeOver(NDBT_Context* ctx, NDBT_Step* step) { { NdbRestarter res; if (res.getNumDbNodes() < 4) { ctx->stopTest(); return NDBT_OK; } } for (int i = 0; master_err[i] != 0; i++) { int errno1 = master_err[i]; for (int j = 0; other_err[j].errnum != 0; j++) { int errno2 = other_err[j].errnum; bool only_master_crash = other_err[j].obsolete; /** * we want to kill master, * and kill another node during LCP take-ove (not new master) */ NdbRestarter res; int master = res.getMasterNodeId(); int next = res.getNextMasterNodeId(master); loop: int victim = res.getRandomNodeOtherNodeGroup(master, rand()); while (next == victim) goto loop; ndbout_c("master: %u next: %u victim: %u master-err: %u victim-err: %u", master, next, victim, errno1, errno2); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(master, val2, 2); res.dumpStateOneNode(victim, val2, 2); res.insertErrorInNode(next, 7233); res.insertErrorInNode(victim, errno2); res.insertErrorInNode(master, errno1); int val1[] = { 7099 }; res.dumpStateOneNode(master, val1, 1); int list[] = { master, victim }; int cnt = NDB_ARRAY_SIZE(list); if (only_master_crash) { cnt = 1; } if (res.waitNodesNoStart(list, cnt)) { return NDBT_FAILED; } if (res.startNodes(list, cnt)) { return NDBT_FAILED; } if (res.waitClusterStarted()) { return NDBT_FAILED; } if (only_master_crash) { /* * Error set in victim should never be reached, so it will not * be 
cleared, nor node restarted. Clearing error here after * test case succeeded. */ res.insertErrorInNode(victim, 0); } } } ctx->stopTest(); return NDBT_OK; } int runBug16007980(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter res; if (res.getNumDbNodes() < 4) { g_err << "Insufficient nodes for test." << endl; ctx->stopTest(); return NDBT_OK; } int loops = ctx->getNumLoops(); for (int i = 0; i < loops; i++) { int master = res.getMasterNodeId(); int node1 = res.getRandomNodeSameNodeGroup(master, rand()); int node2 = res.getRandomNodeOtherNodeGroup(master, rand()); ndbout_c("master: %u node1: %u node2: %u", master, node1, node2); ndbout_c("restart node %u nostart", node2); res.restartNodes(&node2, 1, NdbRestarter::NRRF_NOSTART | NdbRestarter::NRRF_ABORT); CHECK(res.waitNodesNoStart(&node2, 1) == 0, ""); ndbout_c("prepare node %u to crash while node %u is starting", node1, node2); ndbout_c("dump/error insert 939 into node %u", node1); int dump[] = { 939, node2 }; res.dumpStateOneNode(node1, dump, NDB_ARRAY_SIZE(dump)); ndbout_c("error insert 940 into node %u", node1); res.insertErrorInNode(node1, 940); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; res.dumpStateOneNode(node1, val2, 2); res.insertErrorInNode(node2, 932); // Expect node 2 to crash with error 932 res.dumpStateOneNode(node2, val2, 2); ndbout_c("starting node %u", node2); res.startNodes(&node2, 1); /** * Now both should have failed! */ int list[] = { node1, node2 }; ndbout_c("waiting for node %u and %u nostart", node1, node2); CHECK(res.waitNodesNoStart(list, NDB_ARRAY_SIZE(list)) == 0, ""); ndbout_c("starting %u and %u", node1, node2); res.startNodes(list, NDB_ARRAY_SIZE(list)); ndbout_c("wait cluster started"); CHECK(res.waitClusterStarted() == 0, ""); } return NDBT_OK; } int runTestScanFragWatchdog(NDBT_Context* ctx, NDBT_Step* step) { /* Setup an error insert, then start a checkpoint */ NdbRestarter restarter; if (restarter.getNumDbNodes() < 2) { g_err << "Insufficient nodes for test." 
<< endl;
    ctx->stopTest();
    return NDBT_OK;
  }

  /* Single-pass do/while(0): every 'break' below is an error exit */
  do
  {
    g_err << "Injecting fault to suspend LCP frag scan..." << endl;
    Uint32 victim = restarter.getNode(NdbRestarter::NS_RANDOM);
    /* Pick a second, different node; it is used later to request more LCPs */
    Uint32 otherNode = 0;
    do
    {
      otherNode = restarter.getNode(NdbRestarter::NS_RANDOM);
    } while (otherNode == victim);

    // Setting 'RestartOnErrorInsert = 2' will auto restart 'victim'
    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2};
    if (restarter.dumpStateOneNode(victim, val2, 2) != 0)
    {
      g_err << "Failed setting dump state 'RestartOnErrorInsert'" << endl;
      break;
    }

    if (restarter.insertErrorInNode(victim, 10039) != 0) /* Cause LCP/backup frag scan to halt */
    {
      g_err << "Error insert failed." << endl;
      break;
    }
    /* Test property "WatchdogKillFail" selects how the victim may die */
    if (ctx->getProperty("WatchdogKillFail", Uint32(0)))
    {
      if (restarter.insertErrorInNode(victim, 5086) != 0) /* Disable watchdog kill */
      {
        g_err << "Error insert failed." << endl;
        break;
      }
      if (restarter.insertErrorInNode(victim, 942) != 0) /* Disable self-kill via Isolation */
      {
        g_err << "Error insert failed." << endl;
        break;
      }
      /* Can only be killed by others disconnecting me */
    }
    else
    {
      if (restarter.insertErrorInNode(victim, 5075) != 0) /* Treat watchdog fail as test success */
      {
        g_err << "Error insert failed." << endl;
        break;
      }
    }

    g_err << "Triggering LCP..." << endl;
    /* Now trigger LCP, in case the concurrent updates don't */
    {
      int startLcpDumpCode = 7099;
      if (restarter.dumpStateOneNode(victim, &startLcpDumpCode, 1))
      {
        g_err << "Dump state failed." << endl;
        break;
      }
    }

    g_err << "Subscribing to MGMD events..." << endl;

    /* Event lines from the management server are used to detect LCP completion */
    NdbMgmd mgmd;

    if (!mgmd.connect())
    {
      g_err << "Failed to connect to MGMD" << endl;
      break;
    }

    if (!mgmd.subscribe_to_events())
    {
      g_err << "Failed to subscribe to events" << endl;
      break;
    }

    g_err << "Waiting to hear of LCP completion..."
<< endl; Uint32 completedLcps = 0; Uint64 maxWaitSeconds = 240; Uint64 endTime = NdbTick_CurrentMillisecond() + (maxWaitSeconds * 1000); while (NdbTick_CurrentMillisecond() < endTime) { char buff[512]; if (!mgmd.get_next_event_line(buff, sizeof(buff), 10 * 1000)) { g_err << "Failed to get event line " << endl; break; } // g_err << "Event : " << buff; if (strstr(buff, "Local checkpoint") && strstr(buff, "completed")) { completedLcps++; g_err << "LCP " << completedLcps << " completed." << endl; if (completedLcps == 2) break; /* Request + wait for another... */ { int startLcpDumpCode = 7099; if (restarter.dumpStateOneNode(otherNode, &startLcpDumpCode, 1)) { g_err << "Dump state failed." << endl; break; } } } } if (completedLcps != 2) { g_err << "Some problem while waiting for LCP completion" << endl; break; } /* Now wait for the node to recover */ if (restarter.waitNodesStarted((const int*) &victim, 1, 120) != 0) { g_err << "Failed waiting for node " << victim << "to start" << endl; break; } ctx->stopTest(); return NDBT_OK; } while (0); ctx->stopTest(); return NDBT_FAILED; } static Uint32 setConfigValueAndRestartNode(NdbMgmd *mgmd, Uint32 key, Uint32 value, int nodeId, NdbRestarter *restarter) { // Get the binary config Config conf; if (!mgmd->get_config(conf)) { g_err << "Failed to get config from ndb_mgmd." << endl; return NDBT_FAILED; } // Set the key ConfigValues::Iterator iter(conf.m_configValues->m_config); for (int nodeid = 1; nodeid < MAX_NODES; nodeid ++) { Uint32 oldValue; if (!iter.openSection(CFG_SECTION_NODE, nodeid)) continue; if (iter.get(key, &oldValue)) iter.set(key, value); iter.closeSection(); } // Set the modified config if (!mgmd->set_config(conf)) { g_err << "Failed to set config in ndb_mgmd." << endl; return NDBT_FAILED; } g_err << "Restarting node to apply config change..." << endl; if (restarter->restartOneDbNode(nodeId, false, false, true)) { g_err << "Failed to restart node." 
<< endl; return NDBT_FAILED; } if (restarter->waitNodesStarted(&nodeId, 1) != 0) { g_err << "Failed waiting for node started." << endl; return NDBT_FAILED; } return NDBT_OK; } int runTestScanFragWatchdogDisable(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; if (restarter.getNumDbNodes() < 2) { g_err << "Insufficient nodes for test." << endl; ctx->stopTest(); return NDBT_OK; } int victim = restarter.getNode(NdbRestarter::NS_RANDOM); do { NdbMgmd mgmd; if(!mgmd.connect()) { g_err << "Failed to connect to ndb_mgmd." << endl; break; } g_err << "Disabling LCP frag scan watchdog..." << endl; // to disable the LCP frag scan watchdog, set // CFG_DB_LCP_SCAN_WATCHDOG_LIMIT = 0 if(setConfigValueAndRestartNode(&mgmd, CFG_DB_LCP_SCAN_WATCHDOG_LIMIT, 0, victim, &restarter) == NDBT_FAILED) break; g_err << "Injecting fault in node " << victim; g_err << " to suspend LCP frag scan..." << endl; if (restarter.insertErrorInNode(victim, 10039) != 0) { g_err << "Error insert failed." << endl; break; } g_err << "Creating table for LCP frag scan..." << endl; runLoadTable(ctx, step); g_err << "Triggering LCP..." << endl; { int startLcpDumpCode = 7099; if (restarter.dumpStateAllNodes(&startLcpDumpCode, 1)) { g_err << "Dump state failed." << endl; break; } } if (!mgmd.subscribe_to_events()) { g_err << "Failed to subscribe to mgmd events." << endl; break; } g_err << "Waiting for activity from LCP Frag watchdog..." << endl; Uint64 maxWaitSeconds = 240; Uint64 endTime = NdbTick_CurrentMillisecond() + (maxWaitSeconds * 1000); int result = NDBT_OK; while (NdbTick_CurrentMillisecond() < endTime) { char buff[512]; if (!mgmd.get_next_event_line(buff, sizeof(buff), 10 * 1000)) { g_err << "Failed to get event line." << endl; result = NDBT_FAILED; break; } if (strstr(buff, "Local checkpoint") && strstr(buff, "completed")) { g_err << "Failed to disable LCP Frag watchdog." 
<< endl; result = NDBT_FAILED; break; } } if(result == NDBT_FAILED) break; g_err << "No LCP activity: LCP Frag watchdog successfully disabled..." << endl; g_err << "Restoring default LCP Frag watchdog config..." << endl; if(setConfigValueAndRestartNode(&mgmd, CFG_DB_LCP_SCAN_WATCHDOG_LIMIT, 60, victim, &restarter) == NDBT_FAILED) break; ctx->stopTest(); return NDBT_OK; } while (0); // Insert error code to resume LCP in case node halted if (restarter.insertErrorInNode(victim, 10040) != 0) { g_err << "Test cleanup failed: failed to resume LCP." << endl; } ctx->stopTest(); return NDBT_FAILED; } int runBug16834416(NDBT_Context* ctx, NDBT_Step* step) { Ndb* pNdb = GETNDB(step); NdbRestarter restarter; if (restarter.getNumDbNodes() < 2) { g_err << "Insufficient nodes for test." << endl; ctx->stopTest(); return NDBT_OK; } int loops = ctx->getNumLoops(); for (int i = 0; i < loops; i++) { ndbout_c("running big trans"); HugoOperations ops(* ctx->getTab()); ops.startTransaction(pNdb); ops.pkInsertRecord(0, 1024); // 1024 rows ops.execute_NoCommit(pNdb, AO_IgnoreError); // TC node id Uint32 nodeId = ops.getTransaction()->getConnectedNodeId(); int errcode = 8054; ndbout_c("TC: %u => kill kill kill (error: %u)", nodeId, errcode); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; restarter.dumpStateOneNode(nodeId, val2, 2); restarter.insertErrorInNode(nodeId, errcode); ops.execute_Commit(pNdb, AO_IgnoreError); int victim = (int)nodeId; restarter.waitNodesNoStart(&victim, 1); restarter.startAll(); restarter.waitClusterStarted(); ops.closeTransaction(pNdb); ops.clearTable(pNdb); int val3[] = { 4003 }; // Check TC/LQH CommitAckMarker leak restarter.dumpStateAllNodes(val3, 1); } restarter.insertErrorInAllNodes(0); return NDBT_OK; } enum LCPFSStopCases { NdbFsError1, NdbFsError2, NUM_CASES }; int runTestLcpFsErr(NDBT_Context* ctx, NDBT_Step* step) { /* Setup an error insert, then start a checkpoint */ NdbRestarter restarter; if (restarter.getNumDbNodes() < 2) { g_err << 
"Insufficient nodes for test." << endl; ctx->stopTest(); return NDBT_OK; } g_err << "Subscribing to MGMD events..." << endl; int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; NdbLogEventHandle handle = ndb_mgm_create_logevent_handle(restarter.handle, filter); int scenario = NdbFsError1; bool failed = false; do { g_err << "Injecting fault " << scenario << " to suspend LCP frag scan..." << endl; Uint32 victim = restarter.getNode(NdbRestarter::NS_RANDOM); Uint32 otherNode = 0; do { otherNode = restarter.getNode(NdbRestarter::NS_RANDOM); } while (otherNode == victim); // Setting 'RestartOnErrorInsert = 2' will auto restart 'victim' int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2}; if (restarter.dumpStateOneNode(victim, val2, 2) != 0) { g_err << "Failed setting dump state 'RestartOnErrorInsert'" << endl; break; } bool failed = false; Uint32 lcpsRequired = 2; switch (scenario) { case NdbFsError1: { if (restarter.insertErrorInNode(victim, 10044) != 0) { g_err << "Error insert 10044 failed." << endl; failed = true; } lcpsRequired=6; break; } case NdbFsError2: { if (restarter.insertErrorInNode(victim, 10045) != 0) { g_err << "Error insert 10045 failed." << endl; failed = true; } lcpsRequired=6; break; } } if (failed) break; g_err << "Triggering LCP..." << endl; /* Now trigger LCP, in case the concurrent updates don't */ { int startLcpDumpCode = 7099; if (restarter.dumpStateOneNode(victim, &startLcpDumpCode, 1)) { g_err << "Dump state failed." << endl; break; } } g_err << "Waiting to hear of LCP completion..." 
<< endl; Uint32 completedLcps = 0; Uint64 maxWaitSeconds = (120 * lcpsRequired); Uint64 endTime = NdbTick_CurrentMillisecond() + (maxWaitSeconds * 1000); struct ndb_logevent event; do { while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointStarted && NdbTick_CurrentMillisecond() < endTime); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointCompleted && NdbTick_CurrentMillisecond() < endTime); if (NdbTick_CurrentMillisecond() >= endTime) break; completedLcps++; g_err << "LCP " << completedLcps << " completed." << endl; if (completedLcps == lcpsRequired) break; /* Request + wait for another... */ { int startLcpDumpCode = 7099; if (restarter.dumpStateOneNode(otherNode, &startLcpDumpCode, 1)) { g_err << "Dump state failed." << endl; break; } } } while (1); if (completedLcps != lcpsRequired) { g_err << "Some problem while waiting for LCP completion" << endl; break; } /* Now wait for the node to recover */ g_err << "Waiting for all nodes to be started..." << endl; if (restarter.waitNodesStarted((const int*) &victim, 1, 120) != 0) { g_err << "Failed waiting for node " << victim << "to start" << endl; break; } restarter.insertErrorInAllNodes(0); { Uint32 count = 0; g_err << "Consuming intervening mgmapi events..." << endl; while(ndb_logevent_get_next(handle, &event, 10) != 0) count++; g_err << count << " events consumed." << endl; } } while (!failed && ++scenario < NUM_CASES); ctx->stopTest(); if (failed) return NDBT_FAILED; else return NDBT_OK; } int runNodeFailGCPOpen(NDBT_Context* ctx, NDBT_Step* step) { /* Use an error insert to cause node failures, * then bring the cluster back up */ NdbRestarter restarter; int i = 0; while (i < 10 && !ctx->isTestStopped()) { /* Wait a moment or two */ ndbout_c("Waiting..."); NdbSleep_SecSleep(10); /* Insert error in all nodes */ ndbout_c("Inserting error..."); restarter.insertErrorInAllNodes(8098); /* Wait for failure... 
*/ ndbout_c("Waiting to hear of node failure %u...", i); int timeout = 120; while ((restarter.waitClusterStarted(1) == 0) && timeout--); if (timeout == 0) { g_err << "Timed out waiting for node failure" << endl; } ndbout_c("Clearing error..."); restarter.insertErrorInAllNodes(0); ndbout_c("Waiting for node recovery..."); timeout = 120; while ((restarter.waitClusterStarted(1) != 0) && (restarter.startAll() == 0) && timeout--); ndbout_c("Done."); if (timeout == 0) { g_err << "Timed out waiting for recovery" << endl; return NDBT_FAILED; } if (restarter.waitClusterStarted(1) != 0) { g_err << "Failed waiting for cluster to start." << endl; return NDBT_FAILED; } i++; } ctx->stopTest(); return NDBT_OK; } static void callback(int retCode, NdbTransaction * trans, void * ptr) { } int runBug16944817(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; if (restarter.getNumDbNodes() < 2) { g_err << "Insufficient nodes for test." << endl; ctx->stopTest(); return NDBT_OK; } #ifndef NDEBUG /** * This program doesn't work with debug compiled due * due various asserts...which are correct... 
*/ { ctx->stopTest(); return NDBT_OK; } #endif const int loops = ctx->getNumLoops(); for (int i = 0; i < loops; i++) { ndbout_c("loop %u/%u", (i+1), loops); Ndb* pNdb = new Ndb(&ctx->m_cluster_connection, "TEST_DB"); if (pNdb->init() != 0 || pNdb->waitUntilReady(30)) { delete pNdb; return NDBT_FAILED; } ndbout_c(" start trans"); HugoOperations hugoOps(*ctx->getTab()); if(hugoOps.startTransaction(pNdb) != 0) return NDBT_FAILED; if(hugoOps.pkInsertRecord(pNdb, i, 1, rand()) != 0) return NDBT_FAILED; if(hugoOps.execute_NoCommit(pNdb) != 0) return NDBT_FAILED; NdbTransaction * pTrans = hugoOps.getTransaction(); hugoOps.setTransaction(0, true); ndbout_c(" executeAsynchPrepare"); pTrans->executeAsynchPrepare(Commit, callback, 0, AbortOnError); int nodeId = pTrans->getConnectedNodeId(); ndbout_c(" insert error 8054 into %d", nodeId); restarter.insertErrorInNode(nodeId, 8054); int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateOneNode(nodeId, val2, 2)) return NDBT_FAILED; ndbout_c(" sendPreparedTransactions"); const int forceSend = 1; pNdb->sendPreparedTransactions(forceSend); /** * Now delete ndb-object with having heard reply from commit */ ndbout_c(" delete pNdb"); delete pNdb; /** * nodeId will die due to errorInsert 8054 above */ ndbout_c(" wait nodes no start"); restarter.waitNodesNoStart(&nodeId, 1); ndbout_c(" start nodes"); restarter.startNodes(&nodeId, 1); ndbout_c(" wait nodes started"); restarter.waitNodesStarted(&nodeId, 1); /** * restart it again...will cause duplicate marker (before bug fix) */ ndbout_c(" restart (again)"); restarter.restartNodes(&nodeId, 1, NdbRestarter::NRRF_NOSTART | NdbRestarter::NRRF_ABORT); ndbout_c(" wait nodes no start"); restarter.waitNodesNoStart(&nodeId, 1); ndbout_c(" start nodes"); restarter.startNodes(&nodeId, 1); ndbout_c(" wait nodes started"); restarter.waitClusterStarted(); } bool checkMarkers = true; if (checkMarkers) { ndbout_c("and finally...check markers"); int check = 2552; // check 
that no markers are leaked restarter.dumpStateAllNodes(&check, 1); } return NDBT_OK; } #define CHK2(b, e) \ if (!(b)) { \ g_err << "ERR: " << #b << " failed at line " << __LINE__ \ << ": " << e << endl; \ result = NDBT_FAILED; \ break; \ } int runBug16766493(NDBT_Context* ctx, NDBT_Step* step) { Ndb* pNdb = GETNDB(step); NdbDictionary::Dictionary* pDic = pNdb->getDictionary(); const int loops = ctx->getNumLoops(); const int records = ctx->getNumRecords(); char* tabname = strdup(ctx->getTab()->getName()); int result = NDBT_OK; ndb_srand(getpid()); NdbRestarter restarter; (void)pDic->dropTable(tabname); // replace table do { NdbDictionary::Table tab; tab.setName(tabname); tab.setTablespaceName("DEFAULT-TS"); { NdbDictionary::Column c; c.setName("A"); c.setType(NdbDictionary::Column::Unsigned); c.setPrimaryKey(true); tab.addColumn(c); } /* * Want big DD column which does not fit evenly into 32k UNDO * buffer i.e. produces big NOOP entries. The bug was reported * in 7.2 for longblob where part size is 13948. This will do. 
*/ { NdbDictionary::Column c; c.setName("B"); c.setType(NdbDictionary::Column::Char); c.setLength(13948); c.setNullable(false); c.setStorageType(NdbDictionary::Column::StorageTypeDisk); tab.addColumn(c); } { NdbDictionary::Column c; // for hugo c.setName("C"); c.setType(NdbDictionary::Column::Unsigned); c.setNullable(false); tab.addColumn(c); } CHK2(pDic->createTable(tab) == 0, pDic->getNdbError()); const NdbDictionary::Table* pTab; CHK2((pTab = pDic->getTable(tabname)) != 0, pDic->getNdbError()); HugoTransactions trans(*pTab); if (loops <= 1) g_err << "note: test is not useful for loops=" << loops << endl; for (int loop = 0; loop < loops; loop++) { g_info << "loop: " << loop << endl; CHK2(trans.loadTable(pNdb, records) == 0, trans.getNdbError()); if (loop + 1 == loops) break; // leave rows for verify while (1) { g_info << "clear table" << endl; #if 0 if (trans.clearTable(pNdb, records) == 0) break; #else // nicer for debugging if (trans.pkDelRecords(pNdb, records, records) == 0) break; #endif const NdbError& err = trans.getNdbError(); // hugo does not return error code on max tries CHK2(err.code == 0, err); #if 0 // can cause ndbrequire in exec_lcp_frag_ord int lcp = 7099; CHK2(restarter.dumpStateAllNodes(&lcp, 1) == 0, "-"); #endif const int timeout = 5; CHK2(restarter.waitClusterStarted(timeout) == 0, "-"); g_info << "assume UNDO overloaded..." 
<< endl;
        NdbSleep_MilliSleep(1000);
      }
      /* Propagate a CHK2 failure from the inner while loop */
      CHK2(result == NDBT_OK, "-");
    }
    /* Propagate a CHK2 failure from the for loop */
    CHK2(result == NDBT_OK, "-");

    g_info << "verify records" << endl;
    CHK2(trans.scanReadRecords(pNdb, records) == 0, trans.getNdbError());

    // test that restart works
    g_info << "restart" << endl;
    const bool initial = false;
    const bool nostart = true;
    CHK2(restarter.restartAll(initial, nostart) == 0, "-");
    CHK2(restarter.waitClusterNoStart() == 0, "-");
    g_info << "nostart done" << endl;
    CHK2(restarter.startAll() == 0, "-");
    CHK2(restarter.waitClusterStarted() == 0, "-");
    g_info << "restart done" << endl;

    g_info << "verify records" << endl;
    CHK2(trans.scanReadRecords(pNdb, records) == 0, trans.getNdbError());
  } while (0);

  /* Any failure above aborts the whole test program */
  if (result!=NDBT_OK)
    abort();

  free(tabname);
  return result;
}

/* Bug16895311 */

/**
 * Shared state for the Bug16895311 test steps: table name and handle,
 * key charset and maximum key length in bytes, and one Row per record.
 */
struct Bug16895311 {
  /** One generated key value and whether it currently exists in the table. */
  struct Row {
    int bytelen;  // length of the key data in bytes, excluding the 2-byte length prefix
    int chrlen;   // length of the key data in characters
    uchar* data;  // 2-byte length prefix followed by the key bytes; 0 until generated
    bool exist;   // true after a successful insert, false after a delete
    Row() {
      bytelen = -1;
      chrlen = -1;
      data = 0;
      exist = false;
    }
  };
  const char* tabname;
  int maxbytelen;
  CHARSET_INFO* cs;
  const NdbDictionary::Table* pTab;
  int records;
  Row* rows;
  Bug16895311() {
    tabname = "tBug16895311";
    maxbytelen = 0;
    cs = 0;
    pTab = 0;
    records = 0;
    rows = 0;
  };
};

/* Single instance shared by all Bug16895311 steps */
static Bug16895311 bug16895311;

/**
 * Create table tBug16895311 with a single Longvarchar primary key column
 * using charset utf8_unicode_ci, and allocate the row array.
 */
int
runBug16895311_create(NDBT_Context* ctx, NDBT_Step* step)
{
  Bug16895311& bug = bug16895311;
  Ndb* pNdb = GETNDB(step);
  NdbDictionary::Dictionary* pDic = pNdb->getDictionary();
  int result = 0;
  ndb_srand((unsigned)getpid());
  do
  {
    (void)pDic->dropTable(bug.tabname);
    NdbDictionary::Table tab;
    tab.setName(bug.tabname);
    const char* csname = "utf8_unicode_ci";
    bug.cs = get_charset_by_name(csname, MYF(0));
    require(bug.cs != 0);
    // can hit too small xfrm buffer in 2 ways
    // ndbrequire line numbers are from 7.1 revno: 4997
    if (ndb_rand() % 100 < 50)
      bug.maxbytelen = 255 * 3; // line 732
    else
      bug.maxbytelen = MAX_KEY_SIZE_IN_WORDS * 4 - 2; // line 1862
    g_err << "char key: maxbytelen=" << bug.maxbytelen << endl;
    { NdbDictionary::Column c;
      c.setName("a");
      c.setType(NdbDictionary::Column::Longvarchar);
      c.setCharset(bug.cs);
c.setLength(bug.maxbytelen); c.setNullable(false); c.setPrimaryKey(true); tab.addColumn(c); } CHK2(pDic->createTable(tab) == 0, pDic->getNdbError()); CHK2((bug.pTab = pDic->getTable(bug.tabname)) != 0, pDic->getNdbError()); // allocate rows bug.records = ctx->getNumRecords(); bug.rows = new Bug16895311::Row [bug.records]; } while (0); return result; } void doBug16895311_data(int i) { Bug16895311& bug = bug16895311; require(0 <= i && i < bug.records); Bug16895311::Row& row = bug.rows[i]; const uchar chr[][3] = { { 0xE2, 0x82, 0xAC }, // U+20AC { 0xE2, 0x84, 0xB5 }, // U+2135 { 0xE2, 0x88, 0xAB } // U+222B }; const int chrcnt = sizeof(chr) / sizeof(chr[0]); while (1) { if (row.data != 0) delete [] row.data; int len; if (ndb_rand() % 100 < 50) len = bug.maxbytelen; else len = ndb_rand() % (bug.maxbytelen + 1); row.chrlen = len / 3; row.bytelen = row.chrlen * 3; row.data = new uchar [2 + row.bytelen]; row.data[0] = uint(row.bytelen) & 0xFF; row.data[1] = uint(row.bytelen) >> 8; for (int j = 0; j < row.chrlen; j++) { int k = ndb_rand() % chrcnt; memcpy(&row.data[2 + j * 3], chr[k], 3); } int not_used; int wflen = (int)(*bug.cs->cset->well_formed_len)( bug.cs, (const char*)&row.data[2], (const char*)&row.data[2] + row.bytelen, row.chrlen, ¬_used); require(wflen == row.bytelen); bool dups = false; for (int i2 = 0; i2 < bug.records; i2++) { if (i2 != i) { Bug16895311::Row& row2 = bug.rows[i2]; if (row2.exist && row2.bytelen == row.bytelen && memcmp(row2.data, row.data, 2 + row.bytelen) == 0) { dups = true; break; } } } if (dups) continue; break; } require(row.data != 0); } int doBug16895311_op(Ndb* pNdb, const char* op, int i) { Bug16895311& bug = bug16895311; int result = NDBT_OK; require(strcmp(op, "I") == 0 || strcmp(op, "D") == 0); Bug16895311::Row& row = bug.rows[i]; int tries = 0; while (1) { tries++; Uint32 acol = 0; const char* aval = (const char*)row.data; require(aval != 0); NdbTransaction* pTx = 0; CHK2((pTx = pNdb->startTransaction()) != 0, 
pNdb->getNdbError()); NdbOperation* pOp = 0; CHK2((pOp = pTx->getNdbOperation(bug.pTab)) != 0, pTx->getNdbError()); if (*op == 'I') { CHK2(pOp->insertTuple() == 0, pOp->getNdbError()); } if (*op == 'D') { CHK2(pOp->deleteTuple() == 0, pOp->getNdbError()); } CHK2(pOp->equal(acol, aval) == 0, pOp->getNdbError()); int ret = pTx->execute(NdbTransaction::Commit); if (ret != 0) { const NdbError& error = pTx->getNdbError(); g_info << "i=" << i << " op=" << op << ": " << error << endl; CHK2(error.status == NdbError::TemporaryError, error); CHK2(tries < 100, error << ": tries=" << tries); NdbSleep_MilliSleep(100); pNdb->closeTransaction(pTx); continue; } pNdb->closeTransaction(pTx); if (*op == 'I') { require(!row.exist); row.exist = true; } if (*op == 'D') { require(row.exist); row.exist = false; } break; } return result; } int runBug16895311_load(NDBT_Context* ctx, NDBT_Step* step) { Bug16895311& bug = bug16895311; Ndb* pNdb = GETNDB(step); int result = NDBT_OK; for (int i = 0; i < bug.records; i++) { doBug16895311_data(i); CHK2(doBug16895311_op(pNdb, "I", i) == 0, "-"); } return result; } int runBug16895311_update(NDBT_Context* ctx, NDBT_Step* step) { Bug16895311& bug = bug16895311; Ndb* pNdb = GETNDB(step); int result = NDBT_OK; int i = 0; while (!ctx->isTestStopped()) { // the delete/insert can turn into update on recovering node // TODO: investigate what goes on CHK2(doBug16895311_op(pNdb, "D", i) == 0, "-"); CHK2(doBug16895311_op(pNdb, "I", i) == 0, "-"); i++; if (i >= bug.records) i = 0; } return result; } int runBug16895311_drop(NDBT_Context* ctx, NDBT_Step* step) { Bug16895311& bug = bug16895311; Ndb* pNdb = GETNDB(step); NdbDictionary::Dictionary* pDic = pNdb->getDictionary(); int result = 0; do { CHK2(pDic->dropTable(bug.tabname) == 0, pDic->getNdbError()); // free rows delete [] bug.rows; bug.rows = 0; } while (0); return result; } int runBug18044717(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; NdbRestarter restarter; int master = 
restarter.getMasterNodeId(); do { ndbout_c("slow down LCP so that global c_lcpStatus = LCP_INIT_TABLES"); ndbout_c("and all tables have tabLcpStatus = TLS_ACTIVE"); if (restarter.insertErrorInAllNodes(7236)) { result = NDBT_FAILED; break; } ndbout_c("start LCP"); int startLcpDumpCode = 7099; if (restarter.dumpStateAllNodes(&startLcpDumpCode, 1)) { result = NDBT_FAILED; break; } ndbout_c("restart master node so that NODE_FAILREP changes"); ndbout_c("c_lcpState from LCP_INIT_TABLES to LCP_STATUS_IDLE"); if(restarter.restartOneDbNode(master, false, false, true, true) != 0) { result = NDBT_FAILED; break; } } while (0); ndbout_c("restore original state of cluster and verify that there"); ndbout_c("is no core due to inconsistent c_lcpStatus/tabLcpStatus"); if (restarter.waitNodesStarted(&master, 1)) { ndbout_c("master node failed to start"); return NDBT_FAILED; } if (restarter.insertErrorInAllNodes(0)) { result = NDBT_FAILED; } return result; } static int createEvent(Ndb *pNdb, const NdbDictionary::Table &tab, bool merge_events, bool report) { char eventName[1024]; sprintf(eventName,"%s_EVENT",tab.getName()); NdbDictionary::Dictionary *myDict = pNdb->getDictionary(); if (!myDict) { g_err << "Dictionary not found " << pNdb->getNdbError().code << " " << pNdb->getNdbError().message << endl; return NDBT_FAILED; } myDict->dropEvent(eventName); NdbDictionary::Event myEvent(eventName); myEvent.setTable(tab.getName()); myEvent.addTableEvent(NdbDictionary::Event::TE_ALL); for(int a = 0; a < tab.getNoOfColumns(); a++){ myEvent.addEventColumn(a); } myEvent.mergeEvents(merge_events); if (report) myEvent.setReport(NdbDictionary::Event::ER_SUBSCRIBE); int res = myDict->createEvent(myEvent); // Add event to database if (res == 0) myEvent.print(); else if (myDict->getNdbError().classification == NdbError::SchemaObjectExists) { g_info << "Event creation failed event exists\n"; res = myDict->dropEvent(eventName); if (res) { g_err << "Failed to drop event: " << myDict->getNdbError().code 
<< " : " << myDict->getNdbError().message << endl; return NDBT_FAILED; } // try again res = myDict->createEvent(myEvent); // Add event to database if (res) { g_err << "Failed to create event (1): " << myDict->getNdbError().code << " : " << myDict->getNdbError().message << endl; return NDBT_FAILED; } } else { g_err << "Failed to create event (2): " << myDict->getNdbError().code << " : " << myDict->getNdbError().message << endl; return NDBT_FAILED; } return NDBT_OK; } static int createEvent(Ndb *pNdb, const NdbDictionary::Table &tab, NDBT_Context* ctx) { bool merge_events = ctx->getProperty("MergeEvents"); bool report = ctx->getProperty("ReportSubscribe"); return createEvent(pNdb, tab, merge_events, report); } static int dropEvent(Ndb *pNdb, const NdbDictionary::Table &tab) { char eventName[1024]; sprintf(eventName,"%s_EVENT",tab.getName()); NdbDictionary::Dictionary *myDict = pNdb->getDictionary(); if (!myDict) { g_err << "Dictionary not found " << pNdb->getNdbError().code << " " << pNdb->getNdbError().message << endl; return NDBT_FAILED; } if (myDict->dropEvent(eventName)) { g_err << "Failed to drop event: " << myDict->getNdbError().code << " : " << myDict->getNdbError().message << endl; return NDBT_FAILED; } return NDBT_OK; } static NdbEventOperation *createEventOperation(Ndb *ndb, const NdbDictionary::Table &tab, int do_report_error = 1) { char buf[1024]; sprintf(buf, "%s_EVENT", tab.getName()); NdbEventOperation *pOp= ndb->createEventOperation(buf); if (pOp == 0) { if (do_report_error) g_err << "createEventOperation: " << ndb->getNdbError().code << " " << ndb->getNdbError().message << endl; return 0; } int n_columns= tab.getNoOfColumns(); for (int j = 0; j < n_columns; j++) { pOp->getValue(tab.getColumn(j)->getName()); pOp->getPreValue(tab.getColumn(j)->getName()); } if ( pOp->execute() ) { if (do_report_error) g_err << "pOp->execute(): " << pOp->getNdbError().code << " " << pOp->getNdbError().message << endl; ndb->dropEventOperation(pOp); return 0; } return 
pOp;
}

/** Test step: create the event for the test table; NDBT_FAILED on error. */
static int runCreateEvent(NDBT_Context* ctx, NDBT_Step* step)
{
  if (createEvent(GETNDB(step),* ctx->getTab(), ctx) != 0){
    return NDBT_FAILED;
  }
  return NDBT_OK;
}

/** Test step: drop the event of the test table. */
int runDropEvent(NDBT_Context* ctx, NDBT_Step* step)
{
  return dropEvent(GETNDB(step), * ctx->getTab());
}

/** One way of provoking a GCP stop, as exercised by runGcpStop. */
struct GcpStopVariant
{
  int errorCode;            // error insert code causing the stall
  const char* description;  // text printed when the variant is run
  bool masterOnly;          // victim must be the master node
  bool gcpSaveOnly;         // only the upper 32 bits of the epoch are expected to stall
};

/** Variants run in order by runGcpStop; terminated by errorCode == 0. */
GcpStopVariant gcpStopVariants[]=
{
  {7238, "GCP_PREPARE @ participant", false, false},
  {7239, "GCP_COMMIT @ participant", false, false},
  {7244, "SUB_GCP_COMPLETE_REP @ participant", false, false},
  {7237, "GCP_SAVEREQ @ participant", false, true},
  {7241, "COPY_GCIREQ @ participant", false, true},
  {7242, "GCP COMMIT IDLE @ master", true, false},
  {7243, "GCP SAVE IDLE @ master", true, true},
  {0, "", false, false}
};

/**
 * Prepare the cluster for one GCP stop variant: lower the GCP stop
 * thresholds on all nodes, enable restart-on-error-insert, clear previous
 * error inserts and insert the errors for 'variant' into 'victimNode'.
 *
 * @param res               restarter used for dump and error-insert commands
 * @param variant           variant whose errorCode is inserted
 * @param victimNode        node receiving the error inserts
 * @param requireIsolation  when true, additionally disable the node's own
 *                          GCP stop handling so that other nodes must cut
 *                          it off
 * @return NDBT_OK on success, NDBT_FAILED if any command fails
 */
int setupTestVariant(NdbRestarter& res, const GcpStopVariant& variant, Uint32 victimNode, bool requireIsolation)
{
  /**
   * First use dump code to lower thresholds to something
   * reasonable
   * This is run on all nodes to include the master.
   */
  {
    /* GCP Commit watchdog threshold */
    int dumpCommand[3] = {DumpStateOrd::DihSetGcpStopVals, 0, 10000};
    if (res.dumpStateAllNodes(&dumpCommand[0], 3) != 0)
    {
      g_err << "Error setting dump state 'GcpStopVals'" << endl;
      return NDBT_FAILED;
    }
  }
  {
    /* GCP Save watchdog threshold */
    int dumpCommand[3] = {DumpStateOrd::DihSetGcpStopVals, 1, 15000};
    if (res.dumpStateAllNodes(&dumpCommand[0], 3) != 0)
    {
      g_err << "Error setting dump state 'GcpStopVals'" << endl;
      return NDBT_FAILED;
    }
  }
  // Setting 'RestartOnErrorInsert = 2' will auto restart 'victim'
  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2};
  if (res.dumpStateAllNodes(val2, 2))
  {
    g_err << "Error setting dump state 'RestartOnErrorInsert'" << endl;
    return NDBT_FAILED;
  }

  /* Start from a clean slate: remove error inserts left by earlier variants */
  if (res.insertErrorInAllNodes(0) != 0)
  {
    g_err << "Failed clearing errors" << endl;
    return NDBT_FAILED;
  }

  /**
   * Cause GCP to stall in some way
   */
  if (requireIsolation)
  {
    /* Error insert flagging that we are testing the
     * 'isolation required' scenario
     */
    g_err << "Causing GCP stall using error code " << variant.errorCode << " 1" << endl;
    if (res.insertError2InNode(victimNode, variant.errorCode, 1) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
  }
  else
  {
    g_err << "Causing GCP stall using error code " << variant.errorCode << endl;
    if (res.insertErrorInNode(victimNode, variant.errorCode) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
  }

  if (requireIsolation)
  {
    /**
     * Now error inserts to stop the normal GCP stop
     * mechanisms working so that we rely on
     * isolation
     */
    g_err << "Causing GCP self-stop to fail on node " << victimNode << endl;
    /* NDBCNTR : Ignore GCP Stop in SYSTEM_ERROR */
    if (res.insertErrorInNode(victimNode, 1004) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
    /* LQH : Ignore GCP Stop Kill in DUMP */
    if (res.insertErrorInNode(victimNode, 5085) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
    /**
     * QMGR : Node will not disconnect itself,
     * due to ISOLATE_REQ, others must do it.
     * BUT DISCONNECT_REP is an ok way to die.
     */
    if (res.insertErrorInNode(victimNode, 942) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
  }
  else
  {
    /* Testing normal GCP stop kill method */
    /* LQH : GCP Stop Kill is ok way to die */
    if (res.insertErrorInNode(victimNode, 5087) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
    /**
     * NDBCNTR 'Normal' GCP stop kill in SYSTEM_ERROR
     * is ok way to die
     */
    if (res.insertErrorInNode(victimNode, 1005) != 0)
    {
      g_err << "Error inserting error" << endl;
      return NDBT_FAILED;
    }
  }
  return NDBT_OK;
};

/**
 * Run every entry of gcpStopVariants once per test loop: provoke a GCP
 * stall on a victim node, confirm that the epoch stops advancing, then
 * wait for the epoch to advance again and for the cluster to be fully
 * started.
 *
 * Returns NDBT_OK only when all variants succeeded in all loops.
 */
int runGcpStop(NDBT_Context* ctx, NDBT_Step* step)
{
  /* Intention here is to :
   * a) Use DUMP code to lower GCP stop detection threshold
   * b) Use ERROR INSERT to trigger GCP stop
   * c) (Optional : Use ERROR INSERT to cause 'kill-self'
   *     handling of GCP Stop to fail, so that isolation
   *     is required)
   * d) Check that GCP is resumed
   */
  /* TODO : Survivable multiple participant failure */
  int loops = ctx->getNumLoops();
  NdbRestarter res;
  Ndb* pNdb = GETNDB(step);

  /**
   * We use an event here just so that we get live 'cluster epoch'
   * info in the API.
   * There's no actual row events used or read.
   */
  NdbEventOperation * myEvent = createEventOperation(pNdb, *ctx->getTab());
  if (myEvent == NULL)
  {
    g_err << "Failed to create Event operation" << endl;
    return NDBT_FAILED;
  }

  /**
   * requireIsolation == the normal GCP stop 'kill self'
   * mechanism is disabled via ERROR_INSERT, so that
   * isolation of the node by other nodes is required
   * to get it 'cut off' from the cluster
   */
  bool requireIsolation = (ctx->getProperty("GcpStopIsolation", Uint32(0)) != 0);
  /* Only switched to NDBT_OK after the last loop completed all variants */
  int result = NDBT_FAILED;
  while (loops--)
  {
    int variantIndex = 0;
    bool done = false;
    do
    {
      GcpStopVariant& variant = gcpStopVariants[variantIndex++];
      g_err << "Testcase " << variant.description
            << " Save only? " << variant.gcpSaveOnly
            << " Isolation : " << requireIsolation << endl;

      int victimNode = res.getNode(NdbRestarter::NS_RANDOM);
      if (variant.masterOnly)
      {
        victimNode = res.getNode(NdbRestarter::NS_MASTER);
      }
      bool isMaster = (victimNode == res.getNode(NdbRestarter::NS_MASTER));
      g_err << "Victim will be " << victimNode << " " << (isMaster ? "*" : "") << endl;

      if (setupTestVariant(res, variant, victimNode, requireIsolation) != NDBT_OK)
      {
        break;
      }

      /**
       * Epoch / GCP should now be stalled
       * First confirm the stall, then wait for it to start again
       */
      /* GCP Commit stall visible within 2 s
       * GCP Save stall requires longer
       */
      Uint32 minStallSeconds = (variant.gcpSaveOnly? 10: 2);
      g_err << "Waiting for " << minStallSeconds << " seconds of epoch stall" << endl;

      pNdb->pollEvents(1, 0);
      Uint64 startEpoch = pNdb->getLatestGCI();
      Uint32 stallSeconds = 0;
      /* Count consecutive one-second samples in which the epoch did not move */
      do
      {
        NdbSleep_MilliSleep(1000);
        pNdb->pollEvents(1, 0);
        Uint64 currEpoch = pNdb->getLatestGCI();
        bool same = false;
        if (variant.gcpSaveOnly)
        {
          /* Compare only the upper 32 bits of the epoch */
          same = ((currEpoch >> 32) == (startEpoch >> 32));
        }
        else
        {
          same = (currEpoch == startEpoch);
        }

        if (same)
        {
          g_err << "Epoch stalled @ " << (currEpoch >> 32) << "/" << (currEpoch & 0xffffffff) << endl;
          stallSeconds++;
        }
        else
        {
          g_err << "Epoch not stalled yet" << endl;
          /* Diff */
          startEpoch = currEpoch;
          stallSeconds = 0;
        }
      } while (stallSeconds < minStallSeconds);

      g_err << "Epoch definitely stalled" << endl;

      /* GCP Commit stall stops any increase
       * GCP Save stall stops only msw increase
       */
      Uint64 minNewEpoch = (variant.gcpSaveOnly?
((startEpoch >> 32) + 1) << 32 : (startEpoch + 1)); Uint64 currEpoch = pNdb->getLatestGCI(); while (currEpoch < minNewEpoch) { g_err << "Waiting for epoch to advance from " << (currEpoch >> 32) << "/" << (currEpoch & 0xffffffff) << " to at least " << (minNewEpoch >> 32) << "/" << (minNewEpoch & 0xffffffff) << endl; NdbSleep_MilliSleep(1000); currEpoch = pNdb->getLatestGCI(); } g_err << "Epoch is now " << (currEpoch >> 32) << "/" << (currEpoch & 0xffffffff) << endl; g_err << "Cluster recovered from GCP stop" << endl; g_err << "Now waiting for victim node to recover" << endl; /** * Now wait until all nodes are available */ if (res.waitClusterStarted() != 0) { g_err << "Timed out waiting for cluster to fully start" << endl; break; } g_err << "Cluster recovered..." << endl; done = (gcpStopVariants[variantIndex].errorCode == 0); } while (!done); if (!done) { /* Error exit from inner loop */ break; } if (loops == 0) { /* All loops done */ result = NDBT_OK; } } pNdb->dropEventOperation(myEvent); return result; } static const Uint32 numTables = 20; int CMT_createTableHook(Ndb* ndb, NdbDictionary::Table& table, int when, void* arg) { if (when == 0) { Uint32 num = ((Uint32*) arg)[0]; /* Substitute a unique name */ char buf[100]; BaseString::snprintf(buf, sizeof(buf), "%s_%u", table.getName(), num); table.setName(buf); ndbout << "Creating " << buf << endl; } return 0; } int createManyTables(NDBT_Context* ctx, NDBT_Step* step) { Ndb* pNdb = GETNDB(step); for (Uint32 tn = 0; tn < numTables; tn++) { Uint32 args[1]; args[0] = tn; if (NDBT_Tables::createTable(pNdb, ctx->getTab()->getName(), false, false, CMT_createTableHook, &args) != 0) { return NDBT_FAILED; } } return NDBT_OK; } int dropManyTables(NDBT_Context* ctx, NDBT_Step* step) { Ndb* pNdb = GETNDB(step); char buf[100]; for (Uint32 tn = 0; tn < numTables; tn++) { BaseString::snprintf(buf, sizeof(buf), "%s_%u", ctx->getTab()->getName(), tn); ndbout << "Dropping " << buf << endl; pNdb->getDictionary()->dropTable(buf); } 
return NDBT_OK; } int runGetTabInfo(NDBT_Context* ctx, NDBT_Step* step) { Ndb* pNdb = GETNDB(step); NdbDictionary::Dictionary* dict = pNdb->getDictionary(); Uint32 stepNum = step->getStepNo(); char buf[100]; BaseString::snprintf(buf, sizeof(buf), "%s_%u", ctx->getTab()->getName(), stepNum - 1); ndbout << "runGetTabInfo() Step num " << stepNum << " accessing table " << buf << endl; Uint32 success = 0; Uint32 failure = 0; NDB_TICKS periodStart = NdbTick_getCurrentTicks(); Uint32 periodSnap = 0; while (!ctx->isTestStopped()) { dict->invalidateTable(buf); const NdbDictionary::Table* pTab = dict->getTable(buf); if (pTab == NULL) { ndbout << "Step num " << stepNum << " got error " << dict->getNdbError().code << " " << dict->getNdbError().message << " when getting table " << buf << endl; failure++; } else { success++; } Uint64 millisPassed = NdbTick_Elapsed(periodStart, NdbTick_getCurrentTicks()).milliSec(); if (millisPassed > 10000) { ndbout << "Step num " << stepNum << " completed " << (success - periodSnap) << " lookups " << " in " << millisPassed << " millis. " << "Rate is " << (success - periodSnap) * 1000 / millisPassed << " lookups/s" << endl; periodSnap = success; periodStart = NdbTick_getCurrentTicks(); } } ndbout << "Step num " << stepNum << " ok : " << success << " failed : " << failure << endl; return NDBT_OK; } int runLCPandRestart(NDBT_Context* ctx, NDBT_Step* step) { NdbRestarter restarter; NdbSleep_MilliSleep(6000); for (int i=0; i < 4; i++) { ndbout << "Triggering LCP..." << endl; int lcpDumpCode = 7099; restarter.dumpStateAllNodes(&lcpDumpCode, 1); /* TODO : Proper 'wait for LCP completion' here */ NdbSleep_MilliSleep(20000); } int node = restarter.getNode(NdbRestarter::NS_RANDOM); ndbout << "Triggering node restart " << node << endl; restarter.restartOneDbNode2(node, 0); ndbout << "Wait for node recovery..." 
<< endl; if (restarter.waitNodesStarted(&node, 1) != 0) { ndbout << "Failed waiting for node to restart" << endl; return NDBT_FAILED; } ndbout << "Done." << endl; ctx->stopTest(); return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ "when there are no load on the system. Do this loop number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); FINALIZER(runClearTable); } TESTCASE("PkRead", "Test that one node at a time can be stopped and then restarted "\ "perform pk read while restarting. Do this loop number of times"){ TC_PROPERTY("ReadLockMode", NdbOperation::LM_Read); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); FINALIZER(runClearTable); } TESTCASE("PkReadCommitted", "Test that one node at a time can be stopped and then restarted "\ "perform pk read while restarting. Do this loop number of times"){ TC_PROPERTY("ReadLockMode", NdbOperation::LM_CommittedRead); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); FINALIZER(runClearTable); } TESTCASE("MixedPkRead", "Test that one node at a time can be stopped and then restarted "\ "perform pk read while restarting. Do this loop number of times"){ TC_PROPERTY("ReadLockMode", Uint32(-1)); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); FINALIZER(runClearTable); } TESTCASE("PkReadPkUpdate", "Test that one node at a time can be stopped and then restarted "\ "perform pk read and pk update while restarting. 
Do this loop number of times"){ TC_PROPERTY("ReadLockMode", NdbOperation::LM_Read); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runPkReadPkUpdateUntilStopped); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runPkReadPkUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("MixedPkReadPkUpdate", "Test that one node at a time can be stopped and then restarted "\ "perform pk read and pk update while restarting. Do this loop number of times"){ TC_PROPERTY("ReadLockMode", Uint32(-1)); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runPkReadPkUpdateUntilStopped); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runPkReadPkUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("ReadUpdateScan", "Test that one node at a time can be stopped and then restarted "\ "perform pk read, pk update and scan reads while restarting. Do this loop number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runPkReadPkUpdateUntilStopped); STEP(runScanReadUntilStopped); STEP(runScanUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("MixedReadUpdateScan", "Test that one node at a time can be stopped and then restarted "\ "perform pk read, pk update and scan reads while restarting. 
Do this loop number of times"){ TC_PROPERTY("ReadLockMode", Uint32(-1)); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runPkReadPkUpdateUntilStopped); STEP(runScanReadUntilStopped); STEP(runScanUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("Terror", "Test that one node at a time can be stopped and then restarted "\ "perform all kind of transactions while restarting. Do this loop number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarter); STEP(runPkReadUntilStopped); STEP(runPkUpdateUntilStopped); STEP(runScanReadUntilStopped); STEP(runScanUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("FullDb", "Test that one node at a time can be stopped and then restarted "\ "when db is full. Do this loop number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runFillTable); STEP(runRestarter); } TESTCASE("RestartRandomNode", "Test that we can execute the restart RestartRandomNode loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartRandomNodeError", "Test that we can execute the restart RestartRandomNodeError loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartRandomNodeInitial", "Test that we can execute the restart RestartRandomNodeInitial loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartNFDuringNR", "Test that we can execute the restart RestartNFDuringNR loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); 
STEP(runPkUpdateUntilStopped); STEP(runScanUpdateUntilStopped); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartMasterNodeError", "Test that we can execute the restart RestartMasterNodeError loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("GetTabInfoOverload", "Test behaviour of GET_TABINFOREQ overload + LCP + restart") { INITIALIZER(createManyTables); STEPS(runGetTabInfo, (int) numTables); STEP(runLCPandRestart); FINALIZER(dropManyTables); }; TESTCASE("TwoNodeFailure", "Test that we can execute the restart TwoNodeFailure\n"\ "(which is a multiple node failure restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("TwoMasterNodeFailure", "Test that we can execute the restart TwoMasterNodeFailure\n"\ "(which is a multiple node failure restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("FiftyPercentFail", "Test that we can execute the restart FiftyPercentFail\n"\ "(which is a multiple node failure restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartAllNodes", "Test that we can execute the restart RestartAllNodes\n"\ "(which is a system restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartAllNodesAbort", "Test that we can execute the restart RestartAllNodesAbort\n"\ "(which is a system restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); 
INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartAllNodesError9999", "Test that we can execute the restart RestartAllNodesError9999\n"\ "(which is a system restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("FiftyPercentStopAndWait", "Test that we can execute the restart FiftyPercentStopAndWait\n"\ "(which is a system restart) loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("RestartNodeDuringLCP", "Test that we can execute the restart RestartRandomNode loop\n"\ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); STEP(runPkUpdateUntilStopped); STEP(runScanUpdateUntilStopped); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("StopOnError", "Test StopOnError. 
A node that has StopOnError set to false "\ "should restart automatically when an error occurs"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runRestarts); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("CommittedRead", "Test committed read"){ INITIALIZER(runLoadTable); STEP(runDirtyRead); FINALIZER(runClearTable); } TESTCASE("LateCommit", "Test commit after node failure"){ INITIALIZER(runLoadTable); STEP(runLateCommit); FINALIZER(runClearTable); } TESTCASE("Bug15587", "Test bug with NF during NR"){ INITIALIZER(runLoadTable); STEP(runScanUpdateUntilStopped); STEP(runBug15587); FINALIZER(runClearTable); } TESTCASE("Bug15632", "Test bug with NF during NR"){ INITIALIZER(runLoadTable); STEP(runBug15632); FINALIZER(runClearTable); } TESTCASE("Bug15685", "Test bug with NF during abort"){ STEP(runBug15685); FINALIZER(runClearTable); } TESTCASE("Bug16772", "Test bug with restarting before NF handling is complete"){ STEP(runBug16772); } TESTCASE("Bug18414", "Test bug with NF during NR"){ INITIALIZER(runLoadTable); STEP(runBug18414); FINALIZER(runClearTable); } TESTCASE("Bug18612", "Test bug with partitioned clusters"){ INITIALIZER(runLoadTable); STEP(runBug18612); FINALIZER(runClearTable); } TESTCASE("Bug18612SR", "Test bug with partitioned clusters"){ INITIALIZER(runLoadTable); STEP(runBug18612SR); FINALIZER(runClearTable); } TESTCASE("Bug20185", ""){ INITIALIZER(runLoadTable); STEP(runBug20185); FINALIZER(runClearTable); } TESTCASE("Bug24543", "") { INITIALIZER(runBug24543); } TESTCASE("Bug21271", ""){ INITIALIZER(runLoadTable); STEP(runBug21271); STEP(runPkUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("Bug24717", ""){ INITIALIZER(runBug24717); } TESTCASE("Bug25364", ""){ INITIALIZER(runBug25364); } TESTCASE("Bug25468", ""){ INITIALIZER(runBug25468); } TESTCASE("Bug25554", ""){ INITIALIZER(runBug25554); } TESTCASE("Bug25984", ""){ INITIALIZER(runBug25984); } TESTCASE("Bug26457", ""){ INITIALIZER(runBug26457); } 
TESTCASE("Bug26481", ""){ INITIALIZER(runBug26481); } TESTCASE("Bug26450", ""){ INITIALIZER(runLoadTable); INITIALIZER(runBug26450); } TESTCASE("Bug27003", ""){ INITIALIZER(runBug27003); } TESTCASE("Bug27283", ""){ INITIALIZER(runBug27283); } TESTCASE("Bug27466", ""){ INITIALIZER(runBug27466); } TESTCASE("Bug28023", ""){ INITIALIZER(runBug28023); } TESTCASE("Bug28717", ""){ INITIALIZER(runBug28717); } TESTCASE("Bug31980", ""){ INITIALIZER(runBug31980); } TESTCASE("Bug29364", ""){ INITIALIZER(runBug29364); } TESTCASE("GCP", ""){ INITIALIZER(runLoadTable); STEP(runGCP); STEP(runScanUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("CommitAck", ""){ INITIALIZER(runCommitAck); FINALIZER(runClearTable); } TESTCASE("Bug32160", ""){ INITIALIZER(runBug32160); } TESTCASE("pnr", "Parallel node restart") { TC_PROPERTY("ScanUpdateNoRowCountCheck", 1); INITIALIZER(runLoadTable); INITIALIZER(runCreateBigTable); STEP(runScanUpdateUntilStopped); STEP(runDeleteInsertUntilStopped); STEP(runPnr); FINALIZER(runClearTable); FINALIZER(runDropBigTable); } TESTCASE("pnr_lcp", "Parallel node restart") { TC_PROPERTY("LCP", 1); TC_PROPERTY("ScanUpdateNoRowCountCheck", 1); INITIALIZER(runLoadTable); INITIALIZER(runCreateBigTable); STEP(runScanUpdateUntilStopped); STEP(runDeleteInsertUntilStopped); STEP(runPnr); FINALIZER(runClearTable); FINALIZER(runDropBigTable); } TESTCASE("Bug32922", ""){ INITIALIZER(runBug32922); } TESTCASE("Bug34216", ""){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runBug34216); FINALIZER(runClearTable); } TESTCASE("mixedmultiop", ""){ TC_PROPERTY("MULTI_OP", 5); INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runNF_commit); STEP(runPkUpdateUntilStopped); STEP(runPkUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("Bug34702", ""){ INITIALIZER(runBug34702); } TESTCASE("MNF", ""){ INITIALIZER(runLoadTable); STEP(runMNF); STEP(runScanUpdateUntilStopped); } TESTCASE("Bug36199", ""){ INITIALIZER(runBug36199); } 
TESTCASE("Bug36246", ""){ INITIALIZER(runLoadTable); STEP(runBug36246); VERIFIER(runClearTable); } TESTCASE("Bug36247", ""){ INITIALIZER(runLoadTable); STEP(runBug36247); VERIFIER(runClearTable); } TESTCASE("Bug36276", ""){ INITIALIZER(runLoadTable); STEP(runBug36276); VERIFIER(runClearTable); } TESTCASE("Bug36245", ""){ INITIALIZER(runLoadTable); STEP(runBug36245); VERIFIER(runClearTable); } TESTCASE("NF_Hammer", ""){ TC_PROPERTY("Sleep0", 9000); TC_PROPERTY("Sleep1", 3000); TC_PROPERTY("Rand", 1); INITIALIZER(runLoadTable); STEPS(runHammer, 25); STEP(runRestarter); VERIFIER(runClearTable); } TESTCASE("Bug41295", "") { TC_PROPERTY("Threads", 25); INITIALIZER(runLoadTable); STEPS(runMixedLoad, 25); STEP(runBug41295); FINALIZER(runClearTable); } TESTCASE("Bug41469", ""){ INITIALIZER(runLoadTable); STEP(runBug41469); STEP(runScanUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("Bug42422", ""){ INITIALIZER(runBug42422); } TESTCASE("Bug43224", ""){ INITIALIZER(runBug43224); } TESTCASE("Bug58453", "") { INITIALIZER(runBug58453); } TESTCASE("Bug43888", ""){ INITIALIZER(runBug43888); } TESTCASE("Bug44952", "Test that we can execute the restart RestartNFDuringNR loop\n" \ "number of times"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runBug44952); STEP(runPkUpdateUntilStopped); STEP(runScanUpdateUntilStopped); FINALIZER(runScanReadVerify); FINALIZER(runClearTable); } TESTCASE("Bug48474", "") { INITIALIZER(runLoadTable); INITIALIZER(initBug48474); STEP(runBug48474); STEP(runScanUpdateUntilStopped); FINALIZER(cleanupBug48474); } TESTCASE("MixReadUnlockRestart", "Run mixed read+unlock and update transactions"){ INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runPkReadPkUpdateUntilStopped); STEP(runPkReadPkUpdatePkUnlockUntilStopped); STEP(runPkReadPkUpdatePkUnlockUntilStopped); STEP(runRestarter); FINALIZER(runClearTable); } TESTCASE("Bug56044", "") { INITIALIZER(runBug56044); } TESTCASE("Bug57767", "") { 
INITIALIZER(runLoadTable); INITIALIZER(runBug57767) } TESTCASE("Bug57522", "") { INITIALIZER(runBug57522); } TESTCASE("Bug16944817", "") { INITIALIZER(runBug16944817); } TESTCASE("MasterFailSlowLCP", "DIH Master failure during a slow LCP can cause a crash.") { INITIALIZER(runMasterFailSlowLCP); } TESTCASE("TestLCPFSErr", "Test LCP FS Error handling") { INITIALIZER(runLoadTable); STEP(runPkUpdateUntilStopped); STEP(runTestLcpFsErr); } TESTCASE("ForceStopAndRestart", "Test restart and stop -with force flag") { STEP(runForceStopAndRestart); } TESTCASE("ClusterSplitLatency", "Test behaviour of 2-replica cluster with latency between halves") { TC_PROPERTY("DynamicOrder", Uint32(9)); INITIALIZER(runRestartToDynamicOrder); INITIALIZER(analyseDynamicOrder); INITIALIZER(runSplitLatency25PctFail); } TESTCASE("GCPStopFalsePositive", "Test node failures is not misdiagnosed as GCP stop") { INITIALIZER(runIsolateMaster); } TESTCASE("LCPTakeOver", "") { INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runLoadTable); STEP(runLCPTakeOver); STEP(runPkUpdateUntilStopped); STEP(runScanUpdateUntilStopped); } TESTCASE("Bug16007980", "") { INITIALIZER(runBug16007980); } TESTCASE("LCPScanFragWatchdog", "Test LCP scan watchdog") { INITIALIZER(runLoadTable); STEP(runPkUpdateUntilStopped); STEP(runTestScanFragWatchdog); } TESTCASE("LCPScanFragWatchdogDisable", "Test disabling LCP scan watchdog") { STEP(runTestScanFragWatchdogDisable); } TESTCASE("LCPScanFragWatchdogIsolation", "Test LCP scan watchdog resulting in isolation") { TC_PROPERTY("WatchdogKillFail", Uint32(1)); INITIALIZER(runLoadTable); STEP(runPkUpdateUntilStopped); STEP(runTestScanFragWatchdog); } TESTCASE("Bug16834416", "") { INITIALIZER(runBug16834416); } TESTCASE("NodeFailGCPOpen", "Test behaviour of code to keep GCP open for node failure " " handling") { INITIALIZER(runLoadTable); STEP(runPkUpdateUntilStopped); STEP(runNodeFailGCPOpen); FINALIZER(runClearTable); } TESTCASE("Bug16766493", "") { INITIALIZER(runBug16766493); } 
TESTCASE("multiTCtakeover", "") { INITIALIZER(run_multiTCtakeover); STEP(runLargeTransactions); STEP(runManyTransactions); FINALIZER(runClearTable); } TESTCASE("Bug16895311", "Test NR with long UTF8 PK.\n" "Give any tablename as argument (T1)") { INITIALIZER(runBug16895311_create); INITIALIZER(runBug16895311_load); STEP(runBug16895311_update); STEP(runRestarter); FINALIZER(runBug16895311_drop); } TESTCASE("Bug18044717", "Test LCP state change from LCP_INIT_TABLES " "to LCP_STATUS_IDLE during node restart") { INITIALIZER(runBug18044717); } TESTCASE("DeleteRestart", "Check that create big table and delete rows followed by " "node restart does not leak memory") { INITIALIZER(runDeleteRestart); } TESTCASE("GcpStop", "Check various Gcp stop scenarios") { INITIALIZER(runCreateEvent); STEP(runGcpStop); FINALIZER(runDropEvent); } TESTCASE("GcpStopIsolation", "Check various Gcp stop scenarios where isolation is " "required to recover.") { TC_PROPERTY("GcpStopIsolation", Uint32(1)); INITIALIZER(runCreateEvent); STEP(runGcpStop); FINALIZER(runDropEvent); } NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ ndb_init(); NDBT_TESTSUITE_INSTANCE(testNodeRestart); #if 0 // It might be interesting to have longer defaults for num // loops in this test // Just performing 100 node restarts would not be enough? // We can have initialisers in the NDBT_Testcase class like // this... testNodeRestart.setDefaultLoops(1000); #endif return testNodeRestart.execute(argc, argv); } template class Vector;