diff --git a/tsl/src/data_node.c b/tsl/src/data_node.c index fd1a86552..f8faaba4d 100644 --- a/tsl/src/data_node.c +++ b/tsl/src/data_node.c @@ -1946,6 +1946,24 @@ data_node_get_node_name_list_with_aclcheck(AclMode mode, bool fail_on_aclcheck) return nodes; } +void +data_node_fail_if_nodes_are_unavailable(void) +{ + /* Walk every data node (no ACL check) and raise an ERROR as soon as one is not available */ + List *data_node_list = data_node_get_node_name_list_with_aclcheck(ACL_NO_CHECK, false); + ListCell *lc; + + foreach (lc, data_node_list) + { + const char *node_name = lfirst(lc); + const ForeignServer *server; + + server = data_node_get_foreign_server(node_name, ACL_NO_CHECK, false, false); + if (!ts_data_node_is_available_by_server(server)) + ereport(ERROR, (errmsg("some data nodes are not available"))); + } +} + /* * Get server list with optional ACL check. * diff --git a/tsl/src/data_node.h b/tsl/src/data_node.h index 4cfdc9054..5f80e6a70 100644 --- a/tsl/src/data_node.h +++ b/tsl/src/data_node.h @@ -35,6 +35,7 @@ extern List *data_node_get_node_name_list_with_aclcheck(AclMode mode, bool fail_ extern List *data_node_get_filtered_node_name_list(ArrayType *nodearr, AclMode mode, bool fail_on_aclcheck); extern List *data_node_get_node_name_list(void); +extern void data_node_fail_if_nodes_are_unavailable(void); extern List *data_node_array_to_node_name_list_with_aclcheck(ArrayType *nodearr, AclMode mode, bool fail_on_aclcheck); extern List *data_node_array_to_node_name_list(ArrayType *nodearr); diff --git a/tsl/src/dist_backup.c b/tsl/src/dist_backup.c index 78b42a775..a042b3e72 100644 --- a/tsl/src/dist_backup.c +++ b/tsl/src/dist_backup.c @@ -21,6 +21,7 @@ #include "debug_point.h" #include "dist_util.h" #include "remote/dist_commands.h" +#include "data_node.h" #include "dist_backup.h" #define TS_ACCESS_NODE_TYPE "access_node" @@ -115,6 +116,9 @@ create_distributed_restore_point(PG_FUNCTION_ARGS) errhint("Connect to the access node and create the distributed restore point " "from 
there."))); + /* Ensure all data nodes are available */ + data_node_fail_if_nodes_are_unavailable(); + /* * In order to achieve synchronization across the multinode cluster, * we must ensure that the restore point created on the access node is diff --git a/tsl/test/expected/dist_backup.out b/tsl/test/expected/dist_backup.out index d6b9ddbc1..5702ef913 100644 --- a/tsl/test/expected/dist_backup.out +++ b/tsl/test/expected/dist_backup.out @@ -131,6 +131,32 @@ SELECT pg_lsn(:'lsn_3') > pg_lsn(:'lsn_2') as valid_lsn; t (1 row) +-- test create_distributed_restore_point() when one of the nodes is unavailable +SELECT alter_data_node(:'DATA_NODE_1', available => false); + alter_data_node +------------------------------------------------------- + (db_dist_backup_1,localhost,55432,db_dist_backup_1,f) +(1 row) + +\set ON_ERROR_STOP 0 +SELECT create_distributed_restore_point('test'); +ERROR: some data nodes are not available +\set ON_ERROR_STOP 1 +SELECT alter_data_node(:'DATA_NODE_1', available => true); + alter_data_node +------------------------------------------------------- + (db_dist_backup_1,localhost,55432,db_dist_backup_1,t) +(1 row) + +SELECT node_name, node_type, pg_lsn(restore_point) > pg_lsn('0/0') as valid_lsn FROM create_distributed_restore_point('test') ORDER BY node_name; + node_name | node_type | valid_lsn +------------------+-------------+----------- + db_dist_backup_1 | data_node | t + db_dist_backup_2 | data_node | t + db_dist_backup_3 | data_node | t + | access_node | t +(4 rows) + DROP DATABASE :DATA_NODE_1; DROP DATABASE :DATA_NODE_2; DROP DATABASE :DATA_NODE_3; diff --git a/tsl/test/sql/dist_backup.sql b/tsl/test/sql/dist_backup.sql index 724f2102d..5553950f4 100644 --- a/tsl/test/sql/dist_backup.sql +++ b/tsl/test/sql/dist_backup.sql @@ -110,6 +110,16 @@ SELECT pg_lsn(:'lsn_2') > pg_lsn(:'lsn_1') as valid_lsn; SELECT pg_create_restore_point('dist_rp') as lsn_3 \gset SELECT pg_lsn(:'lsn_3') > pg_lsn(:'lsn_2') as valid_lsn; +-- test 
create_distributed_restore_point() when one of the nodes is unavailable +SELECT alter_data_node(:'DATA_NODE_1', available => false); + +\set ON_ERROR_STOP 0 +SELECT create_distributed_restore_point('test'); +\set ON_ERROR_STOP 1 + +SELECT alter_data_node(:'DATA_NODE_1', available => true); +SELECT node_name, node_type, pg_lsn(restore_point) > pg_lsn('0/0') as valid_lsn FROM create_distributed_restore_point('test') ORDER BY node_name; + DROP DATABASE :DATA_NODE_1; DROP DATABASE :DATA_NODE_2; DROP DATABASE :DATA_NODE_3;