Ensure data nodes are available when creating a distributed restore point

Make sure that the data node list does not contain any unavailable data
nodes when using the create_distributed_restore_point() API.

Fix #4979
This commit is contained in:
Dmitry Simonenko 2022-11-24 15:13:17 +02:00 committed by Dmitry Simonenko
parent 7bfd28a02f
commit 826dcd2721
5 changed files with 59 additions and 0 deletions

View File

@ -1946,6 +1946,24 @@ data_node_get_node_name_list_with_aclcheck(AclMode mode, bool fail_on_aclcheck)
return nodes;
}
/*
 * Verify that every data node is currently marked as available.
 *
 * The node names are fetched with ACL_NO_CHECK, each name is resolved to
 * its foreign server, and the server is tested with
 * ts_data_node_is_available_by_server(). The first server that fails the
 * test causes an ERROR to be reported.
 */
void
data_node_fail_if_nodes_are_unavailable(void)
{
	ListCell *cell;
	List *node_names = data_node_get_node_name_list_with_aclcheck(ACL_NO_CHECK, false);

	foreach (cell, node_names)
	{
		const char *name = lfirst(cell);
		const ForeignServer *fs =
			data_node_get_foreign_server(name, ACL_NO_CHECK, false, false);

		/* Nothing to report for this node; move on to the next one */
		if (ts_data_node_is_available_by_server(fs))
			continue;

		ereport(ERROR, (errmsg("some data nodes are not available")));
	}
}
/*
* Get server list with optional ACL check.
*

View File

@ -35,6 +35,7 @@ extern List *data_node_get_node_name_list_with_aclcheck(AclMode mode, bool fail_
extern List *data_node_get_filtered_node_name_list(ArrayType *nodearr, AclMode mode,
bool fail_on_aclcheck);
extern List *data_node_get_node_name_list(void);
extern void data_node_fail_if_nodes_are_unavailable(void);
extern List *data_node_array_to_node_name_list_with_aclcheck(ArrayType *nodearr, AclMode mode,
bool fail_on_aclcheck);
extern List *data_node_array_to_node_name_list(ArrayType *nodearr);

View File

@ -21,6 +21,7 @@
#include "debug_point.h"
#include "dist_util.h"
#include "remote/dist_commands.h"
#include "data_node.h"
#include "dist_backup.h"
#define TS_ACCESS_NODE_TYPE "access_node"
@ -115,6 +116,9 @@ create_distributed_restore_point(PG_FUNCTION_ARGS)
errhint("Connect to the access node and create the distributed restore point "
"from there.")));
/* Ensure all data nodes are available */
data_node_fail_if_nodes_are_unavailable();
/*
* In order to achieve synchronization across the multinode cluster,
* we must ensure that the restore point created on the access node is

View File

@ -131,6 +131,32 @@ SELECT pg_lsn(:'lsn_3') > pg_lsn(:'lsn_2') as valid_lsn;
t
(1 row)
-- test create_distributed_restore_point() when one of the nodes is unavailable
SELECT alter_data_node(:'DATA_NODE_1', available => false);
alter_data_node
-------------------------------------------------------
(db_dist_backup_1,localhost,55432,db_dist_backup_1,f)
(1 row)
\set ON_ERROR_STOP 0
SELECT create_distributed_restore_point('test');
ERROR: some data nodes are not available
\set ON_ERROR_STOP 1
SELECT alter_data_node(:'DATA_NODE_1', available => true);
alter_data_node
-------------------------------------------------------
(db_dist_backup_1,localhost,55432,db_dist_backup_1,t)
(1 row)
SELECT node_name, node_type, pg_lsn(restore_point) > pg_lsn('0/0') as valid_lsn FROM create_distributed_restore_point('test') ORDER BY node_name;
node_name | node_type | valid_lsn
------------------+-------------+-----------
db_dist_backup_1 | data_node | t
db_dist_backup_2 | data_node | t
db_dist_backup_3 | data_node | t
| access_node | t
(4 rows)
DROP DATABASE :DATA_NODE_1;
DROP DATABASE :DATA_NODE_2;
DROP DATABASE :DATA_NODE_3;

View File

@ -110,6 +110,16 @@ SELECT pg_lsn(:'lsn_2') > pg_lsn(:'lsn_1') as valid_lsn;
SELECT pg_create_restore_point('dist_rp') as lsn_3 \gset
SELECT pg_lsn(:'lsn_3') > pg_lsn(:'lsn_2') as valid_lsn;
-- test create_distributed_restore_point() when one of the nodes is unavailable
SELECT alter_data_node(:'DATA_NODE_1', available => false);
\set ON_ERROR_STOP 0
SELECT create_distributed_restore_point('test');
\set ON_ERROR_STOP 1
SELECT alter_data_node(:'DATA_NODE_1', available => true);
SELECT node_name, node_type, pg_lsn(restore_point) > pg_lsn('0/0') as valid_lsn FROM create_distributed_restore_point('test') ORDER BY node_name;
DROP DATABASE :DATA_NODE_1;
DROP DATABASE :DATA_NODE_2;
DROP DATABASE :DATA_NODE_3;