Mirror of https://github.com/timescale/timescaledb.git
Fix concurrent locking with chunk_data_node table
Concurrent inserts into a distributed hypertable after a data node has been marked as unavailable would produce a `tuple concurrently deleted` error. The problem occurs because of missing tuple-level locking during the scan and the subsequent concurrent delete from the chunk_data_node table; the scan should instead be treated as a `SELECT … FOR UPDATE` case. Based on the fix by @erimatnor. Fix #5153
This commit is contained in:
parent 4c0075010d
commit 830c37b5b0
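In short, the fix makes the catalog scans that precede a chunk_data_node delete take a tuple-level lock and makes the delete callback check the lock result, so a row that a concurrent backend already removed is skipped instead of raising an error. A condensed sketch of that pattern, pieced together from the diff below (not a drop-in snippet; the security-context switching around the delete is omitted here):

    /* Request a tuple lock while scanning -- the moral equivalent of
     * SELECT ... FOR UPDATE on the chunk_data_node catalog table. */
    ScanTupLock tuplock = {
        .lockmode = LockTupleExclusive,
        .waitpolicy = LockWaitBlock, /* block until a concurrent deleter finishes */
    };

    /* In the per-tuple delete callback, act on the reported lock result. */
    switch (ti->lockresult)
    {
        case TM_Ok:
            /* We hold the tuple lock; it is safe to delete the catalog tuple. */
            ts_catalog_delete_tid(ti->scanrel, ts_scanner_get_tuple_tid(ti));
            break;
        case TM_Deleted:
            /* A concurrent session deleted the row first; nothing left to do. */
            break;
        default:
            Assert(false);
            break;
    }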
@@ -13,6 +13,7 @@ accidentally triggering the load of a previous DB version.**
* #5317 Fix some incorrect memory handling
* #5367 Rename columns in old-style continuous aggregates
* #5384 Fix Hierarchical Continuous Aggregates chunk_interval_size
* #5153 Fix concurrent locking with chunk_data_node table

**Thanks**
* @Medvecrab for discovering an issue with copying NameData when forming heap tuples.
@@ -4,6 +4,7 @@
* LICENSE-APACHE for a copy of the license.
*/
#include <postgres.h>
#include <access/tableam.h>
#include <catalog/pg_foreign_table.h>
#include <catalog/pg_foreign_server.h>
#include <catalog/dependency.h>
@@ -19,6 +20,7 @@
#include "hypertable_cache.h"
#include "scanner.h"
#include "chunk.h"
#include "debug_point.h"

static void
chunk_data_node_insert_relation(const Relation rel, int32 chunk_id, int32 node_chunk_id,
@@ -83,7 +85,7 @@ ts_chunk_data_node_insert_multi(List *chunk_data_nodes)
static int
chunk_data_node_scan_limit_internal(ScanKeyData *scankey, int num_scankeys, int indexid,
tuple_found_func on_tuple_found, void *scandata, int limit,
LOCKMODE lock, MemoryContext mctx)
LOCKMODE lock, ScanTupLock *tuplock, MemoryContext mctx)
{
Catalog *catalog = ts_catalog_get();
ScannerCtx scanctx = {
@@ -94,6 +96,7 @@ chunk_data_node_scan_limit_internal(ScanKeyData *scankey, int num_scankeys, int
.data = scandata,
.limit = limit,
.tuple_found = on_tuple_found,
.tuplock = tuplock,
.lockmode = lock,
.scandirection = ForwardScanDirection,
.result_mctx = mctx,
@@ -162,7 +165,8 @@ static int
ts_chunk_data_node_scan_by_chunk_id_and_node_internal(int32 chunk_id, const char *node_name,
bool scan_by_remote_chunk_id,
tuple_found_func tuple_found, void *data,
LOCKMODE lockmode, MemoryContext mctx)
LOCKMODE lockmode, ScanTupLock *tuplock,
MemoryContext mctx)
{
ScanKeyData scankey[2];
int nkeys = 0;
@@ -203,12 +207,14 @@ ts_chunk_data_node_scan_by_chunk_id_and_node_internal(int32 chunk_id, const char
data,
0,
lockmode,
tuplock,
mctx);
}

static int
ts_chunk_data_node_scan_by_node_internal(const char *node_name, tuple_found_func tuple_found,
void *data, LOCKMODE lockmode, MemoryContext mctx)
void *data, LOCKMODE lockmode, ScanTupLock *tuplock,
MemoryContext mctx)
{
ScanKeyData scankey[1];

@@ -225,6 +231,7 @@ ts_chunk_data_node_scan_by_node_internal(const char *node_name, tuple_found_func
data,
0,
lockmode,
tuplock,
mctx);
}

@@ -233,13 +240,13 @@ List *
ts_chunk_data_node_scan_by_chunk_id(int32 chunk_id, MemoryContext mctx)
{
List *chunk_data_nodes = NIL;

ts_chunk_data_node_scan_by_chunk_id_and_node_internal(chunk_id,
NULL,
false,
chunk_data_node_tuple_found,
&chunk_data_nodes,
AccessShareLock,
NULL,
mctx);
return chunk_data_nodes;
}
@@ -249,13 +256,13 @@ List *
ts_chunk_data_node_scan_by_chunk_id_filter(int32 chunk_id, MemoryContext mctx)
{
List *chunk_data_nodes = NIL;

ts_chunk_data_node_scan_by_chunk_id_and_node_internal(chunk_id,
NULL,
false,
chunk_data_node_tuple_found_filter,
&chunk_data_nodes,
AccessShareLock,
NULL,
mctx);
return chunk_data_nodes;
}
@@ -266,13 +273,13 @@ chunk_data_node_scan_by_chunk_id_and_node_name(int32 chunk_id, const char *node_

{
List *chunk_data_nodes = NIL;

ts_chunk_data_node_scan_by_chunk_id_and_node_internal(chunk_id,
node_name,
scan_by_remote_chunk_id,
chunk_data_node_tuple_found,
&chunk_data_nodes,
AccessShareLock,
NULL,
mctx);
Assert(list_length(chunk_data_nodes) <= 1);

@@ -302,9 +309,20 @@ chunk_data_node_tuple_delete(TupleInfo *ti, void *data)
{
CatalogSecurityContext sec_ctx;

ts_catalog_database_info_become_owner(ts_catalog_database_info_get(), &sec_ctx);
ts_catalog_delete_tid(ti->scanrel, ts_scanner_get_tuple_tid(ti));
ts_catalog_restore_user(&sec_ctx);
switch (ti->lockresult)
{
case TM_Ok:
ts_catalog_database_info_become_owner(ts_catalog_database_info_get(), &sec_ctx);
ts_catalog_delete_tid(ti->scanrel, ts_scanner_get_tuple_tid(ti));
ts_catalog_restore_user(&sec_ctx);
break;
case TM_Deleted:
/* Already deleted, do nothing. */
break;
default:
Assert(false);
break;
}

return SCAN_CONTINUE;
}
@@ -312,34 +330,55 @@ chunk_data_node_tuple_delete(TupleInfo *ti, void *data)
int
ts_chunk_data_node_delete_by_chunk_id(int32 chunk_id)
{
ScanTupLock tuplock = {
.lockmode = LockTupleExclusive,
.waitpolicy = LockWaitBlock,
};

return ts_chunk_data_node_scan_by_chunk_id_and_node_internal(chunk_id,
NULL,
false,
chunk_data_node_tuple_delete,
NULL,
RowExclusiveLock,
&tuplock,
CurrentMemoryContext);
}

int
ts_chunk_data_node_delete_by_chunk_id_and_node_name(int32 chunk_id, const char *node_name)
{
return ts_chunk_data_node_scan_by_chunk_id_and_node_internal(chunk_id,
node_name,
false,
chunk_data_node_tuple_delete,
NULL,
RowExclusiveLock,
CurrentMemoryContext);
int count;

ScanTupLock tuplock = {
.lockmode = LockTupleExclusive,
.waitpolicy = LockWaitBlock,
};

count = ts_chunk_data_node_scan_by_chunk_id_and_node_internal(chunk_id,
node_name,
false,
chunk_data_node_tuple_delete,
NULL,
RowExclusiveLock,
&tuplock,
CurrentMemoryContext);
DEBUG_WAITPOINT("chunk_data_node_delete");
return count;
}

int
ts_chunk_data_node_delete_by_node_name(const char *node_name)
{
ScanTupLock tuplock = {
.lockmode = LockTupleExclusive,
.waitpolicy = LockWaitBlock,
};
return ts_chunk_data_node_scan_by_node_internal(node_name,
chunk_data_node_tuple_delete,
NULL,
RowExclusiveLock,
&tuplock,
CurrentMemoryContext);
}

@@ -773,6 +773,7 @@ void
chunk_update_stale_metadata(Chunk *new_chunk, List *chunk_data_nodes)
{
List *serveroids = NIL, *removeoids = NIL;
bool locked = false;
ChunkDataNode *cdn;
ListCell *lc;

@@ -807,6 +808,13 @@ chunk_update_stale_metadata(Chunk *new_chunk, List *chunk_data_nodes)
*/
if (!list_member_oid(serveroids, cdn->foreign_server_oid))
{
if (!locked)
{
LockRelationOid(ts_catalog_get()->tables[CHUNK_DATA_NODE].id,
ShareUpdateExclusiveLock);
locked = true;
}

chunk_update_foreign_server_if_needed(new_chunk, cdn->foreign_server_oid, false);
ts_chunk_data_node_delete_by_chunk_id_and_node_name(cdn->fd.chunk_id,
NameStr(cdn->fd.node_name));
@@ -19,6 +19,8 @@
#include <fmgr.h>
#include <funcapi.h>
#include <miscadmin.h>
#include <storage/lmgr.h>
#include <storage/lockdefs.h>
#include <utils/array.h>
#include <utils/builtins.h>
#include <utils/jsonb.h>
@@ -1775,6 +1777,7 @@ chunk_api_call_chunk_drop_replica(const Chunk *chunk, const char *node_name, Oid
* This chunk might have this data node as primary, change that association
* if so. Then delete the chunk_id and node_name association.
*/
LockRelationOid(chunk->table_id, ShareUpdateExclusiveLock);
chunk_update_foreign_server_if_needed(chunk, serverid, false);
ts_chunk_data_node_delete_by_chunk_id_and_node_name(chunk->fd.id, node_name);
}
@@ -25,6 +25,8 @@
#include <nodes/parsenodes.h>
#include <nodes/nodes.h>
#include <nodes/value.h>
#include <storage/lockdefs.h>
#include <storage/lmgr.h>
#include <utils/acl.h>
#include <utils/builtins.h>
#include <utils/array.h>
@@ -1157,6 +1159,8 @@ data_node_modify_hypertable_data_nodes(const char *node_name, List *hypertable_d
{
ChunkDataNode *cdn = lfirst(cs_lc);
const Chunk *chunk = ts_chunk_get_by_id(cdn->fd.chunk_id, true);
LockRelationOid(chunk->table_id, ShareUpdateExclusiveLock);

chunk_update_foreign_server_if_needed(chunk, cdn->foreign_server_oid, false);
ts_chunk_data_node_delete_by_chunk_id_and_node_name(cdn->fd.chunk_id,
NameStr(cdn->fd.node_name));
@@ -9,6 +9,8 @@
#include <nodes/plannodes.h>
#include <commands/explain.h>
#include <foreign/fdwapi.h>
#include <storage/lmgr.h>
#include <storage/lockdefs.h>
#include <utils/rel.h>
#include <fmgr.h>
#include <miscadmin.h>
@@ -452,6 +454,8 @@ fdw_chunk_update_stale_metadata(TsFdwModifyState *fmstate)
/* get filtered list */
List *serveroids = get_chunk_data_nodes(rel->rd_id);
ListCell *lc;
bool chunk_is_locked = false;

Assert(list_length(serveroids) == fmstate->num_data_nodes);

all_data_nodes = ts_chunk_data_node_scan_by_chunk_id(chunk->fd.id, CurrentMemoryContext);
@@ -471,6 +475,12 @@ fdw_chunk_update_stale_metadata(TsFdwModifyState *fmstate)
if (!list_member_oid(serveroids, cdn->foreign_server_oid) &&
!list_member_oid(fmstate->stale_data_nodes, cdn->foreign_server_oid))
{
if (!chunk_is_locked)
{
LockRelationOid(chunk->table_id, ShareUpdateExclusiveLock);
chunk_is_locked = true;
}

chunk_update_foreign_server_if_needed(chunk, cdn->foreign_server_oid, false);
ts_chunk_data_node_delete_by_chunk_id_and_node_name(cdn->fd.chunk_id,
NameStr(cdn->fd.node_name));
tsl/test/isolation/expected/dist_ha_chunk_drop.out (new file, 57 lines)
@@ -0,0 +1,57 @@
Parsed test spec with 3 sessions

starting permutation: s1_init s1_set_unavailable s3_lock_enable s1_insert s2_insert s3_lock_release s1_set_available
node_name
-----------
data_node_1
(1 row)

node_name
-----------
data_node_2
(1 row)

node_name
-----------
data_node_3
(1 row)

node_name
-----------
data_node_4
(1 row)

created
-------
t
(1 row)

step s1_init: INSERT INTO metric1(ts, val, dev_id) SELECT s.*, 3.14, d.* FROM generate_series('2021-08-17 00:00:00'::timestamp, '2021-08-17 00:00:59'::timestamp, '1 s'::interval) s CROSS JOIN generate_series(1, 500) d;
step s1_set_unavailable: SELECT alter_data_node('data_node_4', available=>false);
alter_data_node
--------------------------------------
(data_node_4,localhost,55432,cdha_4,f)
(1 row)

step s3_lock_enable: SELECT debug_waitpoint_enable('chunk_data_node_delete');
debug_waitpoint_enable
----------------------

(1 row)

step s1_insert: INSERT INTO metric1(ts, val, dev_id) SELECT s.*, 3.14, d.* FROM generate_series('2021-08-17 00:01:00'::timestamp, '2021-08-17 00:01:59'::timestamp, '1 s'::interval) s CROSS JOIN generate_series(1, 249) d; <waiting ...>
step s2_insert: INSERT INTO metric1(ts, val, dev_id) SELECT s.*, 3.14, d.* FROM generate_series('2021-08-17 00:01:00'::timestamp, '2021-08-17 00:01:59'::timestamp, '1 s'::interval) s CROSS JOIN generate_series(250, 499) d; <waiting ...>
step s3_lock_release: SELECT debug_waitpoint_release('chunk_data_node_delete');
debug_waitpoint_release
-----------------------

(1 row)

step s1_insert: <... completed>
step s2_insert: <... completed>
step s1_set_available: SELECT alter_data_node('data_node_4', available=>true);
alter_data_node
--------------------------------------
(data_node_4,localhost,55432,cdha_4,t)
(1 row)
@@ -1,6 +1,11 @@
Parsed test spec with 3 sessions

starting permutation: s3_lock_enable s1_create_dist_rp s2_insert s3_lock_count s3_lock_release
delete_data_node
----------------
t
(1 row)

node_name
-----------
data_node_1
@@ -56,6 +61,11 @@ t
step s2_insert: <... completed>

starting permutation: s2_begin s2_insert s3_lock_enable s1_create_dist_rp s3_lock_count s2_commit s3_lock_count s3_lock_release
delete_data_node
----------------
t
(1 row)

node_name
-----------
data_node_1
@@ -122,6 +132,11 @@ t
step s2_commit: <... completed>

starting permutation: s3_lock_enable s1_create_dist_rp s2_create_dist_rp s3_lock_count s3_lock_release
delete_data_node
----------------
t
(1 row)

node_name
-----------
data_node_1
@@ -185,6 +200,11 @@ t


starting permutation: s3_lock_enable s1_create_dist_rp s2_dist_exec s3_lock_count s3_lock_release
delete_data_node
----------------
t
(1 row)

node_name
-----------
data_node_1
@@ -240,6 +260,11 @@ t
step s2_dist_exec: <... completed>

starting permutation: s3_lock_enable s1_create_dist_rp s2_create_dist_ht s3_lock_count s3_lock_release
delete_data_node
----------------
t
(1 row)

node_name
-----------
data_node_1
@@ -303,6 +328,11 @@ t


starting permutation: s3_lock_enable s1_create_dist_rp s2_drop_dist_ht s3_lock_count s3_lock_release
delete_data_node
----------------
t
(1 row)

node_name
-----------
data_node_1
@@ -356,136 +386,3 @@ t
(4 rows)

step s2_drop_dist_ht: <... completed>

starting permutation: s3_lock_enable s1_create_dist_rp s2_add_dn s3_lock_count s3_lock_count_fs s3_lock_release s3_lock_count_fs
node_name
-----------
data_node_1
(1 row)

node_name
-----------
data_node_2
(1 row)

node_name
-----------
data_node_3
(1 row)

created
-------
t
(1 row)

step s3_lock_enable: SELECT debug_waitpoint_enable('create_distributed_restore_point_lock');
debug_waitpoint_enable
----------------------

(1 row)

step s1_create_dist_rp: SELECT restore_point > pg_lsn('0/0') as valid_lsn FROM create_distributed_restore_point('s1_test'); <waiting ...>
step s2_add_dn: SELECT node_name FROM add_data_node('data_node_4', host => 'localhost', database => 'cdrp_4'); <waiting ...>
step s3_lock_count:
SELECT waitpoint_locks('create_distributed_restore_point_lock') as cdrp_locks,
remote_txn_locks() as remote_txn_locks;

cdrp_locks|remote_txn_locks
----------+----------------
2| 1
(1 row)

step s3_lock_count_fs: SELECT foreign_server_locks() as foreign_server_locks;
foreign_server_locks
--------------------
2
(1 row)

step s3_lock_release: SELECT debug_waitpoint_release('create_distributed_restore_point_lock');
debug_waitpoint_release
-----------------------

(1 row)

step s1_create_dist_rp: <... completed>
valid_lsn
---------
t
t
t
t
(4 rows)

step s2_add_dn: <... completed>
node_name
-----------
data_node_4
(1 row)

step s3_lock_count_fs: SELECT foreign_server_locks() as foreign_server_locks;
foreign_server_locks
--------------------
0
(1 row)


starting permutation: s3_lock_enable s1_create_dist_rp s2_del_dn s3_lock_count s3_lock_release
node_name
-----------
data_node_1
(1 row)

node_name
-----------
data_node_2
(1 row)

node_name
-----------
data_node_3
(1 row)

created
-------
t
(1 row)

step s3_lock_enable: SELECT debug_waitpoint_enable('create_distributed_restore_point_lock');
debug_waitpoint_enable
----------------------

(1 row)

step s1_create_dist_rp: SELECT restore_point > pg_lsn('0/0') as valid_lsn FROM create_distributed_restore_point('s1_test'); <waiting ...>
step s2_del_dn: SELECT * FROM delete_data_node('data_node_4'); <waiting ...>
step s3_lock_count:
SELECT waitpoint_locks('create_distributed_restore_point_lock') as cdrp_locks,
remote_txn_locks() as remote_txn_locks;

cdrp_locks|remote_txn_locks
----------+----------------
2| 2
(1 row)

step s3_lock_release: SELECT debug_waitpoint_release('create_distributed_restore_point_lock');
debug_waitpoint_release
-----------------------

(1 row)

step s1_create_dist_rp: <... completed>
valid_lsn
---------
t
t
t
t
t
(5 rows)

step s2_del_dn: <... completed>
delete_data_node
----------------
t
(1 row)

@@ -6,6 +6,7 @@ set(TEST_TEMPLATES_MODULE_DEBUG
reorder_vs_insert.spec.in
reorder_vs_select.spec.in
remote_create_chunk.spec.in
dist_ha_chunk_drop.spec.in
dist_restore_point.spec.in
dist_cmd_exec.spec.in
cagg_drop_chunks_iso.spec.in
tsl/test/isolation/specs/dist_ha_chunk_drop.spec.in (new file, 66 lines)
@@ -0,0 +1,66 @@
# This file and its contents are licensed under the Timescale License.
# Please see the included NOTICE for copyright information and
# LICENSE-TIMESCALE for a copy of the license.

#
# Test concurrent insert into a dist hypertable after a data node is marked
# as unavailable, which used to produce a `tuple concurrently deleted` error.
#
# The problem occurs because of missing tuple-level locking during the scan and the
# concurrent delete from the chunk_data_node table afterwards, which should be treated as
# `SELECT … FOR UPDATE`.
#
setup
{
CREATE OR REPLACE FUNCTION debug_waitpoint_enable(TEXT) RETURNS VOID LANGUAGE C VOLATILE STRICT
AS '@TS_MODULE_PATHNAME@', 'ts_debug_point_enable';

CREATE OR REPLACE FUNCTION debug_waitpoint_release(TEXT) RETURNS VOID LANGUAGE C VOLATILE STRICT
AS '@TS_MODULE_PATHNAME@', 'ts_debug_point_release';

CREATE OR REPLACE FUNCTION debug_waitpoint_id(TEXT) RETURNS BIGINT LANGUAGE C VOLATILE STRICT
AS '@TS_MODULE_PATHNAME@', 'ts_debug_point_id';

CREATE TABLE metric1(ts TIMESTAMPTZ NOT NULL, val FLOAT8 NOT NULL, dev_id INT4 NOT NULL);
}

setup { SELECT node_name FROM add_data_node('data_node_1', host => 'localhost', database => 'cdha_1', if_not_exists => true); }
setup { SELECT node_name FROM add_data_node('data_node_2', host => 'localhost', database => 'cdha_2', if_not_exists => true); }
setup { SELECT node_name FROM add_data_node('data_node_3', host => 'localhost', database => 'cdha_3', if_not_exists => true); }
setup { SELECT node_name FROM add_data_node('data_node_4', host => 'localhost', database => 'cdha_4', if_not_exists => true); }
setup { SELECT created FROM create_distributed_hypertable('metric1', 'ts', 'dev_id', chunk_time_interval => INTERVAL '1 hour', replication_factor => 4); }

teardown
{
DROP TABLE metric1;
}

# bootstrap cluster with data
session "s1"
setup
{
SET application_name = 's1';
}
step "s1_init" { INSERT INTO metric1(ts, val, dev_id) SELECT s.*, 3.14, d.* FROM generate_series('2021-08-17 00:00:00'::timestamp, '2021-08-17 00:00:59'::timestamp, '1 s'::interval) s CROSS JOIN generate_series(1, 500) d; }
step "s1_set_unavailable" { SELECT alter_data_node('data_node_4', available=>false); }
step "s1_set_available" { SELECT alter_data_node('data_node_4', available=>true); }
step "s1_insert" { INSERT INTO metric1(ts, val, dev_id) SELECT s.*, 3.14, d.* FROM generate_series('2021-08-17 00:01:00'::timestamp, '2021-08-17 00:01:59'::timestamp, '1 s'::interval) s CROSS JOIN generate_series(1, 249) d; }

# concurrent session
session "s2"
setup
{
SET application_name = 's2';
}
step "s2_insert" { INSERT INTO metric1(ts, val, dev_id) SELECT s.*, 3.14, d.* FROM generate_series('2021-08-17 00:01:00'::timestamp, '2021-08-17 00:01:59'::timestamp, '1 s'::interval) s CROSS JOIN generate_series(250, 499) d; }

# locking session
session "s3"
setup
{
SET application_name = 's3';
}
step "s3_lock_enable" { SELECT debug_waitpoint_enable('chunk_data_node_delete'); }
step "s3_lock_release" { SELECT debug_waitpoint_release('chunk_data_node_delete'); }

permutation "s1_init" "s1_set_unavailable" "s3_lock_enable" "s1_insert" "s2_insert" "s3_lock_release" "s1_set_available"
@@ -32,10 +32,11 @@ setup

CREATE TABLE IF NOT EXISTS disttable(time timestamptz NOT NULL, device int, temp float);
}
setup { SELECT true AS delete_data_node FROM delete_data_node('data_node_4', if_exists => true); }
setup { SELECT node_name FROM add_data_node('data_node_1', host => 'localhost', database => 'cdrp_1', if_not_exists => true); }
setup { SELECT node_name FROM add_data_node('data_node_2', host => 'localhost', database => 'cdrp_2', if_not_exists => true); }
setup { SELECT node_name FROM add_data_node('data_node_3', host => 'localhost', database => 'cdrp_3', if_not_exists => true); }
setup { SELECT created FROM create_distributed_hypertable('disttable', 'time', 'device'); }
setup { SELECT created FROM create_distributed_hypertable('disttable', 'time', 'device', data_nodes => ARRAY['data_node_1', 'data_node_2', 'data_node_3']); }

teardown
{
@@ -63,8 +64,6 @@ step "s2_create_dist_rp" { SELECT restore_point > pg_lsn('0/0') as valid_lsn FRO
step "s2_insert" { INSERT INTO disttable VALUES ('2019-08-02 10:45', 0, 0.0); }
step "s2_begin" { BEGIN; }
step "s2_commit" { COMMIT; }
step "s2_add_dn" { SELECT node_name FROM add_data_node('data_node_4', host => 'localhost', database => 'cdrp_4'); }
step "s2_del_dn" { SELECT * FROM delete_data_node('data_node_4'); }
step "s2_create_dist_ht" {
CREATE TABLE disttable2(time timestamptz NOT NULL, device int, temp float);
SELECT created FROM create_distributed_hypertable('disttable2', 'time', 'device');
@@ -85,7 +84,6 @@ step "s3_lock_count" {
SELECT waitpoint_locks('create_distributed_restore_point_lock') as cdrp_locks,
remote_txn_locks() as remote_txn_locks;
}
step "s3_lock_count_fs" { SELECT foreign_server_locks() as foreign_server_locks; }

# case 1: new transaction DML/commit during the create_distributed_restore_point()
permutation "s3_lock_enable" "s1_create_dist_rp" "s2_insert" "s3_lock_count" "s3_lock_release"
@@ -104,9 +102,3 @@ permutation "s3_lock_enable" "s1_create_dist_rp" "s2_create_dist_ht" "s3_lock_co

# case 6: concurrent DDL/commit during the create_distributed_restore_point()
permutation "s3_lock_enable" "s1_create_dist_rp" "s2_drop_dist_ht" "s3_lock_count" "s3_lock_release"

# case 7: concurrent add_data_node() during the create_distributed_restore_point()
permutation "s3_lock_enable" "s1_create_dist_rp" "s2_add_dn" "s3_lock_count" "s3_lock_count_fs" "s3_lock_release" "s3_lock_count_fs"

# case 8: concurrent delete_data_node() during the create_distributed_restore_point()
permutation "s3_lock_enable" "s1_create_dist_rp" "s2_del_dn" "s3_lock_count" "s3_lock_release"