Fix move chunk cleanup logic

Add a new "complete" stage to the "chunk_copy_operation" catalog to
indicate successful move/copy chunk operations. Make the
"cleanup_copy_chunk_operation" procedure more robust: when it is called
on a successfully completed operation, it now only deletes the chunk
operation entry from the catalog without doing any other unwanted
cleanup.
Dmitry Simonenko 2022-05-19 15:52:12 +03:00 committed by Dmitry Simonenko
parent 8375b9aa53
commit 54d6b41e65
2 changed files with 36 additions and 2 deletions
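
As a reading aid, here is a minimal standalone sketch, not the TimescaleDB
sources, of the mechanism this change relies on: the stage table gains a
terminal no-op "complete" entry, so the stage executor must tolerate a NULL
stage callback while still recording the stage name in the catalog. The
ChunkCopy fields, record_stage and stage_delete_chunk below are illustrative
stand-ins for the real structures and helpers.

#include <stdio.h>

typedef struct ChunkCopy
{
	const char *last_recorded_stage;	/* stand-in for the catalog row */
} ChunkCopy;

typedef struct ChunkCopyStage
{
	const char *name;
	void (*function)(ChunkCopy *cc);	/* NULL: nothing left to execute */
} ChunkCopyStage;

static void
stage_delete_chunk(ChunkCopy *cc)
{
	(void) cc;	/* a real stage does its work here */
}

static const ChunkCopyStage chunk_copy_stages[] = {
	{ "delete_chunk", stage_delete_chunk },
	{ "complete", NULL },	/* new terminal stage marking a successful operation */
	{ NULL, NULL }		/* done marker */
};

static void
record_stage(ChunkCopy *cc, const char *name)
{
	/* the real code persists this in the chunk_copy_operation catalog */
	cc->last_recorded_stage = name;
	printf("completed_stage = %s\n", name);
}

int
main(void)
{
	ChunkCopy cc = { NULL };
	const ChunkCopyStage *stage;

	for (stage = chunk_copy_stages; stage->name != NULL; stage++)
	{
		if (stage->function)	/* the "complete" stage has no work of its own */
			stage->function(&cc);
		record_stage(&cc, stage->name);
	}
	return 0;
}

Modelling "complete" as a regular stage, rather than a separate flag, keeps the
existing completed_stage column and stage-lookup logic working unchanged; only
the executor needs the NULL-function guard shown above.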

View File

@@ -61,6 +61,7 @@
#define CCS_ATTACH_CHUNK "attach_chunk"
#define CCS_ATTACH_COMPRESSED_CHUNK "attach_compressed_chunk"
#define CCS_DELETE_CHUNK "delete_chunk"
#define CCS_COMPLETE "complete"
typedef struct ChunkCopyStage ChunkCopyStage;
typedef struct ChunkCopy ChunkCopy;
@@ -1025,6 +1026,9 @@ static const ChunkCopyStage chunk_copy_stages[] = {
*/
{ CCS_DELETE_CHUNK, chunk_copy_stage_delete_chunk, NULL },
/* Operation complete */
{ CCS_COMPLETE, NULL, NULL },
/* Done Marker */
{ NULL, NULL, NULL }
};
@@ -1044,7 +1048,9 @@ chunk_copy_execute(ChunkCopy *cc)
SPI_start_transaction();
cc->stage = stage;
cc->stage->function(cc);
if (cc->stage->function)
cc->stage->function(cc);
/* Mark current stage as completed and update the catalog */
chunk_copy_operation_update(cc);
@@ -1231,6 +1237,13 @@ chunk_copy_cleanup(const char *operation_id)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid chunk copy operation identifier. Entry not found")));
/* If it's a completed operation, return immediately after deleting catalog entry */
if (namestrcmp(&cc->fd.completed_stage, CCS_COMPLETE) == 0)
{
chunk_copy_operation_delete_by_id(NameStr(cc->fd.operation_id));
return;
}
/* Identify the last completed stage for this activity. */
stage_idx = 0;
for (stage = &chunk_copy_stages[stage_idx]; stage->name != NULL;

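The new early return in chunk_copy_cleanup can be summarised with a similarly
hedged sketch: if the operation already reached the "complete" stage, cleanup
only deletes the catalog row and returns; otherwise it still rolls back the
partially completed stages before removing the row. ChunkCopyOperation,
rollback_partial_work, delete_catalog_entry and the second operation id are
illustrative stand-ins, not the real catalog types or helpers.

#include <stdio.h>
#include <string.h>

typedef struct ChunkCopyOperation
{
	const char *operation_id;
	const char *completed_stage;	/* stand-ins for the catalog columns */
} ChunkCopyOperation;

static void
delete_catalog_entry(const ChunkCopyOperation *op)
{
	printf("delete chunk_copy_operation row %s\n", op->operation_id);
}

static void
rollback_partial_work(const ChunkCopyOperation *op)
{
	printf("undo stages after \"%s\" for %s\n", op->completed_stage, op->operation_id);
}

static void
cleanup_copy_chunk_operation(const ChunkCopyOperation *op)
{
	/* Fast path: a successfully completed operation only loses its catalog row. */
	if (strcmp(op->completed_stage, "complete") == 0)
	{
		delete_catalog_entry(op);
		return;
	}

	/* An unfinished operation still gets the full rollback before the delete. */
	rollback_partial_work(op);
	delete_catalog_entry(op);
}

int
main(void)
{
	ChunkCopyOperation done = { "ts_copy_1_1", "complete" };
	ChunkCopyOperation stuck = { "ts_copy_2_4", "delete_chunk" };

	cleanup_copy_chunk_operation(&done);
	cleanup_copy_chunk_operation(&stuck);
	return 0;
}

This matches what the updated Perl test below checks: after a successful
move_chunk, calling cleanup_copy_chunk_operation leaves the
chunk_copy_operation catalog empty without undoing the move itself.
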
View File

@@ -8,7 +8,7 @@ use warnings;
use AccessNode;
use DataNode;
use TestLib;
use Test::More tests => 274;
use Test::More tests => 283;
#Initialize all the multi-node instances
my $an = AccessNode->create('an');
@@ -83,6 +83,11 @@ while ($curr_index < $arrSize)
$curr_index++;
}
#_timescaledb_catalog.chunk_copy_operation catalog should be empty due to the cleanup above
$an->psql_is(
'postgres', "SELECT * from _timescaledb_catalog.chunk_copy_operation",
"", "AN catalog is empty as expected");
for my $node ($an, $dn1, $dn2)
{
$node->safe_psql('postgres', "CREATE ROLE testrole LOGIN");
@@ -124,6 +129,22 @@ $an->safe_psql('postgres',
"SET ROLE testrole; CALL timescaledb_experimental.move_chunk(chunk=>'_timescaledb_internal._dist_hyper_1_1_chunk', source_node=> 'dn1', destination_node => 'dn2')"
);
#An entry for the above move should exist with "complete" stage in the catalog now
$an->psql_is(
'postgres',
"SELECT operation_id, completed_stage, source_node_name, dest_node_name, delete_on_source_node from _timescaledb_catalog.chunk_copy_operation",
"ts_copy_1_1|complete|dn1|dn2|t",
"AN catalog is as expected");
#Run cleanup on this operation. It should just delete the catalog entry since the
#activity has completed successfully. The rest of the checks below should succeed
$an->safe_psql('postgres',
"CALL timescaledb_experimental.cleanup_copy_chunk_operation(operation_id=>'ts_copy_1_1');"
);
$an->psql_is(
'postgres', "SELECT * from _timescaledb_catalog.chunk_copy_operation",
"", "AN catalog is empty as expected");
#Query datanode1 after the above move
$dn1->psql_is(
'postgres',