add tombstones on rename and fix several concurrency bugs with renames in progress

This commit is contained in:
Jon Fu 2022-08-23 15:50:58 -07:00
parent 8f011c9f73
commit a958ddb58a
3 changed files with 69 additions and 53 deletions

View File

@ -1025,8 +1025,9 @@ ACTOR template <class Transaction>
Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr,
TenantName tenantName,
TenantMapEntry tenantEntry,
DataClusterMetadata* clusterMetadata) {
state bool updateClusterCapacity = !tenantEntry.tenantGroup.present();
DataClusterMetadata* clusterMetadata,
bool isRenamePair = false) {
state bool updateClusterCapacity = !tenantEntry.tenantGroup.present() && !isRenamePair;
if (tenantEntry.tenantGroup.present()) {
ManagementClusterMetadata::tenantMetadata().tenantGroupTenantIndex.erase(
tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), tenantName));
@ -1356,6 +1357,12 @@ struct DeleteTenantImpl {
throw tenant_not_found();
}
if (tenantEntry.get().tenantState == TenantState::REMOVING) {
if (tenantEntry.get().renamePair.present()) {
self->pairName = tenantEntry.get().renamePair.get();
}
}
self->tenantId = tenantEntry.get().id;
wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster.get()));
return tenantEntry.get().tenantState == TenantState::REMOVING;
@ -1455,7 +1462,7 @@ struct DeleteTenantImpl {
// Remove the tenant from its tenant group
wait(managementClusterRemoveTenantFromGroup(
tr, tenantName, tenantEntry.get(), &self->ctx.dataClusterMetadata.get()));
tr, tenantName, tenantEntry.get(), &self->ctx.dataClusterMetadata.get(), pairDelete));
wait(pairFuture);
return Void();
@ -1734,7 +1741,7 @@ struct RenameTenantImpl {
// Remove the tenant from its tenant group
wait(managementClusterRemoveTenantFromGroup(
tr, self->oldName, tenantEntry, &self->ctx.dataClusterMetadata.get()));
tr, self->oldName, tenantEntry, &self->ctx.dataClusterMetadata.get(), true));
return Void();
}
@ -1758,15 +1765,6 @@ struct RenameTenantImpl {
throw tenant_removed();
}
// Check cluster capacity. If we would exceed the amount due to temporary extra tenants
// then we deny the rename request altogether.
int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
tr, oldTenantEntry.assignedCluster.get(), Snapshot::False, 0));
if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
throw cluster_no_capacity();
}
// If the new entry is present, we can only continue if this is a retry of the same rename
// To check this, verify both entries are in the correct state
// and have each other as pairs
@ -1797,6 +1795,15 @@ struct RenameTenantImpl {
}
}
// Check cluster capacity. If we would exceed the amount due to temporary extra tenants
// then we deny the rename request altogether.
int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
tr, oldTenantEntry.assignedCluster.get(), Snapshot::False, 0));
if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
throw cluster_no_capacity();
}
TenantMapEntry updatedOldEntry = oldTenantEntry;
TenantMapEntry updatedNewEntry(updatedOldEntry);
ASSERT(updatedOldEntry.configurationSequenceNum == self->configurationSequenceNum);

View File

@ -271,6 +271,50 @@ Future<Optional<TenantMapEntry>> createTenant(Reference<DB> db,
}
}
// Records a tombstone for `tenantId` and, when a cleanup is due, first erases older tombstones and rewrites the
// tombstone cleanup data.
//
// tr: the transaction through which every read and write in this function is issued.
// tenantId: ID to record in the tombstone set. It is inserted only when it is greater than the
//           `tombstonesErasedThrough` value in effect for this call; otherwise no tombstone is written.
//
// Returns Void once all mutations have been issued on `tr`; this function does not commit the transaction.
ACTOR template <class Transaction>
Future<Void> markTenantTombstones(Transaction tr, int64_t tenantId) {
// In data clusters, we store a tombstone
// Start the reverse, limit-1 read over the tombstone set now, but do not wait on it here: its result is only
// consumed in the cleanup branch below. Presumably this yields the largest existing tombstone ID -- TODO confirm.
state Future<KeyBackedRangeResult<int64_t>> latestTombstoneFuture =
TenantMetadata::tenantTombstones().getRange(tr, {}, {}, 1, Snapshot::False, Reverse::True);
state Optional<TenantTombstoneCleanupData> cleanupData = wait(TenantMetadata::tombstoneCleanupData().get(tr));
state Version transactionReadVersion = wait(safeThreadFutureToFuture(tr->getReadVersion()));
// If it has been long enough since we last cleaned up the tenant tombstones, we do that first
// (This branch is also taken when no cleanup data has been written yet.)
if (!cleanupData.present() || cleanupData.get().nextTombstoneEraseVersion <= transactionReadVersion) {
// -1 means there is no previously scheduled erase boundary, so nothing is erased in this pass.
state int64_t deleteThroughId = cleanupData.present() ? cleanupData.get().nextTombstoneEraseId : -1;
// Delete all tombstones up through the one currently marked in the cleanup data
// NOTE(review): the `+ 1` suggests the end of the erased range is exclusive -- confirm against erase().
if (deleteThroughId >= 0) {
TenantMetadata::tenantTombstones().erase(tr, 0, deleteThroughId + 1);
}
KeyBackedRangeResult<int64_t> latestTombstone = wait(latestTombstoneFuture);
// The boundary for the next cleanup is the maximum of the boundary just erased, the ID being tombstoned
// now, and the tombstone returned by the range read (if any).
int64_t nextDeleteThroughId = std::max(deleteThroughId, tenantId);
if (!latestTombstone.results.empty()) {
nextDeleteThroughId = std::max(nextDeleteThroughId, latestTombstone.results[0]);
}
// The next cleanup will happen at or after TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds have elapsed and
// will clean up tombstones through the most recently allocated ID.
TenantTombstoneCleanupData updatedCleanupData;
updatedCleanupData.tombstonesErasedThrough = deleteThroughId;
updatedCleanupData.nextTombstoneEraseId = nextDeleteThroughId;
// The interval knob is multiplied by VERSIONS_PER_SECOND to express the delay in versions.
updatedCleanupData.nextTombstoneEraseVersion =
transactionReadVersion +
CLIENT_KNOBS->TENANT_TOMBSTONE_CLEANUP_INTERVAL * CLIENT_KNOBS->VERSIONS_PER_SECOND;
TenantMetadata::tombstoneCleanupData().set(tr, updatedCleanupData);
// If the tenant being deleted is within the tombstone window, record the tombstone
// (i.e. its ID is above the boundary that was just erased through)
if (tenantId > updatedCleanupData.tombstonesErasedThrough) {
TenantMetadata::tenantTombstones().insert(tr, tenantId);
}
// cleanupData.get() is safe here: this branch is reached only when cleanupData.present() is true.
} else if (tenantId > cleanupData.get().tombstonesErasedThrough) {
// If the tenant being deleted is within the tombstone window, record the tombstone
TenantMetadata::tenantTombstones().insert(tr, tenantId);
}
return Void();
}
// Deletes the tenant with the given name. If tenantId is specified, the tenant being deleted must also have the same
// ID. If no matching tenant is found, this function returns without deleting anything. This behavior allows the
// function to be used idempotently: if the transaction is retried after having succeeded, it will see that the tenant
@ -320,45 +364,7 @@ Future<Void> deleteTenantTransaction(Transaction tr,
}
if (clusterType == ClusterType::METACLUSTER_DATA) {
// In data clusters, we store a tombstone
state Future<KeyBackedRangeResult<int64_t>> latestTombstoneFuture =
TenantMetadata::tenantTombstones().getRange(tr, {}, {}, 1, Snapshot::False, Reverse::True);
state Optional<TenantTombstoneCleanupData> cleanupData = wait(TenantMetadata::tombstoneCleanupData().get(tr));
state Version transactionReadVersion = wait(safeThreadFutureToFuture(tr->getReadVersion()));
// If it has been long enough since we last cleaned up the tenant tombstones, we do that first
if (!cleanupData.present() || cleanupData.get().nextTombstoneEraseVersion <= transactionReadVersion) {
state int64_t deleteThroughId = cleanupData.present() ? cleanupData.get().nextTombstoneEraseId : -1;
// Delete all tombstones up through the one currently marked in the cleanup data
if (deleteThroughId >= 0) {
TenantMetadata::tenantTombstones().erase(tr, 0, deleteThroughId + 1);
}
KeyBackedRangeResult<int64_t> latestTombstone = wait(latestTombstoneFuture);
int64_t nextDeleteThroughId = std::max(deleteThroughId, tenantId.get());
if (!latestTombstone.results.empty()) {
nextDeleteThroughId = std::max(nextDeleteThroughId, latestTombstone.results[0]);
}
// The next cleanup will happen at or after TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds have elapsed and
// will clean up tombstones through the most recently allocated ID.
TenantTombstoneCleanupData updatedCleanupData;
updatedCleanupData.tombstonesErasedThrough = deleteThroughId;
updatedCleanupData.nextTombstoneEraseId = nextDeleteThroughId;
updatedCleanupData.nextTombstoneEraseVersion =
transactionReadVersion +
CLIENT_KNOBS->TENANT_TOMBSTONE_CLEANUP_INTERVAL * CLIENT_KNOBS->VERSIONS_PER_SECOND;
TenantMetadata::tombstoneCleanupData().set(tr, updatedCleanupData);
// If the tenant being deleted is within the tombstone window, record the tombstone
if (tenantId.get() > updatedCleanupData.tombstonesErasedThrough) {
TenantMetadata::tenantTombstones().insert(tr, tenantId.get());
}
} else if (tenantId.get() > cleanupData.get().tombstonesErasedThrough) {
// If the tenant being deleted is within the tombstone window, record the tombstone
TenantMetadata::tenantTombstones().insert(tr, tenantId.get());
}
wait(markTenantTombstones(tr, tenantId.get()));
}
return Void();
@ -525,6 +531,10 @@ Future<Void> renameTenantTransaction(Transaction tr,
Tuple::makeTuple(oldEntry.get().tenantGroup.get(), newName));
}
if (clusterType == ClusterType::METACLUSTER_DATA) {
wait(markTenantTombstones(tr, tenantId.get()));
}
return Void();
}

View File

@ -154,7 +154,6 @@ private:
ASSERT(tenantMapEntry.id <= metadata.lastTenantId);
}
ASSERT(metadata.tenantIdIndex[tenantMapEntry.id] == tenantName);
ASSERT(!metadata.tenantTombstones.count(tenantMapEntry.id));
if (tenantMapEntry.tenantGroup.present()) {
auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get());