Mirror of https://github.com/apple/foundationdb.git, synced 2025-05-15 02:18:39 +08:00
add tombstones on rename and fix several concurrency bugs with renames in progress
This commit is contained in:
parent 8f011c9f73
commit a958ddb58a
@@ -1025,8 +1025,9 @@ ACTOR template <class Transaction>
 Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr,
                                                     TenantName tenantName,
                                                     TenantMapEntry tenantEntry,
-                                                    DataClusterMetadata* clusterMetadata) {
-    state bool updateClusterCapacity = !tenantEntry.tenantGroup.present();
+                                                    DataClusterMetadata* clusterMetadata,
+                                                    bool isRenamePair = false) {
+    state bool updateClusterCapacity = !tenantEntry.tenantGroup.present() && !isRenamePair;
     if (tenantEntry.tenantGroup.present()) {
         ManagementClusterMetadata::tenantMetadata().tenantGroupTenantIndex.erase(
             tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), tenantName));
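The new isRenamePair parameter stops the temporary second entry of an in-flight rename from changing the cluster's capacity accounting when it is removed from a group, presumably because the underlying tenant still occupies its slot. A minimal sketch of the predicate in plain C++ (a model, not FDB's actual API):

#include <cassert>

// Models the updateClusterCapacity decision in
// managementClusterRemoveTenantFromGroup: capacity is only recalculated
// for an ungrouped tenant that is not one half of an in-flight rename.
bool shouldUpdateClusterCapacity(bool hasTenantGroup, bool isRenamePair) {
    return !hasTenantGroup && !isRenamePair;
}

int main() {
    assert(shouldUpdateClusterCapacity(false, false));  // ordinary ungrouped tenant
    assert(!shouldUpdateClusterCapacity(false, true));  // rename pair: leave capacity alone
    assert(!shouldUpdateClusterCapacity(true, false));  // grouped tenants are handled elsewhere
}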
@@ -1356,6 +1357,12 @@ struct DeleteTenantImpl {
             throw tenant_not_found();
         }
 
+        if (tenantEntry.get().tenantState == TenantState::REMOVING) {
+            if (tenantEntry.get().renamePair.present()) {
+                self->pairName = tenantEntry.get().renamePair.get();
+            }
+        }
+
         self->tenantId = tenantEntry.get().id;
         wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster.get()));
         return tenantEntry.get().tenantState == TenantState::REMOVING;
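This makes delete aware of a rename in progress: if the tenant is already in the REMOVING state and has a renamePair recorded, the delete remembers the pair name so the partner entry can be cleaned up too. A simplified model of that bookkeeping, using hypothetical stand-in types rather than FDB's:

#include <cassert>
#include <cstdint>
#include <optional>
#include <string>

// Hypothetical stand-ins for FDB's TenantState / TenantMapEntry /
// DeleteTenantImpl; only the fields this hunk touches are modeled.
enum class TenantState { READY, REMOVING };

struct TenantMapEntryModel {
    int64_t id = -1;
    TenantState tenantState = TenantState::READY;
    std::optional<std::string> renamePair; // other name of an in-flight rename
};

struct DeleteTenantModel {
    int64_t tenantId = -1;
    std::optional<std::string> pairName;

    // Mirrors the new logic: a tenant already in the REMOVING state hands
    // the delete its rename pair, so the partner entry is destroyed too.
    bool recordEntry(const TenantMapEntryModel& entry) {
        if (entry.tenantState == TenantState::REMOVING && entry.renamePair) {
            pairName = entry.renamePair;
        }
        tenantId = entry.id;
        return entry.tenantState == TenantState::REMOVING;
    }
};

int main() {
    DeleteTenantModel del;
    bool alreadyRemoving = del.recordEntry({ 7, TenantState::REMOVING, "newName" });
    assert(alreadyRemoving && del.pairName == "newName");
}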
@@ -1455,7 +1462,7 @@ struct DeleteTenantImpl {
 
         // Remove the tenant from its tenant group
         wait(managementClusterRemoveTenantFromGroup(
-            tr, tenantName, tenantEntry.get(), &self->ctx.dataClusterMetadata.get()));
+            tr, tenantName, tenantEntry.get(), &self->ctx.dataClusterMetadata.get(), pairDelete));
 
         wait(pairFuture);
         return Void();
@@ -1734,7 +1741,7 @@ struct RenameTenantImpl {
 
         // Remove the tenant from its tenant group
         wait(managementClusterRemoveTenantFromGroup(
-            tr, self->oldName, tenantEntry, &self->ctx.dataClusterMetadata.get()));
+            tr, self->oldName, tenantEntry, &self->ctx.dataClusterMetadata.get(), true));
 
         return Void();
     }
@@ -1758,15 +1765,6 @@ struct RenameTenantImpl {
             throw tenant_removed();
         }
 
-        // Check cluster capacity. If we would exceed the amount due to temporary extra tenants
-        // then we deny the rename request altogether.
-        int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
-            tr, oldTenantEntry.assignedCluster.get(), Snapshot::False, 0));
-
-        if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
-            throw cluster_no_capacity();
-        }
-
         // If the new entry is present, we can only continue if this is a retry of the same rename
         // To check this, verify both entries are in the correct state
         // and have each other as pairs
@@ -1797,6 +1795,15 @@
             }
         }
 
+        // Check cluster capacity. If we would exceed the amount due to temporary extra tenants
+        // then we deny the rename request altogether.
+        int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
+            tr, oldTenantEntry.assignedCluster.get(), Snapshot::False, 0));
+
+        if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
+            throw cluster_no_capacity();
+        }
+
         TenantMapEntry updatedOldEntry = oldTenantEntry;
         TenantMapEntry updatedNewEntry(updatedOldEntry);
         ASSERT(updatedOldEntry.configurationSequenceNum == self->configurationSequenceNum);
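These two hunks move the capacity check from before the retry detection to after it. The apparent bug: a retried rename whose temporary second entry was already created could be rejected with cluster_no_capacity even though it adds no new tenant. A control-flow sketch of the corrected ordering (plain C++; the early return on a verified retry is this sketch's assumption, and the thrown strings stand in for FDB error codes):

#include <cstdint>
#include <stdexcept>

// Sketch of the corrected precondition ordering in RenameTenantImpl.
void checkRenamePreconditions(bool newEntryPresent,
                              bool entriesArePairedForThisRename,
                              int64_t clusterTenantCount,
                              int64_t maxTenantsPerCluster) {
    // 1. Retry detection comes first: if the destination entry already
    //    exists, it must be the pair created by this same rename.
    if (newEntryPresent) {
        if (!entriesArePairedForThisRename) {
            throw std::runtime_error("tenant_already_exists");
        }
        return; // retry: the temporary extra entry already holds a slot
    }
    // 2. Only a fresh rename needs a free slot for its temporary second
    //    entry, so the capacity check now runs after the retry check.
    if (clusterTenantCount + 1 > maxTenantsPerCluster) {
        throw std::runtime_error("cluster_no_capacity");
    }
}

int main() {
    // A retry at full capacity is no longer spuriously rejected:
    checkRenamePreconditions(true, true, /*count=*/100, /*max=*/100);
}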
@@ -271,6 +271,50 @@ Future<Optional<TenantMapEntry>> createTenant(Reference<DB> db,
     }
 }
 
+ACTOR template <class Transaction>
+Future<Void> markTenantTombstones(Transaction tr, int64_t tenantId) {
+    // In data clusters, we store a tombstone
+    state Future<KeyBackedRangeResult<int64_t>> latestTombstoneFuture =
+        TenantMetadata::tenantTombstones().getRange(tr, {}, {}, 1, Snapshot::False, Reverse::True);
+    state Optional<TenantTombstoneCleanupData> cleanupData = wait(TenantMetadata::tombstoneCleanupData().get(tr));
+    state Version transactionReadVersion = wait(safeThreadFutureToFuture(tr->getReadVersion()));
+
+    // If it has been long enough since we last cleaned up the tenant tombstones, we do that first
+    if (!cleanupData.present() || cleanupData.get().nextTombstoneEraseVersion <= transactionReadVersion) {
+        state int64_t deleteThroughId = cleanupData.present() ? cleanupData.get().nextTombstoneEraseId : -1;
+        // Delete all tombstones up through the one currently marked in the cleanup data
+        if (deleteThroughId >= 0) {
+            TenantMetadata::tenantTombstones().erase(tr, 0, deleteThroughId + 1);
+        }
+
+        KeyBackedRangeResult<int64_t> latestTombstone = wait(latestTombstoneFuture);
+        int64_t nextDeleteThroughId = std::max(deleteThroughId, tenantId);
+        if (!latestTombstone.results.empty()) {
+            nextDeleteThroughId = std::max(nextDeleteThroughId, latestTombstone.results[0]);
+        }
+
+        // The next cleanup will happen at or after TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds have elapsed and
+        // will clean up tombstones through the most recently allocated ID.
+        TenantTombstoneCleanupData updatedCleanupData;
+        updatedCleanupData.tombstonesErasedThrough = deleteThroughId;
+        updatedCleanupData.nextTombstoneEraseId = nextDeleteThroughId;
+        updatedCleanupData.nextTombstoneEraseVersion =
+            transactionReadVersion +
+            CLIENT_KNOBS->TENANT_TOMBSTONE_CLEANUP_INTERVAL * CLIENT_KNOBS->VERSIONS_PER_SECOND;
+
+        TenantMetadata::tombstoneCleanupData().set(tr, updatedCleanupData);
+
+        // If the tenant being deleted is within the tombstone window, record the tombstone
+        if (tenantId > updatedCleanupData.tombstonesErasedThrough) {
+            TenantMetadata::tenantTombstones().insert(tr, tenantId);
+        }
+    } else if (tenantId > cleanupData.get().tombstonesErasedThrough) {
+        // If the tenant being deleted is within the tombstone window, record the tombstone
+        TenantMetadata::tenantTombstones().insert(tr, tenantId);
+    }
+    return Void();
+}
+
 // Deletes the tenant with the given name. If tenantId is specified, the tenant being deleted must also have the same
 // ID. If no matching tenant is found, this function returns without deleting anything. This behavior allows the
 // function to be used idempotently: if the transaction is retried after having succeeded, it will see that the tenant
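The scheduling in markTenantTombstones is plain version arithmetic: a sweep is due once the transaction's read version reaches nextTombstoneEraseVersion, and each sweep schedules the next one TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds ahead, converted to versions via VERSIONS_PER_SECOND. A self-contained model of one sweep decision; the knob values here are illustrative assumptions, not FDB's defaults:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

// Hypothetical knob values for illustration; FDB reads these from
// CLIENT_KNOBS (TENANT_TOMBSTONE_CLEANUP_INTERVAL, VERSIONS_PER_SECOND).
constexpr int64_t kCleanupIntervalSeconds = 60;
constexpr int64_t kVersionsPerSecond = 1'000'000;

struct CleanupData {
    int64_t tombstonesErasedThrough;   // IDs <= this have been erased
    int64_t nextTombstoneEraseId;      // next sweep erases through this ID
    int64_t nextTombstoneEraseVersion; // next sweep is due at this version
};

// One pass of the scheduling logic once a sweep is due: erase through the
// previously marked ID, extend the window to the newest known ID, and push
// the next sweep one interval into the future. latestTombstoneId is -1
// when no tombstone exists yet, mirroring the actor's empty-range case.
CleanupData scheduleSweep(std::optional<CleanupData> prev,
                          int64_t readVersion,
                          int64_t tenantId,
                          int64_t latestTombstoneId) {
    int64_t deleteThroughId = prev ? prev->nextTombstoneEraseId : -1;
    int64_t nextDeleteThroughId =
        std::max({ deleteThroughId, tenantId, latestTombstoneId });
    return CleanupData{ deleteThroughId,
                        nextDeleteThroughId,
                        readVersion + kCleanupIntervalSeconds * kVersionsPerSecond };
}

int main() {
    // First ever sweep: nothing erased yet, window ends at the new tombstone.
    CleanupData d = scheduleSweep(std::nullopt, /*readVersion=*/5'000'000,
                                  /*tenantId=*/42, /*latestTombstoneId=*/-1);
    assert(d.tombstonesErasedThrough == -1 && d.nextTombstoneEraseId == 42);
    assert(d.nextTombstoneEraseVersion == 5'000'000 + 60 * 1'000'000);
}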
@@ -320,45 +364,7 @@ Future<Void> deleteTenantTransaction(Transaction tr,
     }
 
     if (clusterType == ClusterType::METACLUSTER_DATA) {
-        // In data clusters, we store a tombstone
-        state Future<KeyBackedRangeResult<int64_t>> latestTombstoneFuture =
-            TenantMetadata::tenantTombstones().getRange(tr, {}, {}, 1, Snapshot::False, Reverse::True);
-        state Optional<TenantTombstoneCleanupData> cleanupData = wait(TenantMetadata::tombstoneCleanupData().get(tr));
-        state Version transactionReadVersion = wait(safeThreadFutureToFuture(tr->getReadVersion()));
-
-        // If it has been long enough since we last cleaned up the tenant tombstones, we do that first
-        if (!cleanupData.present() || cleanupData.get().nextTombstoneEraseVersion <= transactionReadVersion) {
-            state int64_t deleteThroughId = cleanupData.present() ? cleanupData.get().nextTombstoneEraseId : -1;
-            // Delete all tombstones up through the one currently marked in the cleanup data
-            if (deleteThroughId >= 0) {
-                TenantMetadata::tenantTombstones().erase(tr, 0, deleteThroughId + 1);
-            }
-
-            KeyBackedRangeResult<int64_t> latestTombstone = wait(latestTombstoneFuture);
-            int64_t nextDeleteThroughId = std::max(deleteThroughId, tenantId.get());
-            if (!latestTombstone.results.empty()) {
-                nextDeleteThroughId = std::max(nextDeleteThroughId, latestTombstone.results[0]);
-            }
-
-            // The next cleanup will happen at or after TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds have elapsed and
-            // will clean up tombstones through the most recently allocated ID.
-            TenantTombstoneCleanupData updatedCleanupData;
-            updatedCleanupData.tombstonesErasedThrough = deleteThroughId;
-            updatedCleanupData.nextTombstoneEraseId = nextDeleteThroughId;
-            updatedCleanupData.nextTombstoneEraseVersion =
-                transactionReadVersion +
-                CLIENT_KNOBS->TENANT_TOMBSTONE_CLEANUP_INTERVAL * CLIENT_KNOBS->VERSIONS_PER_SECOND;
-
-            TenantMetadata::tombstoneCleanupData().set(tr, updatedCleanupData);
-
-            // If the tenant being deleted is within the tombstone window, record the tombstone
-            if (tenantId.get() > updatedCleanupData.tombstonesErasedThrough) {
-                TenantMetadata::tenantTombstones().insert(tr, tenantId.get());
-            }
-        } else if (tenantId.get() > cleanupData.get().tombstonesErasedThrough) {
-            // If the tenant being deleted is within the tombstone window, record the tombstone
-            TenantMetadata::tenantTombstones().insert(tr, tenantId.get());
-        }
+        wait(markTenantTombstones(tr, tenantId.get()));
     }
 
     return Void();
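With the helper extracted, the inline tombstone block in deleteTenantTransaction collapses to a single call, so deletes and renames now share one code path for tombstone recording and cleanup scheduling rather than two hand-maintained copies.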
@@ -525,6 +531,10 @@ Future<Void> renameTenantTransaction(Transaction tr,
             Tuple::makeTuple(oldEntry.get().tenantGroup.get(), newName));
     }
 
+    if (clusterType == ClusterType::METACLUSTER_DATA) {
+        wait(markTenantTombstones(tr, tenantId.get()));
+    }
+
     return Void();
 }
 
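This is the "tombstones on rename" of the commit title: on a data cluster, a committed rename now records a tombstone for the tenant's ID, presumably so a retried or stale request touching that ID can detect that the operation already happened. A toy model of the tombstone window the helper maintains (not FDB's API):

#include <cassert>
#include <cstdint>
#include <set>

// Toy model of the tombstone set: once an ID is recorded inside the live
// window, code that re-encounters it can tell the work already happened.
struct TombstoneWindow {
    int64_t erasedThrough = -1;   // IDs <= this were swept away
    std::set<int64_t> tombstones; // IDs recorded inside the window

    void record(int64_t id) {
        if (id > erasedThrough) { // matches the guard in the diff
            tombstones.insert(id);
        }
    }
    bool contains(int64_t id) const { return tombstones.count(id) > 0; }
};

int main() {
    TombstoneWindow w;
    w.record(42);            // rename commits: ID 42 is tombstoned
    assert(w.contains(42));  // a retried request can detect this
    w.erasedThrough = 100;
    w.record(7);             // at or below the erased mark: not recorded
    assert(!w.contains(7));
}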
@@ -154,7 +154,6 @@ private:
         ASSERT(tenantMapEntry.id <= metadata.lastTenantId);
     }
     ASSERT(metadata.tenantIdIndex[tenantMapEntry.id] == tenantName);
-    ASSERT(!metadata.tenantTombstones.count(tenantMapEntry.id));
 
     if (tenantMapEntry.tenantGroup.present()) {
         auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get());
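The dropped assertion enforced that a live tenant's ID never appears in the tombstone set. Since a rename now records a tombstone for an ID that stays live under its new name, that invariant no longer holds, so the workload stops checking it.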