1
0
mirror of https://github.com/apple/foundationdb.git synced 2025-05-18 03:42:10 +08:00

add tombstones on rename and fix several concurrency bugs with renames in progress

This commit is contained in:
Jon Fu 2022-08-23 15:50:58 -07:00
parent 8f011c9f73
commit a958ddb58a
3 changed files with 69 additions and 53 deletions
fdbclient/include/fdbclient
fdbserver/include/fdbserver/workloads

@ -1025,8 +1025,9 @@ ACTOR template <class Transaction>
Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr, Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr,
TenantName tenantName, TenantName tenantName,
TenantMapEntry tenantEntry, TenantMapEntry tenantEntry,
DataClusterMetadata* clusterMetadata) { DataClusterMetadata* clusterMetadata,
state bool updateClusterCapacity = !tenantEntry.tenantGroup.present(); bool isRenamePair = false) {
state bool updateClusterCapacity = !tenantEntry.tenantGroup.present() && !isRenamePair;
if (tenantEntry.tenantGroup.present()) { if (tenantEntry.tenantGroup.present()) {
ManagementClusterMetadata::tenantMetadata().tenantGroupTenantIndex.erase( ManagementClusterMetadata::tenantMetadata().tenantGroupTenantIndex.erase(
tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), tenantName)); tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), tenantName));
@ -1356,6 +1357,12 @@ struct DeleteTenantImpl {
throw tenant_not_found(); throw tenant_not_found();
} }
if (tenantEntry.get().tenantState == TenantState::REMOVING) {
if (tenantEntry.get().renamePair.present()) {
self->pairName = tenantEntry.get().renamePair.get();
}
}
self->tenantId = tenantEntry.get().id; self->tenantId = tenantEntry.get().id;
wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster.get())); wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster.get()));
return tenantEntry.get().tenantState == TenantState::REMOVING; return tenantEntry.get().tenantState == TenantState::REMOVING;
@ -1455,7 +1462,7 @@ struct DeleteTenantImpl {
// Remove the tenant from its tenant group // Remove the tenant from its tenant group
wait(managementClusterRemoveTenantFromGroup( wait(managementClusterRemoveTenantFromGroup(
tr, tenantName, tenantEntry.get(), &self->ctx.dataClusterMetadata.get())); tr, tenantName, tenantEntry.get(), &self->ctx.dataClusterMetadata.get(), pairDelete));
wait(pairFuture); wait(pairFuture);
return Void(); return Void();
@ -1734,7 +1741,7 @@ struct RenameTenantImpl {
// Remove the tenant from its tenant group // Remove the tenant from its tenant group
wait(managementClusterRemoveTenantFromGroup( wait(managementClusterRemoveTenantFromGroup(
tr, self->oldName, tenantEntry, &self->ctx.dataClusterMetadata.get())); tr, self->oldName, tenantEntry, &self->ctx.dataClusterMetadata.get(), true));
return Void(); return Void();
} }
@ -1758,15 +1765,6 @@ struct RenameTenantImpl {
throw tenant_removed(); throw tenant_removed();
} }
// Check cluster capacity. If we would exceed the amount due to temporary extra tenants
// then we deny the rename request altogether.
int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
tr, oldTenantEntry.assignedCluster.get(), Snapshot::False, 0));
if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
throw cluster_no_capacity();
}
// If the new entry is present, we can only continue if this is a retry of the same rename // If the new entry is present, we can only continue if this is a retry of the same rename
// To check this, verify both entries are in the correct state // To check this, verify both entries are in the correct state
// and have each other as pairs // and have each other as pairs
@ -1797,6 +1795,15 @@ struct RenameTenantImpl {
} }
} }
// Check cluster capacity. If we would exceed the amount due to temporary extra tenants
// then we deny the rename request altogether.
int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
tr, oldTenantEntry.assignedCluster.get(), Snapshot::False, 0));
if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
throw cluster_no_capacity();
}
TenantMapEntry updatedOldEntry = oldTenantEntry; TenantMapEntry updatedOldEntry = oldTenantEntry;
TenantMapEntry updatedNewEntry(updatedOldEntry); TenantMapEntry updatedNewEntry(updatedOldEntry);
ASSERT(updatedOldEntry.configurationSequenceNum == self->configurationSequenceNum); ASSERT(updatedOldEntry.configurationSequenceNum == self->configurationSequenceNum);

@ -271,6 +271,50 @@ Future<Optional<TenantMapEntry>> createTenant(Reference<DB> db,
} }
} }
// Records a tombstone for `tenantId` in TenantMetadata::tenantTombstones() and, when a cleanup pass is due,
// first erases old tombstones and rewrites TenantMetadata::tombstoneCleanupData().
//
// A cleanup pass is due when no cleanup data is stored yet, or when the stored nextTombstoneEraseVersion is at or
// below this transaction's read version. In either case the tombstone for `tenantId` is only inserted if the ID is
// greater than the applicable tombstonesErasedThrough value.
//
// All reads and writes go through `tr`; nothing is committed here.
// NOTE(review): the call sites visible in this file invoke this only when clusterType is
// ClusterType::METACLUSTER_DATA, from both the delete and the rename transaction paths - confirm there are no others.
ACTOR template <class Transaction>
Future<Void> markTenantTombstones(Transaction tr, int64_t tenantId) {
// In data clusters, we store a tombstone
// Issue the read for the last tombstone entry (limit 1, reverse order) up front; it is only awaited inside the
// cleanup branch below, after the erase has been issued.
state Future<KeyBackedRangeResult<int64_t>> latestTombstoneFuture =
TenantMetadata::tenantTombstones().getRange(tr, {}, {}, 1, Snapshot::False, Reverse::True);
state Optional<TenantTombstoneCleanupData> cleanupData = wait(TenantMetadata::tombstoneCleanupData().get(tr));
state Version transactionReadVersion = wait(safeThreadFutureToFuture(tr->getReadVersion()));
// If it has been long enough since we last cleaned up the tenant tombstones, we do that first
if (!cleanupData.present() || cleanupData.get().nextTombstoneEraseVersion <= transactionReadVersion) {
// -1 stands for "no cleanup data stored yet"; in that case the erase below is skipped.
state int64_t deleteThroughId = cleanupData.present() ? cleanupData.get().nextTombstoneEraseId : -1;
// Delete all tombstones up through the one currently marked in the cleanup data
if (deleteThroughId >= 0) {
// presumably the end bound is exclusive, hence the + 1 to include deleteThroughId itself - TODO confirm
TenantMetadata::tenantTombstones().erase(tr, 0, deleteThroughId + 1);
}
KeyBackedRangeResult<int64_t> latestTombstone = wait(latestTombstoneFuture);
// The next erase target is the largest of: the ID just erased through, the ID being tombstoned now, and the
// last tombstone returned by the range read (if any).
int64_t nextDeleteThroughId = std::max(deleteThroughId, tenantId);
if (!latestTombstone.results.empty()) {
nextDeleteThroughId = std::max(nextDeleteThroughId, latestTombstone.results[0]);
}
// The next cleanup will happen at or after TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds have elapsed and
// will clean up tombstones through the most recently allocated ID.
TenantTombstoneCleanupData updatedCleanupData;
updatedCleanupData.tombstonesErasedThrough = deleteThroughId;
updatedCleanupData.nextTombstoneEraseId = nextDeleteThroughId;
// Schedule the next pass as a version: current read version plus the interval converted via VERSIONS_PER_SECOND.
updatedCleanupData.nextTombstoneEraseVersion =
transactionReadVersion +
CLIENT_KNOBS->TENANT_TOMBSTONE_CLEANUP_INTERVAL * CLIENT_KNOBS->VERSIONS_PER_SECOND;
TenantMetadata::tombstoneCleanupData().set(tr, updatedCleanupData);
// If the tenant being deleted is within the tombstone window, record the tombstone
if (tenantId > updatedCleanupData.tombstonesErasedThrough) {
TenantMetadata::tenantTombstones().insert(tr, tenantId);
}
} else if (tenantId > cleanupData.get().tombstonesErasedThrough) {
// cleanupData is necessarily present here: the first condition was false, so !cleanupData.present() was false.
// If the tenant being deleted is within the tombstone window, record the tombstone
TenantMetadata::tenantTombstones().insert(tr, tenantId);
}
return Void();
}
// Deletes the tenant with the given name. If tenantId is specified, the tenant being deleted must also have the same // Deletes the tenant with the given name. If tenantId is specified, the tenant being deleted must also have the same
// ID. If no matching tenant is found, this function returns without deleting anything. This behavior allows the // ID. If no matching tenant is found, this function returns without deleting anything. This behavior allows the
// function to be used idempotently: if the transaction is retried after having succeeded, it will see that the tenant // function to be used idempotently: if the transaction is retried after having succeeded, it will see that the tenant
@ -320,45 +364,7 @@ Future<Void> deleteTenantTransaction(Transaction tr,
} }
if (clusterType == ClusterType::METACLUSTER_DATA) { if (clusterType == ClusterType::METACLUSTER_DATA) {
// In data clusters, we store a tombstone wait(markTenantTombstones(tr, tenantId.get()));
state Future<KeyBackedRangeResult<int64_t>> latestTombstoneFuture =
TenantMetadata::tenantTombstones().getRange(tr, {}, {}, 1, Snapshot::False, Reverse::True);
state Optional<TenantTombstoneCleanupData> cleanupData = wait(TenantMetadata::tombstoneCleanupData().get(tr));
state Version transactionReadVersion = wait(safeThreadFutureToFuture(tr->getReadVersion()));
// If it has been long enough since we last cleaned up the tenant tombstones, we do that first
if (!cleanupData.present() || cleanupData.get().nextTombstoneEraseVersion <= transactionReadVersion) {
state int64_t deleteThroughId = cleanupData.present() ? cleanupData.get().nextTombstoneEraseId : -1;
// Delete all tombstones up through the one currently marked in the cleanup data
if (deleteThroughId >= 0) {
TenantMetadata::tenantTombstones().erase(tr, 0, deleteThroughId + 1);
}
KeyBackedRangeResult<int64_t> latestTombstone = wait(latestTombstoneFuture);
int64_t nextDeleteThroughId = std::max(deleteThroughId, tenantId.get());
if (!latestTombstone.results.empty()) {
nextDeleteThroughId = std::max(nextDeleteThroughId, latestTombstone.results[0]);
}
// The next cleanup will happen at or after TENANT_TOMBSTONE_CLEANUP_INTERVAL seconds have elapsed and
// will clean up tombstones through the most recently allocated ID.
TenantTombstoneCleanupData updatedCleanupData;
updatedCleanupData.tombstonesErasedThrough = deleteThroughId;
updatedCleanupData.nextTombstoneEraseId = nextDeleteThroughId;
updatedCleanupData.nextTombstoneEraseVersion =
transactionReadVersion +
CLIENT_KNOBS->TENANT_TOMBSTONE_CLEANUP_INTERVAL * CLIENT_KNOBS->VERSIONS_PER_SECOND;
TenantMetadata::tombstoneCleanupData().set(tr, updatedCleanupData);
// If the tenant being deleted is within the tombstone window, record the tombstone
if (tenantId.get() > updatedCleanupData.tombstonesErasedThrough) {
TenantMetadata::tenantTombstones().insert(tr, tenantId.get());
}
} else if (tenantId.get() > cleanupData.get().tombstonesErasedThrough) {
// If the tenant being deleted is within the tombstone window, record the tombstone
TenantMetadata::tenantTombstones().insert(tr, tenantId.get());
}
} }
return Void(); return Void();
@ -525,6 +531,10 @@ Future<Void> renameTenantTransaction(Transaction tr,
Tuple::makeTuple(oldEntry.get().tenantGroup.get(), newName)); Tuple::makeTuple(oldEntry.get().tenantGroup.get(), newName));
} }
if (clusterType == ClusterType::METACLUSTER_DATA) {
wait(markTenantTombstones(tr, tenantId.get()));
}
return Void(); return Void();
} }

@ -154,7 +154,6 @@ private:
ASSERT(tenantMapEntry.id <= metadata.lastTenantId); ASSERT(tenantMapEntry.id <= metadata.lastTenantId);
} }
ASSERT(metadata.tenantIdIndex[tenantMapEntry.id] == tenantName); ASSERT(metadata.tenantIdIndex[tenantMapEntry.id] == tenantName);
ASSERT(!metadata.tenantTombstones.count(tenantMapEntry.id));
if (tenantMapEntry.tenantGroup.present()) { if (tenantMapEntry.tenantGroup.present()) {
auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get()); auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get());