/* * BackupContainer.h * * This source file is part of the FoundationDB open source project * * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FDBCLIENT_BACKUP_CONTAINER_H #define FDBCLIENT_BACKUP_CONTAINER_H #pragma once #include "flow/flow.h" #include "fdbrpc/IAsyncFile.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/ReadYourWrites.h" #include class ReadYourWritesTransaction; Future> timeKeeperEpochsFromVersion(Version const& v, Reference const& tr); Future timeKeeperVersionFromDatetime(std::string const& datetime, Database const& db); // Append-only file interface for writing backup data // Once finish() is called the file cannot be further written to. // Backup containers should not attempt to use files for which finish was not called or did not complete. // TODO: Move the log file and range file format encoding/decoding stuff to this file and behind interfaces. class IBackupFile { public: IBackupFile(const std::string& fileName) : m_fileName(fileName) {} virtual ~IBackupFile() {} // Backup files are append-only and cannot have more than 1 append outstanding at once. virtual Future append(const void* data, int len) = 0; virtual Future finish() = 0; inline std::string getFileName() const { return m_fileName; } virtual int64_t size() const = 0; virtual void addref() = 0; virtual void delref() = 0; Future appendStringRefWithLen(Standalone s); protected: std::string m_fileName; }; // Structures for various backup components // Mutation log version written by old FileBackupAgent static const uint32_t BACKUP_AGENT_MLOG_VERSION = 2001; // Mutation log version written by BackupWorker static const uint32_t PARTITIONED_MLOG_VERSION = 4110; // Snapshot file version written by FileBackupAgent static const uint32_t BACKUP_AGENT_SNAPSHOT_FILE_VERSION = 1001; struct LogFile { Version beginVersion; Version endVersion; uint32_t blockSize; std::string fileName; int64_t fileSize; int tagId = -1; // Log router tag. Non-negative for new backup format. int totalTags = -1; // Total number of log router tags. // Order by beginVersion, break ties with endVersion bool operator<(const LogFile& rhs) const { return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion; } // Returns if this log file contains a subset of content of the given file // by comparing version range and tag ID. bool isSubset(const LogFile& rhs) const { return beginVersion >= rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId; } bool isPartitionedLog() const { return tagId >= 0 && tagId < totalTags; } std::string toString() const { std::stringstream ss; ss << "beginVersion:" << std::to_string(beginVersion) << " endVersion:" << std::to_string(endVersion) << " blockSize:" << std::to_string(blockSize) << " filename:" << fileName << " fileSize:" << std::to_string(fileSize) << " tagId: " << (tagId >= 0 ? std::to_string(tagId) : std::string("(None)")); return ss.str(); } }; struct RangeFile { Version version; uint32_t blockSize; std::string fileName; int64_t fileSize; // Order by version, break ties with name bool operator<(const RangeFile& rhs) const { return version == rhs.version ? fileName < rhs.fileName : version < rhs.version; } std::string toString() const { std::stringstream ss; ss << "version:" << std::to_string(version) << " blockSize:" << std::to_string(blockSize) << " fileName:" << fileName << " fileSize:" << std::to_string(fileSize); return ss.str(); } }; struct KeyspaceSnapshotFile { Version beginVersion; Version endVersion; std::string fileName; int64_t totalSize; Optional restorable; // Whether or not the snapshot can be used in a restore, if known bool isSingleVersion() const { return beginVersion == endVersion; } double expiredPct(Optional expiredEnd) const { double pctExpired = 0; if (expiredEnd.present() && expiredEnd.get() > beginVersion) { if (isSingleVersion()) { pctExpired = 1; } else { pctExpired = double(std::min(endVersion, expiredEnd.get()) - beginVersion) / (endVersion - beginVersion); } } return pctExpired * 100; } // Order by beginVersion, break ties with endVersion bool operator<(const KeyspaceSnapshotFile& rhs) const { return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion; } }; struct BackupFileList { std::vector ranges; std::vector logs; std::vector snapshots; void toStream(FILE* fout) const; }; // The byte counts here only include usable log files and byte counts from kvrange manifests struct BackupDescription { BackupDescription() : snapshotBytes(0) {} std::string url; std::vector snapshots; int64_t snapshotBytes; // The version before which everything has been deleted by an expire Optional expiredEndVersion; // The latest version before which at least some data has been deleted by an expire Optional unreliableEndVersion; // The minimum log version in the backup Optional minLogBegin; // The maximum log version in the backup Optional maxLogEnd; // The maximum log version for which there is contiguous log version coverage extending back to minLogBegin Optional contiguousLogEnd; // The maximum version which this backup can be used to restore to Optional maxRestorableVersion; // The minimum version which this backup can be used to restore to Optional minRestorableVersion; std::string extendedDetail; // Freeform container-specific info. bool partitioned; // If this backup contains partitioned mutation logs. // Resolves the versions above to timestamps using a given database's TimeKeeper data. // toString will use this information if present. Future resolveVersionTimes(Database cx); std::map versionTimeMap; std::string toString() const; std::string toJSON() const; }; struct RestorableFileSet { Version targetVersion; std::vector logs; std::vector ranges; // Range file's key ranges. Can be empty for backups generated before 6.3. std::map keyRanges; // Mutation logs continuous range [begin, end). Both can be invalidVersion // when the entire key space snapshot is at the target version. Version continuousBeginVersion, continuousEndVersion; KeyspaceSnapshotFile snapshot; // Info. for debug purposes }; /* IBackupContainer is an interface to a set of backup data, which contains * - backup metadata * - log files * - range files * - keyspace snapshot files defining a complete non overlapping key space snapshot * * Files in a container are identified by a name. This can be any string, whatever * makes sense for the underlying storage system. * * Reading files is done by file name. File names are discovered by getting a RestorableFileSet. * * For remote data stores that are filesystem-like, it's probably best to inherit BackupContainerFileSystem. */ class IBackupContainer { public: virtual void addref() = 0; virtual void delref() = 0; IBackupContainer() {} virtual ~IBackupContainer() {} // Create the container virtual Future create() = 0; virtual Future exists() = 0; // Open a log file or range file for writing virtual Future> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0; virtual Future> writeRangeFile(Version snapshotBeginVersion, int snapshotFileCount, Version fileVersion, int blockSize) = 0; // Open a tagged log file for writing, where tagId is the log router tag's id. virtual Future> writeTaggedLogFile(Version beginVersion, Version endVersion, int blockSize, uint16_t tagId, int totalTags) = 0; // Write a KeyspaceSnapshotFile of range file names representing a full non overlapping // snapshot of the key ranges this backup is targeting. virtual Future writeKeyspaceSnapshotFile(const std::vector& fileNames, const std::vector>& beginEndKeys, int64_t totalBytes) = 0; // Open a file for read by name virtual Future> readFile(const std::string& name) = 0; // Returns the key ranges in the snapshot file. This is an expensive function // and should only be used in simulation for sanity check. virtual Future getSnapshotFileKeyRange(const RangeFile& file) = 0; struct ExpireProgress { std::string step; int total; int done; std::string toString() const; }; // Delete backup files which do not contain any data at or after (more recent than) expireEndVersion. // If force is false, then nothing will be deleted unless there is a restorable snapshot which // - begins at or after expireEndVersion // - ends at or before restorableBeginVersion // If force is true, data is deleted unconditionally which could leave the backup in an unusable state. This is not // recommended. Returns true if expiration was done. virtual Future expireData(Version expireEndVersion, bool force = false, ExpireProgress* progress = nullptr, Version restorableBeginVersion = std::numeric_limits::max()) = 0; // Delete entire container. During the process, if pNumDeleted is not null it will be // updated with the count of deleted files so that progress can be seen. virtual Future deleteContainer(int* pNumDeleted = nullptr) = 0; // Return key details about a backup's contents. // Unless deepScan is true, use cached metadata, if present, as initial contiguous available log range. // If logStartVersionOverride is given, log data prior to that version will be ignored for the purposes // of this describe operation. This can be used to calculate what the restorability of a backup would // be after deleting all data prior to logStartVersionOverride. virtual Future describeBackup(bool deepScan = false, Version logStartVersionOverride = invalidVersion) = 0; virtual Future dumpFileList(Version begin = 0, Version end = std::numeric_limits::max()) = 0; // Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion. // If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot. // If logsOnly is set, only use log files in [beginVersion, targetVervions) in restore set. // Returns non-present if restoring to the given version is not possible. virtual Future> getRestoreSet(Version targetVersion, VectorRef keyRangesFilter = {}, bool logsOnly = false, Version beginVersion = -1) = 0; // Get an IBackupContainer based on a container spec string static Reference openContainer(const std::string& url, const Optional& encryptionKeyFileName = {}); static std::vector getURLFormats(); static Future> listContainers(const std::string& baseURL); std::string getURL() const { return URL; } static std::string lastOpenError; private: std::string URL; }; #endif