From 941543721dfaa73b3a41e102423f84f9c0b9c68a Mon Sep 17 00:00:00 2001
From: Andrew Kryczka <andrewkr@fb.com>
Date: Tue, 7 Sep 2021 13:25:24 -0700
Subject: [PATCH] Bytes read stat for `VerifyChecksum()` and
 `VerifyFileChecksums()` APIs (#8741)

Summary:
- Clarified some comments on compatibility for adding new ticker stats
- Added read I/O stats for `VerifyChecksum()` and `VerifyFileChecksums()` APIs

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8741

Test Plan: new unit test

Reviewed By: zhichao-cao

Differential Revision: D30708578

Pulled By: ajkr

fbshipit-source-id: d06b961f7e199ae92c266b683e39870aa8f63449
---
 HISTORY.md                   |  1 +
 db/db_impl/db_impl.cc        | 13 ++++++++++
 db/db_statistics_test.cc     | 49 ++++++++++++++++++++++++++++++++++++
 include/rocksdb/statistics.h | 10 +++++---
 java/rocksjni/portal.h       | 26 ++++++++++++++++---
 monitoring/statistics.cc     |  1 +
 6 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 13c8aa6a54..db00424d09 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -10,6 +10,7 @@
 
 ### New Features
 * RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions.
+* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries.
 
 ### Public API change
 * Remove obsolete implementation details FullKey and ParseFullKey from public API
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 40df71a75d..c05bde8572 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -4936,6 +4936,11 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
 
 Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
                                       bool use_file_checksum) {
+  // `bytes_read` stat is enabled based on compile-time support and cannot
+  // be dynamically toggled. So we do not need to worry about `PerfLevel`
+  // here, unlike many other `IOStatsContext` / `PerfContext` stats.
+  uint64_t prev_bytes_read = IOSTATS(bytes_read);
+
   Status s;
 
   if (use_file_checksum) {
@@ -4990,6 +4995,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
           s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
                                                        read_options, fname);
         }
+        RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
+                   IOSTATS(bytes_read) - prev_bytes_read);
+        prev_bytes_read = IOSTATS(bytes_read);
       }
     }
 
@@ -5004,6 +5012,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
         s = VerifyFullFileChecksum(meta->GetChecksumValue(),
                                    meta->GetChecksumMethod(), blob_file_name,
                                    read_options);
+        RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
+                   IOSTATS(bytes_read) - prev_bytes_read);
+        prev_bytes_read = IOSTATS(bytes_read);
         if (!s.ok()) {
           break;
         }
@@ -5035,6 +5046,8 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
       cfd->UnrefAndTryDelete();
     }
   }
+  RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
+             IOSTATS(bytes_read) - prev_bytes_read);
   return s;
 }
 
diff --git a/db/db_statistics_test.cc b/db/db_statistics_test.cc
index 6cd0440f4d..91ae972cb3 100644
--- a/db/db_statistics_test.cc
+++ b/db/db_statistics_test.cc
@@ -155,6 +155,55 @@ TEST_F(DBStatisticsTest, ExcludeTickers) {
   ASSERT_GT(options.statistics->getTickerCount(BYTES_READ), 0);
 }
 
+#ifndef ROCKSDB_LITE
+
+TEST_F(DBStatisticsTest, VerifyChecksumReadStat) {
+  Options options = CurrentOptions();
+  options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
+  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+  Reopen(options);
+
+  // Expected to be populated regardless of `PerfLevel` in user thread
+  SetPerfLevel(kDisable);
+
+  {
+    // Scenario 0: only WAL data. Not verified so require ticker to be zero.
+    ASSERT_OK(Put("foo", "value"));
+    ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
+    ASSERT_OK(db_->VerifyChecksum());
+    ASSERT_EQ(0,
+              options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));
+  }
+
+  // Create one SST.
+  ASSERT_OK(Flush());
+  std::unordered_map<std::string, uint64_t> table_files;
+  uint64_t table_files_size = 0;
+  GetAllDataFiles(kTableFile, &table_files, &table_files_size);
+
+  {
+    // Scenario 1: Table verified in `VerifyFileChecksums()`. This should read
+    // the whole file so we require the ticker stat exactly matches the file
+    // size.
+    ASSERT_OK(options.statistics->Reset());
+    ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
+    ASSERT_EQ(table_files_size,
+              options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));
+  }
+
+  {
+    // Scenario 2: Table verified in `VerifyChecksum()`. This opens a
+    // `TableReader` to verify each block. It can involve duplicate reads of the
+    // same data so we set a lower-bound only.
+    ASSERT_OK(options.statistics->Reset());
+    ASSERT_OK(db_->VerifyChecksum());
+    ASSERT_GE(options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES),
+              table_files_size);
+  }
+}
+
+#endif  // !ROCKSDB_LITE
+
 }  // namespace ROCKSDB_NAMESPACE
 
 int main(int argc, char** argv) {
diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h
index 45da88121d..83f6ddee0c 100644
--- a/include/rocksdb/statistics.h
+++ b/include/rocksdb/statistics.h
@@ -18,12 +18,13 @@
 namespace ROCKSDB_NAMESPACE {
 
 /**
- * Keep adding ticker's here.
- *  1. Any ticker should be added before TICKER_ENUM_MAX.
+ * Keep adding tickers here.
+ *  1. Any ticker should be added immediately before TICKER_ENUM_MAX, taking
+ *     over its old value.
  *  2. Add a readable string in TickersNameMap below for the newly added ticker.
  *  3. Add a corresponding enum value to TickerType.java in the java API
  *  4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType
- * and toCppTickers
+ *     and toCppTickers
  */
 enum Tickers : uint32_t {
   // total block cache misses
@@ -404,6 +405,9 @@ enum Tickers : uint32_t {
   // Secondary cache statistics
   SECONDARY_CACHE_HITS,
 
+  // Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs.
+  VERIFY_CHECKSUM_READ_BYTES,
+
   TICKER_ENUM_MAX
 };
 
diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h
index 5c9be330ed..47a7714cd0 100644
--- a/java/rocksjni/portal.h
+++ b/java/rocksjni/portal.h
@@ -4876,7 +4876,7 @@ class TickerTypeJni {
       case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND:
         return 0x5E;
       case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED:
-        // -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX.
+        // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX).
         return -0x01;
       case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED:
         return 0x60;
@@ -5002,8 +5002,17 @@ class TickerTypeJni {
         return -0x1D;
       case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS:
         return -0x1E;
+      case ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES:
+        return -0x1F;
       case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
-        // 0x5F for backwards compatibility on current minor version.
+        // 0x5F was the max value in the initial copy of tickers to Java.
+        // Since these values are exposed directly to Java clients, we keep
+        // the value the same forever.
+        //
+        // TODO: This particular case seems confusing and unnecessary to pin the
+        // value since it's meant to be the number of tickers, not an actual
+        // ticker value. But we aren't yet in a position to fix it since the
+        // number of tickers doesn't fit in the Java representation (jbyte).
         return 0x5F;
       default:
         // undefined/default
@@ -5206,7 +5215,7 @@ class TickerTypeJni {
       case 0x5E:
         return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND;
       case -0x01:
-        // -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX.
+        // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX).
         return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED;
       case 0x60:
         return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED;
@@ -5334,8 +5343,17 @@ class TickerTypeJni {
         return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH;
       case -0x1E:
         return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS;
+      case -0x1F:
+        return ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES;
       case 0x5F:
-        // 0x5F for backwards compatibility on current minor version.
+        // 0x5F was the max value in the initial copy of tickers to Java.
+        // Since these values are exposed directly to Java clients, we keep
+        // the value the same forever.
+        //
+        // TODO: This particular case seems confusing and unnecessary to pin the
+        // value since it's meant to be the number of tickers, not an actual
+        // ticker value. But we aren't yet in a position to fix it since the
+        // number of tickers doesn't fit in the Java representation (jbyte).
         return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX;
 
       default:
diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc
index 7b30c7fcc0..d2e81ad967 100644
--- a/monitoring/statistics.cc
+++ b/monitoring/statistics.cc
@@ -206,6 +206,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
     {MEMTABLE_GARBAGE_BYTES_AT_FLUSH,
      "rocksdb.memtable.garbage.bytes.at.flush"},
     {SECONDARY_CACHE_HITS, "rocksdb.secondary.cache.hits"},
+    {VERIFY_CHECKSUM_READ_BYTES, "rocksdb.verify_checksum.read.bytes"},
 };
 
 const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {