Make Log::Reader more robust

Summary: This diff does two things: (1) Log::Reader does not report a corruption when the last record in a log or manifest file is truncated (meaning that log writer died in the middle of the write). Inherited the code from LevelDB: https://code.google.com/p/leveldb/source/detail?r=269fc6ca9416129248db5ca57050cd5d39d177c8# (2) Turn off mmap writes for all writes to log and manifest files (2) is necessary because if we use mmap writes, the last record is not truncated, but is actually filled with zeros, making checksum fail. It is hard to recover from checksum failing. Test Plan: Added unit tests from LevelDB Actually recovered a "corrupted" MANIFEST file. Reviewers: dhruba, haobo Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D16119
2025-12-06 17:27:55 +00:00 · 2014-02-28 13:19:47 -08:00
parent a77527f2af
commit 58ca641d53
8 changed files with 74 additions and 26 deletions
--- a/db/log_reader.cc
+++ b/db/log_reader.cc
@@ -140,7 +140,9 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {

      case kEof:
        if (in_fragmented_record) {
-          ReportCorruption(scratch->size(), "partial record without end(3)");
+          // This can be caused by the writer dying immediately after
+          //  writing a physical record but before completing the next; don't
+          //  treat it as a corruption, just ignore the entire logical record.
          scratch->clear();
        }
        return false;
@@ -264,13 +266,12 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) {
          eof_offset_ = buffer_.size();
        }
        continue;
-      } else if (buffer_.size() == 0) {
-        // End of file
-        return kEof;
      } else {
-        size_t drop_size = buffer_.size();
+        // Note that if buffer_ is non-empty, we have a truncated header at the
+        //  end of the file, which can be caused by the writer crashing in the
+        //  middle of writing the header. Instead of considering this an error,
+        //  just report EOF.
        buffer_.clear();
-        ReportCorruption(drop_size, "truncated record at end of file");
        return kEof;
      }
    }
@@ -284,14 +285,22 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) {
    if (kHeaderSize + length > buffer_.size()) {
      size_t drop_size = buffer_.size();
      buffer_.clear();
-      ReportCorruption(drop_size, "bad record length");
-      return kBadRecord;
+      if (!eof_) {
+        ReportCorruption(drop_size, "bad record length");
+        return kBadRecord;
+      }
+      // If the end of the file has been reached without reading |length| bytes
+      // of payload, assume the writer died in the middle of writing the record.
+      // Don't report a corruption.
+      return kEof;
    }

    if (type == kZeroType && length == 0) {
      // Skip zero length record without reporting any drops since
      // such records are produced by the mmap based writing code in
      // env_posix.cc that preallocates file regions.
+      // NOTE: this should never happen in DB written by new RocksDB versions,
+      // since we turn off mmap writes to manifest and log files
      buffer_.clear();
      return kBadRecord;
    }