From 9ba82786ce56d515cc07498bf46d06e2c62a6887 Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Thu, 20 Jun 2013 16:58:59 -0700 Subject: [PATCH] [RocksDB] Provide contiguous sequence number even in case of write failure Summary: Replication logic would be simplified if we can guarantee that write sequence number is always contiguous, even if write failure occurs. Dhruba and I looked at the sequence number generation part of the code. It seems fixable. Note that if WAL was successful and insert into memtable was not, we would be in an unfortunate state. The approach in this diff is: IO error is expected and error status will be returned to client, sequence number will not be advanced; In-mem error is not expected and we panic. Test Plan: make check; db_stress Reviewers: dhruba, sheki CC: leveldb Differential Revision: https://reviews.facebook.net/D11439 --- db/db_impl.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index c5c156feb4..7fda3ef7e6 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -2214,13 +2215,19 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { } if (status.ok()) { status = WriteBatchInternal::InsertInto(updates, mem_); + if (!status.ok()) { + // Panic for in-memory corruptions + // Note that existing logic was not sound. Any partial failure writing + // into the memtable would result in a state that some write ops might + // have succeeded in memtable but Status reports error for all writes. + throw std::runtime_error("In memory WriteBatch corruption!"); + } + versions_->SetLastSequence(last_sequence); + last_flushed_sequence_ = current_sequence; } mutex_.Lock(); } - last_flushed_sequence_ = current_sequence; if (updates == &tmp_batch_) tmp_batch_.Clear(); - - versions_->SetLastSequence(last_sequence); } while (true) {