Detect when a unit test child process crashes (RIPD-1592):

When a test suite starts and when it ends, the child process running it
informs the parent process. If the parent has received a start message without
a matching end message, it reports that a child may have crashed in that suite.
This commit is contained in:
seelabs
2018-03-09 13:39:49 -05:00
parent deef322b07
commit 4b2afc8f42
7 changed files with 112 additions and 17 deletions

View File

@@ -4813,6 +4813,10 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='debug|x64'">True</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='release|x64'">True</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\test\basics\DetectCrash_test.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='debug|x64'">True</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='release|x64'">True</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\test\basics\hardened_hash_test.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='debug|x64'">True</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='release|x64'">True</ExcludedFromBuild>

View File

@@ -5724,6 +5724,9 @@
<ClCompile Include="..\..\src\test\basics\contract_test.cpp">
<Filter>test\basics</Filter>
</ClCompile>
<ClCompile Include="..\..\src\test\basics\DetectCrash_test.cpp">
<Filter>test\basics</Filter>
</ClCompile>
<ClCompile Include="..\..\src\test\basics\hardened_hash_test.cpp">
<Filter>test\basics</Filter>
</ClCompile>

View File

@@ -229,9 +229,17 @@ static int runUnitTests(
int bad_child_exits = 0;
for(auto& c : children)
{
c.wait();
if (c.exit_code())
try
{
c.wait();
if (c.exit_code())
++bad_child_exits;
}
catch (...)
{
// wait throws if process was terminated with a signal
++bad_child_exits;
}
}
if (parent_runner.any_failed() || bad_child_exits)

View File

@@ -0,0 +1,46 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2012, 2013 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <BeastConfig.h>
#include <beast/unit_test/suite.hpp>
#include <exception>
namespace ripple {
namespace test {
struct DetectCrash_test : public beast::unit_test::suite
{
void testDetectCrash ()
{
testcase ("Detect Crash");
// Kill the process. This is used to test that the multi-process
// unit test will correctly report the crash.
std::terminate();
}
void run() override
{
testDetectCrash();
}
};
// Registers the suite through the MANUAL variant of the registration macro.
// NOTE(review): presumably this keeps the suite out of default runs (it
// terminates the process on purpose) — confirm against the macro definition.
BEAST_DEFINE_TESTSUITE_MANUAL(DetectCrash,unit_test,beast);
} // test
} // ripple

View File

@@ -338,14 +338,11 @@ multi_runner_base<IsParent>::print_results(S& s)
template <bool IsParent>
void
multi_runner_base<IsParent>::message_queue_send(std::string const& s)
multi_runner_base<IsParent>::message_queue_send(MessageType mt, std::string const& s)
{
// Even though the message queue does _not_ live in shared memory, child
// processes (the only ones using "send" need to protect access with a mutex
// on the OSX platform (access does not appear to need to be protection on
// linux or windows). This is likely due to the different back end implementation
// of message queue in boost, though that has not been confirmed.
// must use a mutex since the two "sends" must happen in order
std::lock_guard<boost::interprocess::interprocess_mutex> l{inner_->m_};
message_queue_->send(&mt, sizeof(mt), /*priority*/ 0);
message_queue_->send(s.c_str(), s.size(), /*priority*/ 0);
}
@@ -381,18 +378,44 @@ multi_runner_parent::multi_runner_parent()
{
std::size_t recvd_size = 0;
unsigned int priority = 0;
this->message_queue_->receive(
buf.data(), buf.size(), recvd_size, priority);
if (!recvd_size)
continue;
assert (recvd_size == 1);
MessageType mt{*reinterpret_cast<MessageType*>(buf.data())};
this->message_queue_->receive(
buf.data(), buf.size(), recvd_size, priority);
if (recvd_size)
{
std::string s{buf.data(), recvd_size};
this->os_ << s;
this->os_.flush();
switch (mt)
{
case MessageType::log:
this->os_ << s;
this->os_.flush();
break;
case MessageType::test_start:
running_suites_.insert(std::move(s));
break;
case MessageType::test_end:
running_suites_.erase(s);
break;
default:
assert(0); // unknown message type
}
}
}
catch (std::exception const& e)
{
std::cerr << "Error: " << e.what()
<< " reading unit test message queue.\n";
return;
}
catch (...)
{
std::cerr << "Error reading unit test message queue.\n";
std::cerr << "Unknown error reading unit test message queue.\n";
return;
}
}
@@ -407,6 +430,12 @@ multi_runner_parent::~multi_runner_parent()
message_queue_thread_.join();
print_results(os_);
for (auto const& s : running_suites_)
{
os_ << "\nSuite: " << s
<< " failed to complete. The child process may have crashed.\n";
}
}
bool
@@ -475,12 +504,14 @@ void
multi_runner_child::on_suite_begin(beast::unit_test::suite_info const& info)
{
// Start a fresh result record for this suite, keyed by its full name.
suite_results_ = detail::suite_results{info.full_name()};
// Send a test_start message carrying the suite name. The parent adds the
// name to its set of running suites; a name still in that set when the
// parent is destroyed is reported as a suite that failed to complete.
// Must come after the assignment above, since it reads suite_results_.name.
message_queue_send(MessageType::test_start, suite_results_.name);
}
// Called when the current suite finishes: folds the suite's results into the
// child's totals and sends the parent a test_end message for the suite.
void
multi_runner_child::on_suite_end()
{
results_.add(suite_results_);
// Matching test_end for the earlier test_start; the parent erases the suite
// name from its set of running suites when it receives this message.
message_queue_send(MessageType::test_end, suite_results_.name);
}
void
@@ -496,7 +527,7 @@ multi_runner_child::on_case_begin(std::string const& name)
s << job_index_ << "> ";
s << suite_results_.name
<< (case_results_.name.empty() ? "" : (" " + case_results_.name)) << '\n';
message_queue_send(s.str());
message_queue_send(MessageType::log, s.str());
}
void
@@ -521,7 +552,7 @@ multi_runner_child::on_fail(std::string const& reason)
s << job_index_ << "> ";
s << "#" << case_results_.total << " failed" << (reason.empty() ? "" : ": ")
<< reason << '\n';
message_queue_send(s.str());
message_queue_send(MessageType::log, s.str());
}
void
@@ -534,7 +565,7 @@ multi_runner_child::on_log(std::string const& msg)
if (num_jobs_ > 1)
s << job_index_ << "> ";
s << msg;
message_queue_send(s.str());
message_queue_send(MessageType::log, s.str());
}
namespace detail {

View File

@@ -163,7 +163,8 @@ class multi_runner_base
protected:
std::unique_ptr<boost::interprocess::message_queue> message_queue_;
void message_queue_send(std::string const& s);
enum class MessageType : std::uint8_t {test_start, test_end, log};
void message_queue_send(MessageType mt, std::string const& s);
public:
multi_runner_base();
@@ -208,7 +209,8 @@ private:
std::ostream& os_;
std::atomic<bool> continue_message_queue_{true};
std::thread message_queue_thread_;
// track running suites so if a child crashes the culprit can be flagged
std::set<std::string> running_suites_;
public:
multi_runner_parent(multi_runner_parent const&) = delete;
multi_runner_parent&
@@ -335,7 +337,7 @@ multi_runner_child::run_multi(Pred pred)
// inform the parent
std::stringstream s;
s << job_index_ << "> failed Unhandled exception in test.\n";
message_queue_send(s.str());
message_queue_send(MessageType::log, s.str());
failed = true;
}
}

View File

@@ -22,6 +22,7 @@
#include <test/basics/Buffer_test.cpp>
#include <test/basics/CheckLibraryVersions_test.cpp>
#include <test/basics/contract_test.cpp>
#include <test/basics/DetectCrash_test.cpp>
#include <test/basics/hardened_hash_test.cpp>
#include <test/basics/KeyCache_test.cpp>
#include <test/basics/mulDiv_test.cpp>