mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-27 14:35:52 +00:00
Improve watchdog restart logic:
Stop attempting to restart the server after five consecutive restarts fail to remain operational for at least ten seconds.
This commit is contained in:
@@ -299,7 +299,7 @@ int run (int argc, char** argv)
|
|||||||
std::string logMe = DoSustain ();
|
std::string logMe = DoSustain ();
|
||||||
|
|
||||||
if (!logMe.empty ())
|
if (!logMe.empty ())
|
||||||
std::cerr << logMe;
|
std::cerr << logMe << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the unit tests if requested.
|
// Run the unit tests if requested.
|
||||||
|
|||||||
@@ -70,6 +70,17 @@ std::string StopSustain ()
|
|||||||
return "Terminating monitor";
|
return "Terminating monitor";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reap the child if it has changed state, then report whether it is
// still running.
//
// pid     - process id of the child to check.
// options - flags forwarded to waitpid (for example WNOHANG to poll
//           without blocking, or 0 to block until the child changes
//           state).
//
// Returns true if waitpid succeeded and the process still exists,
// false if waitpid failed or the process is gone.
static
bool checkChild(pid_t pid, int options)
{
    int status;

    // Check the process that was passed in rather than a global, so the
    // function does what its signature says.
    if (waitpid (pid, &status, options) == -1)
        return false;

    // Signal 0 performs the existence and permission checks without
    // delivering anything. The waitpid option flags must never be used
    // as the signal number: a non-zero flag such as WNOHANG would send
    // a real signal to the child being monitored.
    return kill (pid, 0) == 0;
}
|
||||||
|
|
||||||
std::string DoSustain ()
|
std::string DoSustain ()
|
||||||
{
|
{
|
||||||
pManager = getpid ();
|
pManager = getpid ();
|
||||||
@@ -78,6 +89,10 @@ std::string DoSustain ()
|
|||||||
signal (SIGUSR1, pass_signal);
|
signal (SIGUSR1, pass_signal);
|
||||||
signal (SIGUSR2, pass_signal);
|
signal (SIGUSR2, pass_signal);
|
||||||
|
|
||||||
|
// Number of times the child has exited in less than
|
||||||
|
// 15 seconds.
|
||||||
|
int fastExit = 0;
|
||||||
|
|
||||||
for (auto childCount = 1; ; ++childCount)
|
for (auto childCount = 1; ; ++childCount)
|
||||||
{
|
{
|
||||||
pChild = fork ();
|
pChild = fork ();
|
||||||
@@ -100,19 +115,26 @@ std::string DoSustain ()
|
|||||||
|
|
||||||
sleep (sleepBeforeWaiting);
|
sleep (sleepBeforeWaiting);
|
||||||
|
|
||||||
for (;;)
|
// If the child has already terminated count this
|
||||||
|
// as a fast exit and an indication that something
|
||||||
|
// went wrong:
|
||||||
|
if (!checkChild (pChild, WNOHANG))
|
||||||
{
|
{
|
||||||
int i;
|
if (++fastExit == 5)
|
||||||
waitpid (pChild, &i, 0);
|
_exit (0);
|
||||||
if (kill (pChild, 0))
|
|
||||||
break;
|
|
||||||
sleep (sleepBetweenWaits);
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fastExit = 0;
|
||||||
|
|
||||||
|
while (checkChild (pChild, 0))
|
||||||
|
sleep(sleepBetweenWaits);
|
||||||
|
|
||||||
auto pc = std::to_string (pChild);
|
auto pc = std::to_string (pChild);
|
||||||
rename ("core", ("core." + pc).c_str ());
|
rename ("core", ("core." + pc).c_str ());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user