From 433a4a1e0acd95ecffdd2401ba93d11342243db5 Mon Sep 17 00:00:00 2001
From: Savinda Senevirathne
Date: Wed, 16 Jun 2021 14:05:43 +0530
Subject: [PATCH] Hp monitor loop to monitor container status and re spawn if crashed. (#8)

---
 src/hp_manager.cpp | 109 +++++++++++++++++++++++++++++++++++++++++++--
 src/hp_manager.hpp |   8 +++-
 src/sqlite.cpp     |  53 ++++++++++++++++++++++
 src/sqlite.hpp     |   4 ++
 4 files changed, 169 insertions(+), 5 deletions(-)

diff --git a/src/hp_manager.cpp b/src/hp_manager.cpp
index 38cc71f..58a602f 100644
--- a/src/hp_manager.cpp
+++ b/src/hp_manager.cpp
@@ -8,7 +8,7 @@ namespace hp
 {
     // Keep track of the ports of the most recent hp instance.
     ports last_assigned_ports;
-    
+
     resources instance_resources;

     // This is defaults to true because it initialize last assigned ports when a new instance is created if there is no vacant ports available.
@@ -21,6 +21,10 @@ namespace hp
     // Vector keeping vacant ports from destroyed instances.
     std::vector vacant_ports;

+    // This thread monitors the status of the created instances.
+    std::thread hp_monitor_thread;
+    bool is_shutting_down = false; // NOTE(review): written by deinit() and read by the monitor thread without synchronization; consider std::atomic<bool>.
+
     /**
      * Initialize hp related environment.
      */
@@ -37,6 +41,9 @@ namespace hp
         // Populate the vacant ports vector with vacant ports of destroyed containers.
         sqlite::get_vacant_ports(db, vacant_ports);

+        // Monitor thread is temporarily disabled until the implementation details are finalized.
+        // hp_monitor_thread = std::thread(hp_monitor_loop);
+
         // Calculate the resources per instance.
         instance_resources.cpu_micro_seconds = conf::cfg.system.max_cpu_micro_seconds / conf::cfg.system.max_instance_count;
         instance_resources.mem_bytes = conf::cfg.system.max_mem_bytes / conf::cfg.system.max_instance_count;
@@ -50,10 +57,64 @@ namespace hp
      */
     void deinit()
     {
+        is_shutting_down = true;
+        if (hp_monitor_thread.joinable())
+            hp_monitor_thread.join();
+
         if (db != NULL)
             sqlite::close_db(&db);
     }

+    /**
+     * Monitors the status of the created containers. Containers that are no longer running are respawned.
+     * If the respawn fails, the current_status field is updated to 'exited' in the database.
+     */
+    void hp_monitor_loop()
+    {
+        LOG_INFO << "HP instance monitor started.";
+        std::vector<std::string> running_instance_names;
+
+        util::mask_signal();
+
+        int counter = 0;
+
+        while (!is_shutting_down)
+        {
+            // Check containers on the first pass and then on every 600th pass of the short sleep below (about once a minute).
+            // One long sleep is avoided so that a shutdown (e.g. SIGINT) does not have to wait for the whole interval.
+            if (counter == 0 || counter == 600)
+            {
+                sqlite::get_running_instance_names(db, running_instance_names);
+                for (const auto &name : running_instance_names)
+                {
+                    std::string status;
+                    const int res = check_instance_status(name, status);
+                    if (res == 0 && status != CONTAINER_STATES[STATES::RUNNING])
+                    {
+                        if (docker_start(name) == -1)
+                        {
+                            // We only change the current status variable from the monitor loop.
+                            // We try to start this container in next iteration as well until the desired state is achieved.
+                            if (sqlite::update_current_status_in_container(db, name, CONTAINER_STATES[STATES::EXITED]) == 0)
+                                LOG_INFO << "Re-spinning " + name + " failed. Current status updated to 'exited' in DB.";
+                        }
+                        else
+                        {
+                            // Make the current field NULL because the instance is healthy now.
+                            if (sqlite::update_current_status_in_container(db, name, {}) == 0)
+                                LOG_INFO << "Re-spinning " + name + " successful.";
+                        }
+                    }
+                }
+                counter = 0;
+            }
+            counter++;
+            util::sleep(100);
+        }
+
+        LOG_INFO << "HP instance monitor stopped.";
+    }
+
     /**
      * Create a new instance of hotpocket. A new contract is created and then the docker images is run on that.
      * @param info Structure holding the generated instance info.
@@ -169,8 +230,8 @@ namespace hp
             LOG_ERROR << "Given container is not stopped. 
name: " << container_name;
             return -1;
         }
-        const std::string command = "docker start " + container_name;
-        if (system(command.c_str()) != 0 || sqlite::update_status_in_container(db, container_name, CONTAINER_STATES[STATES::RUNNING]) == -1)
+
+        if (docker_start(container_name) != 0 || sqlite::update_status_in_container(db, container_name, CONTAINER_STATES[STATES::RUNNING]) == -1)
         {
             LOG_ERROR << "Error when starting container. name: " << container_name;
             return -1;
@@ -179,6 +240,18 @@ namespace hp
         return 0;
     }

+    /**
+     * Execute docker start command.
+     * @param container_name Name of the container.
+     * @return 0 on successful execution and -1 on error.
+     */
+    int docker_start(const std::string &container_name)
+    {
+        const std::string command = "docker start " + container_name;
+        const int res = system(command.c_str());
+        return res == 0 ? 0 : -1;
+    }
+
     /**
      * Destroy the container with given name if exists.
      * @param container_name Name of the container.
@@ -323,4 +396,34 @@ namespace hp
         return 0;
     }

+    /**
+     * Check the status of the given container using docker inspect command.
+     * @param name Name of the container.
+     * @param status Receives the container status (e.g. "running"). Only meaningful when 0 is returned.
+     * @return 0 on success and -1 on error (popen failure, non-zero exit of the command, or no usable output).
+     */
+    int check_instance_status(std::string_view name, std::string &status)
+    {
+        std::string command("docker inspect --format='{{json .State.Status}}' ");
+        command.append(name);
+        FILE *fpipe = popen(command.c_str(), "r");
+
+        if (fpipe == NULL)
+        {
+            LOG_ERROR << "Error on popen for command " << command;
+            return -1;
+        }
+        // Read the single output line, then always pclose() so the child is reaped even when nothing was printed.
+        char buffer[20];
+        const bool has_output = fgets(buffer, sizeof(buffer), fpipe) != NULL;
+        if (pclose(fpipe) != 0 || !has_output)
+            return -1;
+
+        // Output is a JSON string plus newline; trim quotes/line ending by character set so short output cannot throw (npos + 1 wraps to 0).
+        status = buffer;
+        status.erase(status.find_last_not_of("\"\r\n ") + 1);
+        status.erase(0, status.find_first_not_of("\"\r\n "));
+        return status.empty() ? -1 : 0;
+    }
+
 } // namespace hp
diff --git a/src/hp_manager.hpp b/src/hp_manager.hpp
index 5a30003..62ef4df 100644
--- a/src/hp_manager.hpp
+++ b/src/hp_manager.hpp
@@ -5,13 +5,14 @@

 namespace hp
 {
-    constexpr const char *CONTAINER_STATES[]{"RUNNING", "STOPPED", "DESTROYED"};
+    constexpr const char *CONTAINER_STATES[]{"running", "stopped", "destroyed", "exited"};

     enum STATES
     {
         RUNNING,
         STOPPED,
-        DESTROYED
+        DESTROYED,
+        EXITED
     };

     // Stores port pair assigned to a container.
@@ -46,13 +47,16 @@ namespace hp

     int init();
     void deinit();
+    void hp_monitor_loop();
     int create_new_instance(instance_info &info, std::string_view owner_pubkey);
     int run_container(const std::string &folder_name, const ports &assigned_ports);
     int start_container(const std::string &container_name);
+    int docker_start(const std::string &container_name);
     int stop_container(const std::string &container_name);
     int destroy_container(const std::string &container_name);
     void kill_all_containers();
     int create_contract(instance_info &info, const std::string &folder_name, const ports &assigned_ports);
     int write_json_file(const int fd, const jsoncons::ojson &d);
+    int check_instance_status(std::string_view name, std::string &status);
 } // namespace hp
 #endif
\ No newline at end of file
diff --git a/src/sqlite.cpp b/src/sqlite.cpp
index 48a8af2..d71feb2 100644
--- a/src/sqlite.cpp
+++ b/src/sqlite.cpp
@@ -33,8 +33,12 @@ namespace sqlite

     constexpr const char *UPDATE_STATUS_IN_HP = "UPDATE instances SET status = ? 
WHERE name = ?";

+    constexpr const char *UPDATE_CURRENT_STATUS_IN_HP = "UPDATE instances SET current_status = ? WHERE name = ?";
+
     constexpr const char *IS_CONTAINER_EXISTS = "SELECT * FROM instances WHERE name = ?";

+    constexpr const char *GET_RUNNING_INSTANCE_NAMES = "SELECT name FROM instances WHERE status = ?";
+
     constexpr const char *IS_TABLE_EXISTS = "SELECT * FROM sqlite_master WHERE type='table' AND name = ?";

     /**
@@ -281,6 +285,7 @@ namespace sqlite
             table_column_info("owner_pubkey", COLUMN_DATA_TYPE::TEXT),
             table_column_info("time", COLUMN_DATA_TYPE::INT),
             table_column_info("status", COLUMN_DATA_TYPE::TEXT),
+            table_column_info("current_status", COLUMN_DATA_TYPE::TEXT),
             table_column_info("name", COLUMN_DATA_TYPE::TEXT, true),
             table_column_info("ip", COLUMN_DATA_TYPE::TEXT),
             table_column_info("peer_port", COLUMN_DATA_TYPE::INT),
@@ -377,6 +382,28 @@ namespace sqlite
         return -1;
     }

+    /**
+     * Update the current status of the given container to the new value.
+     * @param db Database connection.
+     * @param container_name Name of the container whose status should be updated.
+     * @param current_status The new status of the container. A view with a null data pointer is bound as SQL NULL.
+     * @return 0 on success and -1 on error.
+     */
+    int update_current_status_in_container(sqlite3 *db, std::string_view container_name, std::string_view current_status)
+    {
+        sqlite3_stmt *stmt = NULL;
+        const bool updated = sqlite3_prepare_v2(db, UPDATE_CURRENT_STATUS_IN_HP, -1, &stmt, 0) == SQLITE_OK && stmt != NULL &&
+                             sqlite3_bind_text(stmt, 1, current_status.data(), current_status.length(), SQLITE_STATIC) == SQLITE_OK &&
+                             sqlite3_bind_text(stmt, 2, container_name.data(), container_name.length(), SQLITE_STATIC) == SQLITE_OK &&
+                             sqlite3_step(stmt) == SQLITE_DONE;
+        // Finalize on every path (harmless for NULL) so that a failed bind or step does not leak the statement.
+        sqlite3_finalize(stmt);
+        if (updated)
+            return 0;
+        LOG_ERROR << "Error updating container current status for " << container_name;
+        return -1;
+    }
+
     /**
      * Get the max peer and user ports assigned for instances excluding destroyed instances.
      * @param db Database connection.
@@ -431,4 +458,30 @@ namespace sqlite
         // Finalize and distroys the statement.
         sqlite3_finalize(stmt);
     }
+
+    /**
+     * Populate the given vector with names of running hp instances. The vector is cleared first.
+     * @param db Database connection.
+     * @param running_instance_names Vector to hold name of instances from database.
+     */
+    void get_running_instance_names(sqlite3 *db, std::vector<std::string> &running_instance_names)
+    {
+        running_instance_names.clear();
+
+        sqlite3_stmt *stmt = NULL;
+        std::string_view running_status(hp::CONTAINER_STATES[hp::STATES::RUNNING]);
+
+        if (sqlite3_prepare_v2(db, GET_RUNNING_INSTANCE_NAMES, -1, &stmt, 0) == SQLITE_OK && stmt != NULL &&
+            sqlite3_bind_text(stmt, 1, running_status.data(), running_status.length(), SQLITE_STATIC) == SQLITE_OK)
+        {
+            while (stmt != NULL && sqlite3_step(stmt) == SQLITE_ROW)
+            {
+                if (const unsigned char *text = sqlite3_column_text(stmt, 0)) // NULL for SQL NULL values; skip instead of building a string from NULL.
+                    running_instance_names.emplace_back(reinterpret_cast<const char *>(text));
+            }
+        }
+
+        // Finalize and destroy the statement.
+        sqlite3_finalize(stmt);
+    }
 }
diff --git a/src/sqlite.hpp b/src/sqlite.hpp
index 55e0262..b6c229b 100644
--- a/src/sqlite.hpp
+++ b/src/sqlite.hpp
@@ -69,8 +69,12 @@ namespace sqlite

     int update_status_in_container(sqlite3 *db, std::string_view container_name, std::string_view status);

+    int update_current_status_in_container(sqlite3 *db, std::string_view container_name, std::string_view current_status);
+
     void get_max_ports(sqlite3 *db, hp::ports &max_ports);

     void get_vacant_ports(sqlite3 *db, std::vector &vacant_ports);
+
+    void get_running_instance_names(sqlite3 *db, std::vector<std::string> &running_instance_names);
 }
 #endif