Integrated ptrace tracking for state files.

This commit is contained in:
ravinsp
2019-11-09 07:38:10 +05:30
parent 7815390b25
commit 32ef41bfdf
10 changed files with 2122 additions and 52 deletions

View File

@@ -9,7 +9,7 @@
#include "../p2p/peer_session_handler.hpp"
#include "../hplog.hpp"
#include "../crypto.hpp"
#include "../proc.hpp"
#include "../proc/proc.hpp"
#include "ledger_handler.hpp"
#include "cons.hpp"
@@ -646,12 +646,13 @@ void run_contract_binary(const int64_t time_now, proc::contract_bufmap_t &userio
// todo:implement exchange of npl and hpsc bufs
proc::contract_bufmap_t nplbufmap;
proc::contract_iobuf_pair hpscbufpair;
hpscbufpair.inputs.push_back("A");
hpscbufpair.inputs.push_back("B");
hpscbufpair.inputs.push_back("C");
// This will hold a list of file blocks that was updated by the contract process.
// We then feed this information to state tracking logic.
proc::contract_fblockmap_t state_updates;
proc::exec_contract(
proc::contract_exec_args(time_now, useriobufmap, nplbufmap, hpscbufpair));
proc::contract_exec_args(time_now, useriobufmap, nplbufmap, hpscbufpair, state_updates));
}
/**

View File

@@ -3,7 +3,7 @@
#include "../pchheader.hpp"
#include "../util.hpp"
#include "../proc.hpp"
#include "../proc/proc.hpp"
#include "../p2p/p2p.hpp"
#include "../usr/user_input.hpp"

View File

@@ -6,7 +6,7 @@
#include "util.hpp"
#include "conf.hpp"
#include "crypto.hpp"
#include "proc.hpp"
#include "proc/proc.hpp"
#include "hplog.hpp"
#include "usr/usr.hpp"
#include "p2p/p2p.hpp"
@@ -157,6 +157,9 @@ int main(int argc, char **argv)
if (conf::init() != 0)
return -1;
// Set HP process cwd to the contract directory.
chdir(conf::ctx.contractDir.c_str());
hplog::init();
if (p2p::init() != 0 || usr::init() != 0 || cons::init() != 0)
@@ -166,7 +169,7 @@ int main(int argc, char **argv)
signal(SIGINT, signal_handler);
// We wait here for peers to establish peer connections.
// Otherwise we'll run straight into a "not enough peers proposing"/stage desync deadlock.
// Otherwise we'll run straight into a "not enough peers proposing"/stage desync deadlock.
sleep(3);
while (true)
@@ -183,4 +186,3 @@ int main(int argc, char **argv)
std::cout << "exited normally\n";
return 0;
}

View File

@@ -47,8 +47,13 @@
#include <string>
#include <string_view>
#include <sys/ioctl.h>
#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <syscall.h>
#include <thread>
#include <unistd.h>
#include <unordered_map>

View File

@@ -1,7 +1,8 @@
#include "pchheader.hpp"
#include "../pchheader.hpp"
#include "../conf.hpp"
#include "../hplog.hpp"
#include "proc.hpp"
#include "conf.hpp"
#include "hplog.hpp"
#include "ptrace_capture.hpp"
namespace proc
{
@@ -29,7 +30,7 @@ contract_fdmap_t nplfds;
std::vector<int> hpscfds;
// Holds the contract process id (if currently executing).
__pid_t contract_pid;
pid_t contract_pid;
/**
* Executes the contract process and passes the specified arguments.
@@ -45,7 +46,7 @@ int exec_contract(const contract_exec_args &args)
if (feed_inputs(args) != 0)
return -1;
const __pid_t pid = fork();
const pid_t pid = fork();
if (pid > 0)
{
// HotPocket process.
@@ -54,8 +55,9 @@ int exec_contract(const contract_exec_args &args)
// Close all fds unused by HP process.
close_unused_fds(true);
// Wait for child process (contract process) to complete execution.
const int presult = await_contract_execution();
// Capture child process (contract process) until it completes execution.
// This call will return when the contract process exits.
const int presult = ptrace_capture(contract_pid, args.state_updates);
LOG_INFO << "Contract process ended.";
contract_pid = 0;
@@ -77,15 +79,14 @@ int exec_contract(const contract_exec_args &args)
// Close all fds unused by SC process.
close_unused_fds(false);
// Set the contract process working directory.
boost::filesystem::current_path(conf::ctx.contractDir);
// Write the contract input message from HotPocket to the stdin (0) of the contract process.
write_contract_args(args);
LOG_INFO << "Starting contract process...";
char *execv_args[] = {conf::cfg.binary.data(), conf::cfg.binargs.data(), NULL};
ptrace(PTRACE_TRACEME, 0, NULL, NULL);
execv(execv_args[0], execv_args);
}
else
@@ -97,23 +98,6 @@ int exec_contract(const contract_exec_args &args)
return 0;
}
/**
 * Blocks the calling thread until the contract process has completed execution (if running).
 *
 * @return 0 if no contract process is running or the contract exited with status 0.
 *         The contract's exit code if it exited normally with a non-zero status.
 *         -1 if the contract was terminated abnormally (e.g. killed by a signal) or waitpid() failed.
 */
int await_contract_execution()
{
    // Nothing to wait for when no contract process is being tracked.
    if (contract_pid <= 0)
        return 0;

    int scstatus = 0;
    if (waitpid(contract_pid, &scstatus, 0) == -1)
        return -1;

    // WEXITSTATUS is only meaningful when the process terminated via exit()/return from main.
    if (WIFEXITED(scstatus))
        return WEXITSTATUS(scstatus);

    // Terminated by a signal (or otherwise not a normal exit).
    return -1;
}
/**
* Writes the contract args (JSON) into the stdin of the contract process.
* Args format:

View File

@@ -1,9 +1,9 @@
#ifndef _HP_PROC_
#define _HP_PROC_
#include "pchheader.hpp"
#include "usr/usr.hpp"
#include "util.hpp"
#include "../pchheader.hpp"
#include "../usr/usr.hpp"
#include "../util.hpp"
/**
* Contains helper functions regarding POSIX process execution and IPC between HP and SC.
@@ -32,6 +32,11 @@ typedef std::unordered_map<std::string, std::vector<int>> contract_fdmap_t;
// This is used to keep track of input/output buffers for a given public key (eg. user, npl)
typedef std::unordered_map<std::string, contract_iobuf_pair> contract_bufmap_t;
// Common typedef for a map of updated blocks of state files by the contract process.
// This is used as a hint in updating the state merkle tree.
// filename->modified blocks
typedef std::unordered_map<std::string, std::set<uint32_t>> contract_fblockmap_t;
/**
* Holds information that should be passed into the contract process.
*/
@@ -48,27 +53,31 @@ struct contract_exec_args
// Pair of HP<->SC JSON message buffers (mainly used for control messages).
// Input buffers for HP->SC messages, Output buffers for SC->HP messages.
contract_iobuf_pair &hpscbufs;
// The map of state files that was updated with updated block ids.
// Each block id N represents Nth 4MB block of the file.
contract_fblockmap_t &state_updates;
// Current HotPocket timestamp.
int64_t timestamp;
const int64_t timestamp;
contract_exec_args(
int64_t _timestamp,
contract_bufmap_t &_userbufs,
contract_bufmap_t &_nplbufs,
contract_iobuf_pair &_hpscbufs) :
userbufs(_userbufs),
nplbufs(_nplbufs),
hpscbufs(_hpscbufs)
int64_t timestamp,
contract_bufmap_t &userbufs,
contract_bufmap_t &nplbufs,
contract_iobuf_pair &hpscbufs,
contract_fblockmap_t &state_updates) :
userbufs(userbufs),
nplbufs(nplbufs),
hpscbufs(hpscbufs),
state_updates(state_updates),
timestamp(timestamp)
{
timestamp = _timestamp;
}
};
int exec_contract(const contract_exec_args &args);
int await_contract_execution();
//------Internal-use functions for this namespace.
int write_contract_args(const contract_exec_args &args);

248
src/proc/ptrace_capture.cpp Normal file
View File

@@ -0,0 +1,248 @@
// Code adapted from https://github.com/codetsunami/file-ptracer/blob/master/trace.cpp
#include "../pchheader.hpp"
#include "../hplog.hpp"
#include "proc.hpp"
#include "ptrace_syscalls.hpp"
#define REG(reg) reg.orig_rax
namespace proc
{
/**
 * Tracking record for a single file descriptor opened by the traced child process.
 * Remains an aggregate, so `fd_info{path, cursor}` initialization keeps working.
 */
struct fd_info
{
    // Absolute path to the file.
    std::string filepath;

    // Current position at which reads and writes will occur, as tracked by the tracer.
    // Defaults to 0 so that a default-initialized record never carries an indeterminate cursor.
    unsigned long long cursor = 0;
};
// Granularity (in bytes) at which state file modifications are tracked: 4MB per block.
static constexpr int BLOCK_SIZE = 4 << 20;
/**
* Blocks the calling thread and captures the child process activity until it exits.
* @return 0 if child process exits normally, -1 if abnormally exited.
*/
int ptrace_capture(const pid_t child, contract_fblockmap_t &updated_blocks)
{
// Absorb the exec notification.
// This is because we would get a notification about execv() which is initiated by ourselves.
ptrace(PTRACE_SYSCALL, child, NULL, NULL);
int status;
if (!(waitpid(child, &status, 0) && !WIFEXITED(status)))
{
LOG_ERR << "ptrace1: Waitpid failed.";
return -1;
}
/*
egs.rdi - Stores the first argument
regs.rsi - Stores the second argument
regs.rdx - Stores the third argument
regs.r10 - Stores the fourth argument
regs.r8 - Stores the fifth argument
regs.r9 - Stores the sixth argument
*/
// map from child fd's to absolute filepath, updated in realtime
std::unordered_map<int, fd_info> fd_map;
while (true)
{
// this is the **first** PTRACE_SYSCALL of set of two for this system call
// this catches the syscall BEFORE execution and provides its arguments (if any)
// see near the end of the loop for the second
ptrace(PTRACE_SYSCALL, child, NULL, NULL);
int status;
if (!(waitpid(child, &status, 0) && !WIFEXITED(status)))
return 0;
// Get the registers.
user_regs_struct regs;
ptrace(PTRACE_GETREGS, child, NULL, &regs);
unsigned long long scall = REG(regs);
// this array holds 10 long words which are used to xfer the memory containing a filename
// from the child process to this process, for calls that specify a filename
unsigned long word_array[10];
word_array[0] = 0;
int has_filename = 0;
char *filenameptr = reinterpret_cast<char *>(word_array);
unsigned long long args[6];
args[0] = regs.rdi;
args[1] = regs.rsi;
args[2] = regs.rdx;
args[3] = regs.r10;
args[4] = regs.r8;
args[5] = regs.r9;
// std::cout << "scall: " << callname(REG(regs)) << "\n";
if (scall == SYS_creat && (has_filename = 1) ||
scall == SYS_open && (has_filename = 1) ||
scall == SYS_openat && (has_filename = 1) ||
scall == SYS_chdir && (has_filename = 1) ||
scall == SYS_close ||
scall == SYS_lseek ||
scall == SYS_write ||
scall == SYS_read ||
scall == SYS_pwrite64)
{
// nb: not all arguments are used by all calls
// std::cout << callname(REG(regs)) << "(";
// for (auto i = 0; i < 6; ++i)
// std::cout << args[i] << (i == 5 ? ")\n" : ", ");
if (has_filename)
{
char *childptr = (scall == SYS_openat ? (char *)((void *)regs.rsi) : (char *)((void *)regs.rdi));
for (int n = 0; n < 10; ++n)
word_array[n] = ptrace(PTRACE_PEEKDATA, child, childptr + (n * sizeof(unsigned long)), NULL);
// place a \0 at the very end of the memory for string function safety
filenameptr[sizeof(unsigned long) * 10 - 1] = '\0';
}
}
// this is the **second** PTRACE_SYSCALL which provides the RETURN VALUE
// of the syscall after it has been executed. to make use of this information
// we need to have collected the arguments to the syscall from the first PTRACE_SYSCALL
// near the start of the loop above
ptrace(PTRACE_SYSCALL, child, NULL, NULL);
if (!(waitpid(child, &status, 0) && !WIFEXITED(status)))
return 0;
ptrace(PTRACE_GETREGS, child, NULL, &regs);
if (scall == SYS_open || scall == SYS_openat || scall == SYS_creat)
{
// the target application is trying to open or create a file so we need to map its fd
int fd = (int)regs.rax;
if (fd < 0 || fd > 0xffff)
{
LOG_DBG << "syscall to open, openat or creat returned invalid fd: " << fd;
continue;
}
if (args[0] < 3) // we don't bother with stdin out and err: 0,1,2
continue;
// compute filepath
char buf[PATH_MAX];
realpath(filenameptr, buf);
// We ignore anything outside the state dir.
if (strncmp (buf, conf::ctx.stateDir.c_str(), conf::ctx.stateDir.size()) != 0)
continue;
fd_map[fd] = {std::string(buf), 0};
// std::cout << "\tadded fd_map[" << fd << "] = " << fd_map[fd].filepath << "\n";
}
else if (scall == SYS_close)
{
// the target app is closing an fd, so check if the close was successful and if it was update our map
if (args[0] < 3) // we don't bother with stdin out and err: 0,1,2
continue;
int fd = args[0];
int result = (int)regs.rax;
if (result != 0)
{
LOG_DBG << "syscall close in child did not return 0.";
continue;
}
if (fd_map.find(fd) == fd_map.end())
continue;
fd_map.erase(fd);
}
else if (scall == SYS_chdir)
{
int result = (int)regs.rax;
if (result != 0)
{
LOG_DBG << "syscall chdir in child did not return 0.";
continue;
}
// the easiest way to track the child process's current working directory without explicitly
// asking the kernel for it is just to mirror their successful chdir syscalls in the parent
// then the parent's working directory will always match the child's working directory
// and we can resolve all relative paths using realpath. this solution probably won't work in
// a production setting, so real path tracking will need to be implemented
chdir(filenameptr);
char buf[PATH_MAX];
getcwd(buf, PATH_MAX);
// std::cout << "\tchanging directory to match child: '" << buf << "'\n";
}
else if (scall == SYS_lseek || scall == SYS_read || scall == SYS_write || scall == SYS_pwrite64)
{
if (args[0] < 3)
continue;
int offset = (int)regs.rax;
int fd = args[0];
if (fd_map.find(fd) == fd_map.end())
continue;
if (offset <= 0)
{
LOG_DBG << "syscall on FD: " << fd << " returned offset:" << offset << ", ignoring.";
continue;
}
auto cursor_before = fd_map[fd].cursor;
if (scall != SYS_pwrite64)
fd_map[fd].cursor = (scall == SYS_lseek ? offset : (fd_map[fd].cursor + offset));
auto cursor_after = fd_map[fd].cursor;
// std::cout << "\tfd_map[" << fd << "].cursor = " << cursor_after << "\n";
// if there's been a write we need to record it
if (scall == SYS_write || scall == SYS_pwrite64)
{
const std::string &filepath = fd_map[fd].filepath;
// compute all block boundaries
uint32_t first_block = cursor_before / BLOCK_SIZE;
uint32_t last_block = cursor_after / BLOCK_SIZE;
// pwrite doesn't update cursor, but we need to record blocks changed by it
if (scall == SYS_pwrite64)
{
first_block = args[3] / BLOCK_SIZE;
last_block = (args[3] + offset) / BLOCK_SIZE;
}
// check if the map has an entry
if (updated_blocks.find(filepath) == updated_blocks.end())
updated_blocks[filepath] = {}; // map should copy string here
// add the updated blocks
for (uint32_t i = first_block; i <= last_block; ++i)
{
updated_blocks[filepath].insert(i);
//std::cout << "updated block " << fd_map[fd].filepath << " block " << i << "\n";
}
}
}
}
return 0;
}
} // namespace proc

View File

@@ -0,0 +1,12 @@
#ifndef _HP_PROC_PTRACE_CAPTURE_
#define _HP_PROC_PTRACE_CAPTURE_
#include "../pchheader.hpp"
#include "proc.hpp"
namespace proc
{
/**
 * Blocks the calling thread and captures the activity of the traced child process until it exits.
 *
 * @param child          Pid of the child process to capture.
 * @param updated_blocks Populated with filename -> set of modified block ids for files the child wrote to.
 * @return 0 when the child process exits normally, -1 on failure.
 */
int ptrace_capture(const pid_t child, contract_fblockmap_t &updated_blocks);
}
#endif

1803
src/proc/ptrace_syscalls.hpp Normal file

File diff suppressed because it is too large Load Diff