/** @mainpage `liner` tool.
 *
 *  @section intro_sec Introduction
 *    The tool is designed to keep services running even when the master process has stopped
 *    (in case of an update, for example).
 *
 *  @section scenario_sec Typical Scenario
 *    The typical scenario looks like the following (all IPC communication is performed in
 *    message pack format, each state is shown in the process title):
 *
 *  @subsection owum_sec  State 1: Owum
 *    The tool should be started with only one argument - path to Unix-domain socket file
 *    which will be served by this tool. After the process start, it will create the socket file
 *    specified and will wait for master connection on it.
 *    If the master will close the connection during this state, the tool will exit too.
 *
 *  @subsection fetus_sec State 2: Fetus
 *    After the master connection will be accepted, the tool will send a version number to it.
 *    In response, the tool expects a list of arguments to be executed (the first argument
 *    should be the name of the service the tool should run; it will also be set as the process title).
 *    The rest of the argument array will be passed to the @c exec*() system function call.
 *    If the master will close the connection during this state, the tool will exit too.
 *
 *  @subsection infant_sec State 3: Infant
 *    After the tool will fork, it will send subprocess PID to the master and will go into the normal
 *    functional mode.
 *    If the master connection will be lost on this stage, the tool will send @c SIGTERM to the
 *    sub-process and terminate immediately.
 *
 *  @subsection normal_sec State 4: Normal
 *    In normal mode the process survives the loss of the master. If the master
 *    disconnects, the process will go into @ref orphan_sec "Orphan state".
 *
 *    It will also fetch subprocess'es STDOUT/STDERR streams and collect them internally into
 *    intermediate buffer. The buffer data will be send to the master as soon as possible with
 *    message of map of key @c "stdout" or @c "stderr" and string value with output buffer chunk.
 *    In case of buffer will be filled, the process will switch into @ref glutted_sec "Glutted state".
 *
 *    The last role of the tool is to collect return code of the sub-process in case of it has been
 *    stopped. The process will switch into @ref zombie_sec "Zombie state" and will wait till full
 *    send of parsed return code (in form of map) to the master process.
 *
 *  @subsection orphan_sec State 5: Orphan
 *    In case of master process will disconnect the tool during normal operational mode, the
 *    process will switch into "orphan" mode. In this mode the process will wait for new master
 *    connection for specific amount of time (about 15 minutes) and will die in case master will
 *    not connect in this period. After the master will be connected, the process will send a
 *    version to it and will switch back to "normal" operational mode.
 *
 *  @subsection adoptee_sec State 5.5: Adoptee
 *    This intermediate state is designed to suspend stdout/stderr data sent immediately after
 *    the master has been connected. The process will wait for command "adopt" from the master
 *    before switching into the regular state. This state follows the @ref orphan_sec
 *    "Orphan state" once a master has connected to the process.
 *
 *  @subsection glutted_sec State 6: Glutted
 *    If the internal buffer, which stores subprocess'es output stream will exceed the limit
 *    (about 16MiB currently), the process will switch into "glutted" state. In this state the
 *    process will @b not listen on subprocess'es output streams, which will lead to subprocess
 *    blocking if it will keep writing to those streams.
 *
 *  @subsection zombie_sec State 7: Zombie
 *    If the tool detects that the subprocess is dead, it will collect its return code and switch
 *    into "zombie" mode. In this mode the process will try to send rest of output buffers
 *    (if any) to the master process, and also a map with parsed return code.
 *    After the return code will be completely sent, the process will terminate.
 *
 *  @subsection corpse_sec State x: Corpse
 *    In case of abnormal termination (i.e., not planned termination, while the subprocess
 *    still running), the tool will switch into this state and will try to kill subprocess
 *    with the @c SIGKILL signal continuously. After the subprocess has terminated, the tool
 *    will terminate too.
 *
 *  Full tool's description is available here: http://wiki.yandex-team.ru/Skynet/Tools/Liner
 */

#ifndef _POSIX_C_SOURCE
#   define _POSIX_C_SOURCE 200809L
#endif
#if !defined(_GNU_SOURCE) && !defined(BSD)
#   define _GNU_SOURCE
#endif
#ifndef _DARWIN_C_SOURCE
#   define _DARWIN_C_SOURCE
#endif

#include <time.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <memory.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#if (defined(__unix__) || defined(unix)) && !defined(USG)
#    include <sys/param.h> // For @c BSD macro
#endif

#include <pthread.h>

#include <msgpack/msgpack.h>

#include "socket.h"


/** Protocol version reported to the master in the "info" message. */
static const uint8_t VERSION = 1;
/** Maximum amount of buffered child output, in bytes (default 16MiB).
 *  Tunable with the @c MAX_OUT_BUF_SIZE environment variable. */
static size_t MAX_OUT_BUF_SIZE = 0x1000000;
/** Maximum amount of time, in seconds, to wait for the master to come back (default 900).
 *  Tunable with the @c ORPHAN_TIME_TO_LIVE environment variable. */
static size_t ORPHAN_TIME_TO_LIVE = 900;
/** Number of attempts to terminate the child with @c SIGTERM, with a pause of 1 second between them. */
static const size_t CHILD_TERM_TRIES = 3;
/** Stack size in bytes requested for the PID waiter thread = 4KiB.
 *  NOTE(review): this may be below the platform's @c PTHREAD_STACK_MIN, in which case
 *  @c pthread_attr_setstacksize() would reject it - confirm on the target platforms. */
static const size_t PID_WAITER_STACK_SIZE = 4 << 10;

/** @name Process exit codes of the tool itself. */
//@{
/** Some system call failed (currently disabled). */
//static const int RETCODE_SYSTEM   = -1;
/** Child process stopped (for any reason). */
static const int RETCODE_CHILD  = 0;
/** Invalid usage (wrong number of command line arguments). */
static const int RETCODE_USAGE  = 1;
/** Termination signal caught; also the exit code used by @c killChild(). */
static const int RETCODE_SIGNAL = 2;
//@}

/** Subprocess state. */
typedef enum {
    STATE_REGULAR   = 0x0,
    STATE_OWUM      = 0x1,
    STATE_FETUS     = 0x2,
    STATE_INFANT    = 0x4,
    STATE_ORPHAN    = 0x8,
    STATE_ADOPTEE   = 0x10,
    STATE_GLUTTED   = 0x20,
    STATE_ZOMBIE    = 0x40,
    STATE_CORPSE    = 0x80,
} STATE;

/** Master command type, as decoded by @c parseCMD().
 *  @c CMD_NONE means that no complete command has been parsed. */
typedef enum { CMD_NONE, CMD_INFO, CMD_ADOPT, CMD_CONTEXT, CMD_TERM, CMD_TERMPG } COMMAND;

/** A structure which accumulates all the data related to the child process. */
typedef struct {
    /** Process ID; zero means there is no child to care about. */
    pid_t           pid;
    /** Process name. @c NULL means the process does not own the socket, so it cannot remove it. */
    char*           name;
    /** Child state, a bit mask of @c STATE values. */
    int             state;
    /** Process standard output read pipe's file descriptor (zero once the stream is closed). */
    int             outfd;
    /** Process error output read pipe's file descriptor (zero once the stream is closed). */
    int             errfd;
    /** Raw wait status of the process, as stored by the PID waiter thread. */
    volatile int    retcode;
    /** Standard output buffer. */
    StringList*     outbuf;
    /** Error output buffer. */
    StringList*     errbuf;
    /** Opaque context blob set by the master with the "context" command. */
    struct {
        char*       data;
        size_t      size;
    } context;
    /** PID waiter thread and its control pipe (the thread writes to @c ctl[1] when the child exits). */
    struct {
        pthread_t   th;
        int         ctl[2];
    } pid_waiter;
    /** Signal handler control pipe (the handler writes the signal number to @c ctl[1]). */
    struct {
        int             ctl[2];
    } sig_handler;
} Subprocess;
/** Global child object (needed by termination signal handler). */
Subprocess child;


/** @name A number of helper macros.
 *  Each macro takes a @c Subprocess object (not a pointer) and tests its @c state bit mask.
 *  Arguments and expansions are fully parenthesised, so the macros are safe to use inside
 *  larger expressions (e.g. @c !CHILD_MASTER_OK(x)).
 */
//@{
#define CHILD_IS_ORPHAN(x)  (((x).state & STATE_ORPHAN) == STATE_ORPHAN)
#define CHILD_IS_ADOPTEE(x) (((x).state & STATE_ADOPTEE) == STATE_ADOPTEE)
#define CHILD_IS_GLUTTED(x) (((x).state & STATE_GLUTTED) == STATE_GLUTTED)
#define CHILD_IS_ZOMBIE(x)  (((x).state & STATE_ZOMBIE) == STATE_ZOMBIE)
#define CHILD_IS_CORPSE(x)  (((x).state & STATE_CORPSE) == STATE_CORPSE)
/** The master is connected and has adopted the process: neither orphan nor adoptee.
 *  Evaluates its own argument (previously it silently used the global @c child). */
#define CHILD_MASTER_OK(x)  (!CHILD_IS_ORPHAN(x) && !CHILD_IS_ADOPTEE(x))
//@}


/** A peer socket together with its msgpack send and receive buffers.
 *  @note @c left describes the unsent tail of @c sbuf only; @c rbuf is the receive-side
 *        unpacker and is allocated lazily on the first receive.
 */
typedef struct {
    /** The socket object (@c NULL after the socket has been released). */
    Socket*             sock;
    /** Buffer to send (@c NULL when there is nothing queued). */
    msgpack_sbuffer*    sbuf;
    /** Buffer to read (@c NULL when there is no unparsed input). */
    msgpack_unpacker*   rbuf;
    /** Number of bytes at the end of @c sbuf which are not sent yet. */
    size_t              left;
} BufferedSocket;

/** UNIX domain socket name (heap-allocated copy; @c NULL when not owned or already removed). */
static char* sockname;
/** Removes the served socket file and releases its name.
 *
 *  Does nothing if there is no socket name or if the process does not own the socket
 *  (@c child.name is @c NULL). The name pointer is reset after release, so a repeated
 *  call is a harmless no-op instead of a double @c unlink()/@c free().
 */
static inline void removeSocket() {
    if (!sockname || !child.name) // Memory leak here? Who cares! We shutting down anyway..
        return;
    unlink(sockname);
    free(sockname);
    sockname = NULL;
}

/** Packs a string into the msgpack message stream as a raw object.
 *  @param mpac Packer to write to.
 *  @param str  String data (not required to be NUL-terminated).
 *  @param len  Number of bytes of @c str to pack.
 */
inline static void msgpack_pack_string(msgpack_packer* mpac, const char* const str, size_t len) {
    // Raw header (carries the length) first ..
    msgpack_pack_raw(mpac, len);
    // .. then the payload itself.
    msgpack_pack_raw_body(mpac, str, len);
}

/** Releases the given buffered socket: the peer socket itself and both msgpack buffers.
 *  All fields are reset afterwards, so the structure can be reused for a new peer.
 */
static inline void releaseBufferedSocket(BufferedSocket* sock) {
    releasePeerSocket(sock->sock);
    sock->sock = NULL;

    if (sock->rbuf) {
        msgpack_unpacker_free(sock->rbuf);
        sock->rbuf = NULL;
    }

    if (sock->sbuf) {
        msgpack_sbuffer_free(sock->sbuf);
        sock->sbuf = NULL;
    }

    sock->left = 0;
}

/** Sends a single integer via RPC.
 *  @param sock Socket to send to.
 *  @param x    Value to send; it is packed as a msgpack 32-bit integer.
 */
static inline void sendInt(Socket* sock, int32_t x) {
    msgpack_sbuffer* out = msgpack_sbuffer_new();
    assert(out);
    msgpack_packer* packer = msgpack_packer_new(out, msgpack_sbuffer_write);
    assert(packer);

    msgpack_pack_int32(packer, x);
    socketSendAll(sock, (uint8_t*)out->data, out->size);

    msgpack_sbuffer_free(out);
    msgpack_packer_free(packer);
}

/** Fetches the command to execute and its arguments list via RPC.
 *
 *  Blocks until one complete msgpack object has been received. The object must be an array
 *  of raw strings; anything else is treated as a fatal protocol error (the input comes from
 *  the peer, so it is validated at run time instead of with @c assert()).
 *
 *  @param sock Socket to read from.
 *  @return     Newly allocated, @c NULL-terminated array of newly allocated strings.
 *              The caller owns both the array and the strings.
 */
static inline char** recvArgs(Socket* sock) {
    msgpack_unpacker* mpac = msgpack_unpacker_new(0x2000);
    if (!mpac)
        fatal("Unable to allocate handshake buffer");
    msgpack_unpacked msg;
    msgpack_unpacked_init(&msg);
    while (true) {
        // A full buffer would turn into a zero-length receive, which is indistinguishable
        // from a closed connection - so make sure there is always some room.
        if (!msgpack_unpacker_buffer_capacity(mpac) && !msgpack_unpacker_expand_buffer(mpac, 0x2000))
            fatal("Unable to grow handshake buffer");
        ssize_t read = socketRecv(
            sock,
            (uint8_t*)msgpack_unpacker_buffer(mpac), msgpack_unpacker_buffer_capacity(mpac)
        );
        if (read < 1)
            fatal("Server closed the connection during handshake");
        msgpack_unpacker_buffer_consumed(mpac, read);
        if (msgpack_unpacker_next(mpac, &msg))
            break;
    }

    msgpack_object root = msg.data;
    if (root.type != MSGPACK_OBJECT_ARRAY)
        fatal("Arguments list expected from the master during handshake");
    char** args = sfmalloc((root.via.array.size + 1) * sizeof(char*));
    for (size_t i = 0; i < root.via.array.size; ++i) {
        const msgpack_object* item = &root.via.array.ptr[i];
        // Reading `via.raw` of a non-raw object would give a garbage pointer/size pair.
        if (item->type != MSGPACK_OBJECT_RAW)
            fatal("Non-string argument received from the master during handshake");
        args[i] = strndup(item->via.raw.ptr, item->via.raw.size);
        if (!args[i])
            fatal("Unable to allocate memory for an argument");
    }
    args[root.via.array.size] = NULL;
    msgpack_unpacked_destroy(&msg);
    msgpack_unpacker_free(mpac);
    return args;
}

/** Fetches master's message data [chunk].
 *
 *  Reads everything currently available from the socket into the receive unpacker
 *  (allocating it on first use) and grows the unpacker's buffer while a read fills it completely.
 *
 *  @param sock Buffered socket to read from.
 *  @return     Total amount of bytes received; zero if nothing has been read
 *              (no data, connection closed or receive error).
 */
static inline ssize_t recvDataChunk(BufferedSocket* sock) {
    if (!sock->rbuf) sock->rbuf = msgpack_unpacker_new(0x100);
    if (!sock->rbuf)
        fatal("Unable to allocate receive buffer");

    ssize_t total = 0;
    while (true) {
        size_t want = msgpack_unpacker_buffer_capacity(sock->rbuf);
        ssize_t read = socketRecv(sock->sock, (uint8_t*)msgpack_unpacker_buffer(sock->rbuf), want);
        if (read > 0) {
            msgpack_unpacker_buffer_consumed(sock->rbuf, read);
            // Count successfully received bytes only: a failed receive after a successful one
            // must not decrease the amount of data reported to the caller.
            total += read;
        }
        if (read < 0 || want != (size_t)read) break;
        // Need more space!
        msgpack_unpacker_expand_buffer(sock->rbuf, want * 7 / 4);
    }
    return total;
}

/** Parses master's message. */
static inline COMMAND parseCMD(BufferedSocket* sock) {
    if (!sock->rbuf) return CMD_NONE;

    msgpack_unpacked msg;
    msgpack_unpacked_init(&msg);

    COMMAND ret = CMD_NONE;
    if (msgpack_unpacker_next(sock->rbuf, &msg)) {
        msgpack_object root = msg.data;
        if (root.type == MSGPACK_OBJECT_RAW && !strncmp(root.via.raw.ptr, "info", 4))
            ret = CMD_INFO;
        else if (root.type == MSGPACK_OBJECT_RAW && !strncmp(root.via.raw.ptr, "adopt", 5))
            ret = CMD_ADOPT;
        else if (root.type == MSGPACK_OBJECT_RAW && !strncmp(root.via.raw.ptr, "terminatepg", 11))
            ret = CMD_TERMPG;
        else if (root.type == MSGPACK_OBJECT_RAW && !strncmp(root.via.raw.ptr, "terminate", 9))
            ret = CMD_TERM;
        else if (root.type == MSGPACK_OBJECT_MAP) {
            msgpack_object key = root.via.map.ptr[0].key;
            msgpack_object val = root.via.map.ptr[0].val;
            if (
                root.via.map.size == 1 && key.type == MSGPACK_OBJECT_RAW && val.type == MSGPACK_OBJECT_RAW &&
                !strncmp(key.via.raw.ptr, "context", 7)
            ) {
                free(child.context.data);
                child.context.data = sfmalloc(child.context.size = val.via.raw.size);
                memcpy(child.context.data, val.via.raw.ptr, val.via.raw.size);
            } else
                fatal("Unknown command's map structure from master");

            ret = CMD_CONTEXT;
        } else {
            printf("Unknown command from master.\n");
            if(!CHILD_IS_ZOMBIE(child) && child.pid) {
                killpg(child.pid, SIGKILL);
                killpg(child.pid, SIGCONT);
                kill(child.pid, SIGKILL);
                kill(child.pid, SIGCONT);
            }
            killpg(0, SIGKILL); // Be a paranoidical paranoid.
        }
    }

    msgpack_unpacked_destroy(&msg);
    if (sock->rbuf->used == sock->rbuf->off) { // All the consumed buffer parsed - release the buffer.
        msgpack_unpacker_free(sock->rbuf);
        sock->rbuf = NULL;
    }
    return ret;
}

/** PID waiter's thread working loop.
 *
 *  Blocks in @c waitpid() until the child exits, stores its wait status into
 *  @c child.retcode and then wakes the main thread up by writing one byte into the
 *  PID waiter control pipe. The write end of the pipe is closed afterwards.
 *
 *  @param unused   Thread argument required by the @c pthread_create() start routine
 *                  signature; not used.
 *  @return         Always @c NULL.
 */
static void* pidWaiter(void* unused) {
    (void)unused;
    int exitstatus = 0; // Stays zero if `waitpid()` fails, instead of being an indeterminate value.
    int rc = waitpid(child.pid, &exitstatus, 0);
    if (rc < 1)
        fprintf(stderr, "waitpid(%d) system call failed with code %d: %s", child.pid, rc, strerror(errno));
    child.retcode = exitstatus;
    // Signal main thread ..
    for (;;) {
        ssize_t written = write(child.pid_waiter.ctl[1], "\0", 1);
        if (written >= 0)
            break;
        // Retry interrupted writes only. (`continue` in a `do {} while (false)` loop
        // would just leave the loop without retrying.)
        if (errno != EINTR)
            fatal("Unable to signal main thread about child exit.");
    }
    close(child.pid_waiter.ctl[1]);
    child.pid_waiter.ctl[1] = 0;
    // .. and nothing more! The simplest algorithm ever used!
    pthread_exit(0);
    return NULL;
}

/** Joins the PID waiter thread and closes its control pipe.
 *  A zero read end of the pipe means that the cleanup has been performed already
 *  (or that the thread has never been started), so nothing is done in that case.
 *  Errors of the system calls involved are ignored.
 */
static inline void pidWaiterCleanup() {
    if (child.pid_waiter.ctl[0]) {
        pthread_join(child.pid_waiter.th, NULL);

        close(child.pid_waiter.ctl[0]);
        // The write end is zeroed by the waiter thread itself once it has closed it.
        if (child.pid_waiter.ctl[1])
            close(child.pid_waiter.ctl[1]);

        memset(child.pid_waiter.ctl, 0, sizeof(child.pid_waiter.ctl));
    }
}

/** Starts the sub-process.
 *
 *  Forks; the forked process redirects its standard output and error streams into pipes,
 *  its standard input to @c /dev/null, closes the descriptors listed in @p cloexec and
 *  executes the command with @c execvp(). The parent process fills @p child with the PID,
 *  the non-blocking read ends of the pipes and fresh output buffers, sets the state to
 *  @c STATE_INFANT, creates the signal handler and PID waiter control pipes and starts
 *  the PID waiter thread. Any failure is fatal.
 *
 *  @param args     @c NULL-terminated arguments list; @c args[0] is the program to execute.
 *  @param child    Subprocess object to fill.
 *  @param cloexec  Zero-terminated list of file descriptors to be closed in the forked
 *                  process before @c exec; may be @c NULL.
 */
static inline void createChild(const char* const* args, Subprocess* child, const int* cloexec) {
    int pipes[2][2]; // standard output and error stream pipes
    if (pipe(pipes[0]) < 0 || pipe(pipes[1]) < 0)
        fatal("Unable to create a pipe");

    pid_t cpid = fork();
    if (cpid < 0)
        fatal("Unable to fork");

    if (!cpid) { // Child
        // First of all, child has not own parent's stuff.
        child->name = sockname = NULL;
        // Now close unused pipes' ends.
        close(pipes[0][0]); close(pipes[1][0]);
        // Link standard output streams with our pipe ends
        dup2(pipes[0][1], STDOUT_FILENO);
        dup2(pipes[1][1], STDERR_FILENO);
        // The original write ends are duplicated now - do not leak them into the executed program.
        for (int i = 0; i < 2; ++i)
            if (pipes[i][1] > STDERR_FILENO)
                close(pipes[i][1]);
        // And link standard input stream to @c /dev/null
        int nullfd = open("/dev/null", O_RDONLY);
        if (nullfd < 0) // Zero is a valid descriptor here (if standard input was closed).
            fatal("Unable to open '/dev/null'");
        if (nullfd != STDIN_FILENO) {
            dup2(nullfd, STDIN_FILENO);
            close(nullfd);
        }

        // Manual @c SOCK_CLOEXEC
        for (const int* pfd = cloexec; pfd && *pfd; ++pfd)
            close(*pfd);

        // Go to the dark side of the moon.
        int rc = execvp(args[0], (char* const*)args);
        int exec_errno = errno; // Save it before any library call can overwrite it.

        // `exec()` failed. Prepare good error message: space-separated arguments,
        // truncated to the buffer size.
        static char cmd[0x10000];
        size_t cmd_sz = 0;
        cmd[0] = '\0';
        for (const char* const* arg = args; arg && *arg && cmd_sz < sizeof(cmd) - 1; ++arg) {
            int len = snprintf(cmd + cmd_sz, sizeof(cmd) - cmd_sz, "%s%s", cmd_sz ? " " : "", *arg);
            if (len < 0)
                break;
            cmd_sz += (size_t)len;
            if (cmd_sz >= sizeof(cmd)) // Output truncated by `snprintf()`.
                cmd_sz = sizeof(cmd) - 1;
        }
        fprintf(stderr, "Unable to execute command '%s': %s\n", cmd, strerror(exec_errno));
        // NOTE(review): `exit()` runs the handlers registered with `atexit()` in the forked
        // process as well - consider `_exit()` here.
        exit(rc);
        // Never returns.
    }

    // Parent process works here.
    close(pipes[0][1]); close(pipes[1][1]); // Close unused pipes end.
    child->outbuf = newStringList();
    child->errbuf = newStringList();

    // Set out/err child pipes non-blocking.
    fcntl(pipes[0][0], F_SETFL, fcntl(pipes[0][0], F_GETFL) | O_NONBLOCK);
    fcntl(pipes[1][0], F_SETFL, fcntl(pipes[1][0], F_GETFL) | O_NONBLOCK);
    child->outfd = pipes[0][0];
    child->errfd = pipes[1][0];

    child->state = STATE_INFANT;
    child->pid = cpid;

    // Create signal handler control pipe.
    if (pipe(child->sig_handler.ctl) < 0)
        fatal("Unable to create a control pipe");
    // Now create PID waiter control pipe and thread.
    if (pipe(child->pid_waiter.ctl) < 0)
        fatal("Unable to create a control pipe");
    // But first adjust its stack size - it should be pretty tiny.
    pthread_attr_t attr;
    if (pthread_attr_init(&attr))
            fatal("pthread_attr_init() call failed");
    // The result is ignored deliberately: if the size is rejected, the default one is used.
    pthread_attr_setstacksize(&attr, PID_WAITER_STACK_SIZE);
    if (pthread_create(&child->pid_waiter.th, &attr, pidWaiter, NULL))
        fatal("pthread_create() call failed");
    // The attributes object is not needed after the thread has been created.
    pthread_attr_destroy(&attr);
}

/** Reads the available child output stream data (one @c read() call) and appends it
 *  to the corresponding output buffer.
 *
 *  End of stream closes the descriptor and zeroes it in the subprocess object.
 *  A transient read failure (@c EAGAIN, @c EWOULDBLOCK, @c EINTR) reads nothing and keeps
 *  the stream open; any other read failure is handled like the end of stream.
 *
 *  @param p        Subprocess object.
 *  @param err      Flags either standard output or error stream should be read.
 *  @param mask     Select file descriptors bit mask.
 *  @param buf      Buffer to be used on read.
 *  @param buf_sz   Available buffer space to be used.
 *  @return         Amount of data read (zero if nothing has been read).
 */
static inline size_t readChildOut(Subprocess* p, bool err, fd_set* mask, char* buf, size_t buf_sz) {
    int fd = err ? p->errfd : p->outfd;
    if (!fd || !p->pid || !FD_ISSET(fd, mask)) return 0;

    // Keep the result signed: -1 stored into `size_t` would look like a huge amount of data.
    ssize_t len = read(fd, buf, buf_sz);
    if (len < 0) {
        if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
            return 0; // Nothing to read right now - try again later.
        len = 0; // The stream is broken - there is nothing more to read from it.
    }
    if (!len) { // Child process closed the stream (died?).
        if (!err) p->outfd = 0;
        else p->errfd = 0;
        close(fd);
        return 0;
    }
    appendString2List(err ? p->errbuf : p->outbuf, buf, (size_t)len);
    return (size_t)len;
}

/** Sends data via the buffered socket.
 *
 *  If @p sbuf is given, the function takes ownership of it and queues it after the data
 *  which is still pending (if any). Then one send attempt is performed for the pending
 *  data. On socket error the whole buffered socket is released.
 *
 *  @param sock Buffered socket to send to.
 *  @param sbuf New data to queue or @c NULL to just continue sending the pending data.
 *  @return     Result of the send attempt (amount of bytes sent; a negative result of
 *              the underlying send is returned converted to @c size_t); zero if there
 *              is nothing to send.
 */
static inline size_t bufferedSocketSend(BufferedSocket* sock, msgpack_sbuffer* sbuf) {
    if (sbuf) {
        if (sock->sbuf) { // We have some data which is not sent yet.
            // Merge the unsent tail of the pending buffer with the new one. The part
            // which is sent already should not be queued again.
            msgpack_sbuffer* tmpbuf = msgpack_sbuffer_new(); assert(tmpbuf);
            msgpack_sbuffer_write(tmpbuf, sock->sbuf->data + sock->sbuf->size - sock->left, sock->left);
            msgpack_sbuffer_write(tmpbuf, sbuf->data, sbuf->size);
            msgpack_sbuffer_free(sock->sbuf);
            msgpack_sbuffer_free(sbuf);
            sbuf = tmpbuf;
        }
        sock->sbuf = sbuf;
        sock->left = sbuf->size;
    }

    if (!sock->sbuf) // Nothing is queued at all.
        return 0;

    ssize_t sent = socketSend(sock->sock, (uint8_t*)sock->sbuf->data + sock->sbuf->size - sock->left, sock->left);
    if ((size_t)sent == sock->left) {
        msgpack_sbuffer_free(sock->sbuf);
        sock->sbuf = NULL;
        sock->left = 0;
    } else if (sent < 0) // Socket error - master disconnected
        releaseBufferedSocket(sock);
    else
        sock->left -= sent;
    return sent;
}

/** Helper function. Packs the first item of the given @c StringList into the message as a
 *  key/value pair (both as strings) and removes that item from the list, releasing its memory.
 *  Does nothing if the list holds no data.
 *
 *  @param mpac     Packer of the message being built.
 *  @param buf      List to take the item from.
 *  @param key      Key to pack before the item's data.
 *  @param key_len  Length of the key.
 */
static inline void _appendOutChunk(msgpack_packer* mpac, StringList* buf, const char* const key, size_t key_len) {
    if (buf->size == 0)
        return;

    StringListItem* head = buf->first;

    // Key first, the chunk itself after.
    msgpack_pack_string(mpac, key, key_len);
    msgpack_pack_string(mpac, head->data, head->size);

    // Unlink the packed item from the list ..
    buf->size -= head->size;
    buf->first = head->next;
    if (buf->first == NULL)
        buf->last = NULL;

    // .. and release it.
    free(head->data);
    free(head);
}

/** Sends the next chunk(s) of collected child output to the master.
 *
 *  Builds a map with up to two entries, @c "stdout" and/or @c "stderr", each holding the
 *  first chunk of the corresponding buffer, and queues it for sending. Nothing is done
 *  while previous data is still being sent or if both buffers are empty.
 *
 *  @param sock Buffered socket of the master connection.
 *  @param p    Subprocess object which owns the output buffers.
 *  @return     Result of @c bufferedSocketSend() or zero if nothing has been queued.
 */
static inline size_t sendChildOut(BufferedSocket* sock, Subprocess* p) {
    const bool has_out = p->outbuf->size != 0;
    const bool has_err = p->errbuf->size != 0;
    if (sock->left || !(has_out || has_err))
        return 0;

    msgpack_sbuffer* message = msgpack_sbuffer_new();
    assert(message);
    msgpack_packer* packer = msgpack_packer_new(message, msgpack_sbuffer_write);
    assert(packer);

    // One map entry per non-empty stream.
    msgpack_pack_map(packer, (size_t)has_out + (size_t)has_err);
    if (has_out)
        _appendOutChunk(packer, p->outbuf, "stdout", 6);
    if (has_err)
        _appendOutChunk(packer, p->errbuf, "stderr", 6);
    msgpack_packer_free(packer);

    return bufferedSocketSend(sock, message);
}

/** Sends subprocess information to the master.
 *
 *  The message is a map with the keys @c "pid" (child PID), @c "pgid" (PID of this process),
 *  @c "name" (child name), @c "context" (only if a context is set) and @c "version"
 *  (protocol version).
 *
 *  @param sock Buffered socket of the master connection.
 *  @param p    Subprocess object to report about.
 *  @return     Result of @c bufferedSocketSend().
 */
static inline size_t sendInfo(BufferedSocket* sock, Subprocess* p) {
    static const char* const KEY_PID        = "pid";
    static const char* const KEY_PGID       = "pgid";
    static const char* const KEY_NAME       = "name";
    static const char* const KEY_VERSION    = "version";
    static const char* const KEY_CONTEXT    = "context";

    const bool with_context = p->context.data != NULL;

    msgpack_sbuffer* message = msgpack_sbuffer_new();
    assert(message);
    msgpack_packer* packer = msgpack_packer_new(message, msgpack_sbuffer_write);
    assert(packer);

    // Four mandatory entries plus an optional context.
    msgpack_pack_map(packer, with_context ? 5 : 4);

    // Child's PID.
    msgpack_pack_string(packer, KEY_PID, strlen(KEY_PID));
    msgpack_pack_int32(packer, p->pid);

    // Our own PID is reported as the process group ID.
    msgpack_pack_string(packer, KEY_PGID, strlen(KEY_PGID));
    msgpack_pack_int32(packer, getpid());

    // Child's name.
    msgpack_pack_string(packer, KEY_NAME, strlen(KEY_NAME));
    msgpack_pack_string(packer, p->name, strlen(p->name));

    // Master's context, if any.
    if (with_context) {
        msgpack_pack_string(packer, KEY_CONTEXT, strlen(KEY_CONTEXT));
        msgpack_pack_string(packer, p->context.data, p->context.size);
    }

    // Protocol version.
    msgpack_pack_string(packer, KEY_VERSION, strlen(KEY_VERSION));
    msgpack_pack_int8(packer, VERSION);

    msgpack_packer_free(packer);

    return bufferedSocketSend(sock, message);
}

/** Sends the parsed subprocess return code to the master.
 *
 *  The message is a map with the keys @c "exited", @c "exitstatus" and @c "signaled";
 *  @c "termsig" and @c "coredump" are added if the process has been terminated by a signal,
 *  and @c "zombie" is added (as the first entry) if @p zombie is set.
 *  All the values are packed as 8-bit integers.
 *
 *  @param sock     Buffered socket of the master connection.
 *  @param rc       Raw wait status of the subprocess.
 *  @param zombie   Flags whether the @c "zombie" entry should be reported.
 *  @return         Result of @c bufferedSocketSend().
 */
static inline size_t sendRetcode(BufferedSocket* sock, int rc, bool zombie) {
    static const char* const KEY_ZOMBIE     = "zombie";
    static const char* const KEY_EXITED     = "exited";
    static const char* const KEY_EXITSTATUS = "exitstatus";
    static const char* const KEY_SIGNALED   = "signaled";
    static const char* const KEY_TERMSIG    = "termsig";
    static const char* const KEY_COREDUMP   = "coredump";

    msgpack_sbuffer* message = msgpack_sbuffer_new();
    assert(message);
    msgpack_packer* packer = msgpack_packer_new(message, msgpack_sbuffer_write);
    assert(packer);

    bool signaled = WIFSIGNALED(rc);

    // Three entries are always there, the rest are optional.
    size_t entries = 3;
    if (signaled)
        entries += 2;
    if (zombie)
        entries += 1;
    msgpack_pack_map(packer, entries);

    if (zombie) {
        msgpack_pack_string(packer, KEY_ZOMBIE, strlen(KEY_ZOMBIE));
        msgpack_pack_int8(packer, zombie);
    }

    // Normal exit information.
    msgpack_pack_string(packer, KEY_EXITED, strlen(KEY_EXITED));
    msgpack_pack_int8(packer, WIFEXITED(rc));
    msgpack_pack_string(packer, KEY_EXITSTATUS, strlen(KEY_EXITSTATUS));
    msgpack_pack_int8(packer, WEXITSTATUS(rc));

    // Termination by signal information.
    msgpack_pack_string(packer, KEY_SIGNALED, strlen(KEY_SIGNALED));
    msgpack_pack_int8(packer, signaled);
    if (signaled) {
        msgpack_pack_string(packer, KEY_TERMSIG, strlen(KEY_TERMSIG));
        msgpack_pack_int8(packer, WTERMSIG(rc));

        msgpack_pack_string(packer, KEY_COREDUMP, strlen(KEY_COREDUMP));
        msgpack_pack_int8(packer, WCOREDUMP(rc));
    }

    msgpack_packer_free(packer);

    return bufferedSocketSend(sock, message);
}

/** Sets the process title from the given name and state.
 *
 *  The state bit mask is translated into a @c NULL-terminated list of state labels
 *  (in the order of the flags' values), which is passed to @c processTitle() together
 *  with the name. Nothing is done if the state is the same as on the previous call.
 *
 *  @param name     Name to show in the title.
 *  @param state    Bit mask of @c STATE flags.
 */
static inline void title(const char* name, int state) {
    static const struct {
        int         flag;
        const char* label;
    } LABELS[] = {
        { STATE_OWUM,       "owum"      },
        { STATE_FETUS,      "fetus"     },
        { STATE_INFANT,     "infant"    },
        { STATE_ORPHAN,     "orphan"    },
        { STATE_ADOPTEE,    "adoptee"   },
        { STATE_GLUTTED,    "glutted"   },
        { STATE_ZOMBIE,     "zombie"    },
        { STATE_CORPSE,     "corpse"    },
    };
    static const char* states[0xF];
    static int prev_state = -1;

    if (state == prev_state)
        return;
    prev_state = state;

    size_t count = 0;
    for (size_t idx = 0; idx < sizeof(LABELS) / sizeof(LABELS[0]); ++idx) {
        if ((state & LABELS[idx].flag) == LABELS[idx].flag)
            states[count++] = LABELS[idx].label;
    }
    states[count] = NULL;

    processTitle(name, states);
}

/** Signal post-processing: terminates the child and exits.
 *
 *  If there is a child, it gets @c SIGTERM up to @c CHILD_TERM_TRIES times with a pause of
 *  1 second in between; if it is still not reaped, it (and its process group) gets
 *  @c SIGKILL / @c SIGCONT continuously until @c waitpid() reports something.
 *  Then the child's name and context are released (unless called for @c SIGSEGV), the
 *  signal handler control pipe is closed and the process exits with @c RETCODE_SIGNAL.
 *
 *  @param signum   Number of the signal caught or zero if called without a signal.
 *
 *  NOTE(review): this function always calls @c exit(), while it is also called from the
 *  @c onExit() callback - confirm that is intended.
 */
static void killChild(int signum) {
    int rc;
    // Should be initialized: the `SIGKILL` loop condition reads it even if the `SIGTERM`
    // loop has not made any iteration.
    pid_t pid = 0;

    if (signum)
        printf("Signal #%d caught.", signum);

    if (child.pid) {
        title(child.name, STATE_CORPSE);
        for (size_t i = 0; i < CHILD_TERM_TRIES && !CHILD_IS_ZOMBIE(child) && child.pid; ++i) {
            kill(child.pid, SIGTERM);
            pid = waitpid(child.pid, &rc, WNOHANG);
            if (pid) break; // Even error occured.
            sleep(1);
        }

        while (pid < 1 && !CHILD_IS_ZOMBIE(child) && child.pid) {
            killpg(child.pid, SIGKILL);
            killpg(child.pid, SIGCONT); // Be a paparanoidical paranoid
            kill(child.pid, SIGKILL);
            kill(child.pid, SIGCONT); // Be a double paparanoidical paranoid
            pid = waitpid(child.pid, &rc, WNOHANG);
            if (pid) break; // Even error occured.
            sleep(1);
        }
        child.pid = 0;
    }
    // An empty name is not heap-allocated, so there is nothing to release for it.
    if (signum != SIGSEGV && child.name && *child.name) {
        pidWaiterCleanup();
        free(child.name);
        child.name = NULL;
        if (child.context.data) {
            child.context.size = 0;
            free(child.context.data);
            child.context.data = NULL;
        }
    }

    for (int i = 0; i < 2; ++i)
        close(child.sig_handler.ctl[i]);
    memset(child.sig_handler.ctl, 0, sizeof(child.sig_handler.ctl));
    exit(RETCODE_SIGNAL);
}

/** Signal handler - just reports the event to the main loop.
 *
 *  The signal number is written into the signal handler control pipe. The only exception
 *  is @c SIGSEGV: in that case the child, its process group and our own process group are
 *  killed immediately.
 *  The value of @c errno is preserved, so the interrupted code does not see an error code
 *  set by the handler.
 *
 *  @param signum   Number of the signal caught.
 */
static void signalNotifier(int signum) {
    if (signum == SIGSEGV) {
        // The only one action we can try to perform on segmentation fault is to kill everybody.
        if (!CHILD_IS_ZOMBIE(child) && child.pid) {
            killpg(child.pid, SIGKILL);
            killpg(child.pid, SIGCONT);
            kill(child.pid, SIGKILL);
            kill(child.pid, SIGCONT);
        }
        killpg(0, SIGKILL);
        _exit(signum); // This point should never be reached.
    }

    const int saved_errno = errno;
    // Signal main thread and report the signal number.
    while (child.sig_handler.ctl[1]) {
        ssize_t rc = write(child.sig_handler.ctl[1], &signum, sizeof(signum));
        if (rc >= 0)
            break;
        if (errno != EINTR)
            _exit(signum);  // Something VERY strange happens.
    }
    errno = saved_errno;
}

/** Process exit callback (registered with @c atexit()).
 *  Removes the served socket file, terminates the child and finally kills the whole
 *  own process group.
 */
static inline void onExit() {
    // Nobody should be able to connect anymore.
    removeSocket();
    // No signal caught - just make sure the child is gone.
    killChild(0);
    // And the last resort: kill everything in our process group.
    killpg(0, SIGKILL);
}

/** Alarm signal callback. Intentionally does nothing.
 *  @param signum   Number of the signal caught (unused).
 */
static inline void onAlarm(int signum) {
    (void)signum;
}


/**
 * Entry point of the `liner` tool.
 *
 * Expects exactly one argument - the path of the Unix-domain socket to serve. The function
 * creates the socket, waits for the master and performs the handshake, starts the sub-process
 * and then runs a `select()`-driven loop that multiplexes the master connection, the
 * sub-process output streams, the PID waiter channel and the signal notification channel
 * until the child gets the @c STATE_CORPSE flag.
 *
 * Environment variables @c MAX_OUT_BUF_SIZE and @c ORPHAN_TIME_TO_LIVE, if set, override the
 * process-wide constants with the same names.
 *
 * @param argc Number of command line arguments, must be 2.
 * @param argv Command line arguments; @c argv[1] is the socket path.
 * @return @c RETCODE_USAGE on wrong usage, @c RETCODE_CHILD after the main loop is finished.
 */
int main(int argc, char** argv) {
    // Read and write file descriptors masks for `select()`
    fd_set rfds_mask, wfds_mask;
    struct timeval tv = {0};
    FD_ZERO(&rfds_mask);
    FD_ZERO(&wfds_mask);

    if (argc != 2) {
        printf("Usage: %s <Unix domain socket path>\n", argv[0]);
        return RETCODE_USAGE;
    }

    // Tune process-wide constants.
    const char* env = getenv("MAX_OUT_BUF_SIZE");
    if (env) MAX_OUT_BUF_SIZE = atoll(env);
    env = getenv("ORPHAN_TIME_TO_LIVE");
    if (env) ORPHAN_TIME_TO_LIVE = atoll(env);
    if (MAX_OUT_BUF_SIZE * ORPHAN_TIME_TO_LIVE < 1)
        fatal("Incorrect constants provided with environment variables");

    // Empty @c SIGALRM processor.
    signal(SIGALRM, onAlarm);
    // Ignore other signals.
    signal(SIGHUP, SIG_IGN);
    signal(SIGPIPE, SIG_IGN);
    // Although ignoring @c SIGCHLD is the default, automatic reaping will occur if the disposition
    // is set to ignore explicitly (equal to setting the @c SA_NOCLDWAIT flag for the @c SIGCHLD
    // signal), so NEVER set @c SIGCHLD to @c SIG_IGN explicitly.

    // Setup a process group and be its leader.
#ifdef BSD
    setpgrp(0, 0); // BSD style.
#else
    setpgrp(); // System V version.
#endif

    // Save any needed arguments for future use. They will not be available after
    // initialization of the process title setter.
    sockname = strdup(argv[1]);
    // Setup cleanup callback.
    atexit(onExit);
    // Clobber `argv` array to be able to change own process name.
    initProcessTitle(argc, argv);

    // Show that we are ready to receive commands.
    title("", STATE_OWUM);

    // First of all, create the socket.
    Socket* srv = newServerSocket(sockname, 1);
    // Now we own the socket - it is safe to remove it on exit.
    child.name = "";
    // Now we should wait for master's connection and initial handshake for limited time.
    alarm(ORPHAN_TIME_TO_LIVE);
    FD_SET(srv->fd, &rfds_mask);
    int retval = select(srv->fd + 1, &rfds_mask, NULL, NULL, NULL);
    if (retval < 1) // Nothing selected
        fatal("No master connected");

    // Aloha! Master connected!
    BufferedSocket peer = {0};
    peer.sock = acceptPeer(srv);

    // On OSX and BSD this socket will be in non-blocking mode, since it inherits that flag
    // from the main socket. But we need it in blocking mode until we spawn the child process.
    fcntl(peer.sock->fd, F_SETFL, fcntl(peer.sock->fd, F_GETFL) & ~O_NONBLOCK);

    title(child.name, STATE_FETUS);
    // Great, the master process connected to us. Wait for command.
    // We expect the command will be single and fit into one data chunk.
    if (recvDataChunk(&peer) < 1)
        fatal("Master dropped the connection unexpectedly");

    switch (parseCMD(&peer)) {
    case CMD_INFO:
        // Send our state information ..
        sendInfo(&peer, &child);
        // .. and wait till all data will be really sent.
        while (peer.left) bufferedSocketSend(&peer, NULL);
        break;
    case CMD_NONE:
        fatal("Unparsable command from the master during handshake");
        break; // Suppress IDE's warning.
    default:
        fatal("Unexpected command from the master during handshake");
        break; // Suppress compiler's warning.
    }

    // The master should send a list of arguments to be executed. Wait for it.
    char** args = recvArgs(peer.sock);
    // Initial stage passed - reset the timer (and don't ever use it!).
    alarm(0);

    // First argument is our name actually.
    child.name = strdup(args[0]);
    // Great. Initial phase passed.
    title(child.name, STATE_INFANT);
    // Ok, it is time to do the actual job - start the child process.
    createChild((const char* const*) args + 1, &child, (int[]){srv->fd, peer.sock->fd, 0});
    // Free arguments string array.
    releaseArray(args);
    sendInt(peer.sock, (int32_t)child.pid);

    // Passthrough signals to the child process.
    signal(SIGINT, signalNotifier);
    signal(SIGTSTP, signalNotifier);
    signal(SIGQUIT, signalNotifier);
    signal(SIGTERM, signalNotifier);
    signal(SIGABRT, signalNotifier);
    signal(SIGSEGV, signalNotifier);

    // So, the synchronous part ends here, going to run main "hard" mode -
    // be ready that child or master process can leave us at any moment.
    child.state = STATE_REGULAR;
    // Set master connection socket non-blocking.
    fcntl(peer.sock->fd, F_SETFL, fcntl(peer.sock->fd, F_GETFL) | O_NONBLOCK);

    // Generic-purpose buffer. The size is an enumeration constant (a true constant expression),
    // so `buf` is a fixed-size array rather than a variable-length one.
    enum { BUFFER_SIZE = 0x2000 };
    char buf[BUFFER_SIZE];
    // Timestamp of the moment the master left us (0 - master is present / adopted).
    time_t orphan_since = 0;
    tv.tv_sec = 0;
    while (!CHILD_IS_CORPSE(child)) {
        FD_ZERO(&rfds_mask);
        FD_ZERO(&wfds_mask);
        size_t out_buf_size = peer.left + child.outbuf->size + child.errbuf->size;
        if (out_buf_size > MAX_OUT_BUF_SIZE)
            child.state |= STATE_GLUTTED;
        else
            child.state &= ~STATE_GLUTTED;
        title(child.name, child.state);

        // Watch child output streams and master socket for read events.
        // A zero entry in `rfds` means "slot is not used".
        int maxfd = 0;
        int rfds[5] = {0};
        bool exitstatus_ready = false;
        if (!CHILD_IS_ORPHAN(child)) {
            rfds[0] = peer.sock->fd;
            exitstatus_ready = child.pid && CHILD_IS_ZOMBIE(child) && !out_buf_size;
            if (CHILD_IS_ADOPTEE(child)) {
                // Not adopted yet - continue TTL countdown.
                tv.tv_sec = orphan_since - time(NULL) + ORPHAN_TIME_TO_LIVE;
                // And still do not send any data - wait for adoption.
                exitstatus_ready = false;
                out_buf_size = 0;
            } else if (out_buf_size || exitstatus_ready) { // Master ok, any data for it?
                FD_SET(peer.sock->fd, &wfds_mask);
                maxfd = peer.sock->fd;
                tv.tv_sec = 0;
            }
        } else {
            rfds[0] = srv->fd;
            out_buf_size = 0; // Nobody will read subprocess output.
            tv.tv_sec = orphan_since - time(NULL) + ORPHAN_TIME_TO_LIVE;
        }

        if (!orphan_since)
            tv.tv_sec = 0;
        else if (tv.tv_sec <= 0) {
            printf("Timeout waiting for master.\n");
            if (!CHILD_IS_ZOMBIE(child) && child.pid) {
                killpg(child.pid, SIGKILL);
                killpg(child.pid, SIGCONT);
                kill(child.pid, SIGKILL);
                kill(child.pid, SIGCONT);
            }
            killpg(0, SIGKILL); // Be a paranoid paranoiac.
        }
        tv.tv_usec = 0;

        if (!CHILD_IS_GLUTTED(child) && child.pid) {
            rfds[1] = child.outfd;
            rfds[2] = child.errfd;
        }

        if (!CHILD_IS_ZOMBIE(child))
            rfds[3] = child.pid_waiter.ctl[0];

        rfds[4] = child.sig_handler.ctl[0];

        bool has_read_fds = false;
        for (size_t i = 0; i < sizeof(rfds) / sizeof(rfds[0]); ++i) {
            if (!rfds[i])
                continue;

            FD_SET(rfds[i], &rfds_mask);
            if (rfds[i] > maxfd)
                maxfd = rfds[i];
            has_read_fds = true;
        }

        // A zero `tv.tv_sec` means "no deadline": block until some descriptor is ready.
        int ready = select(
            maxfd + 1,
            has_read_fds ? &rfds_mask : NULL,
            out_buf_size || exitstatus_ready ? &wfds_mask : NULL,
            NULL,
            tv.tv_sec ? &tv : NULL
        );
        if (ready < 0) {
            // `select()` is never restarted after a caught signal. `signalNotifier` reports the
            // signal number through the notification pipe, so simply retry: the next pass will
            // see the pipe readable and forward the signal instead of dying in `fatal()`.
            if (errno == EINTR)
                continue;
            fatal("`select` system call failed");
        }

        if (FD_ISSET(child.sig_handler.ctl[0], &rfds_mask)) { // First of all, check pending signal events
            int signum = 0;
            ssize_t got;
            do {
                got = read(child.sig_handler.ctl[0], &signum, sizeof(signum));
            } while (got < 0 && errno == EINTR);
            // Never forward a signal number that was not actually read.
            if (got != (ssize_t)sizeof(signum))
                fatal("Failed to read the pending signal number");
            killChild(signum);
            break; // This point should never be reached.
        }

        if (!CHILD_IS_ZOMBIE(child) && FD_ISSET(child.pid_waiter.ctl[0], &rfds_mask)) { // PID waiter signaled
            // Actually, this means the sub-process has been terminated.
            child.state |= STATE_ZOMBIE;
            // You MUSTN'T be paranoid here (no extra kill of child.pid) or you'd kill someone else.
            pidWaiterCleanup();
        }

        if (!CHILD_IS_ORPHAN(child) && FD_ISSET(peer.sock->fd, &rfds_mask)) { // Master says!
            if (!recvDataChunk(&peer)) { // Master disconnected. Negative value here means no more data on the socket.
                child.state |= STATE_ORPHAN;
                child.state &= ~STATE_ADOPTEE;
                releaseBufferedSocket(&peer);
                if (!orphan_since)
                    orphan_since = time(NULL);
                out_buf_size = 0; // Nobody to send data to.
                // The exit status must not be written into the released connection; keep
                // `child.pid` intact so the status can be reported to the next master.
                exitstatus_ready = false;
            } else {
                COMMAND cmd;
                // Parse all the data, which is collected in the read buffer.
                while ((cmd = parseCMD(&peer)) != CMD_NONE) {
                    switch (cmd) {
                    case CMD_INFO:
                        sendInfo(&peer, &child);
                        break;
                    case CMD_ADOPT:
                        child.state &= ~STATE_ADOPTEE;
                        orphan_since = 0;
                        break;
                    case CMD_TERM:
                        child.state |= STATE_CORPSE;
                        title(child.name, child.state);
                        break;
                    case CMD_TERMPG:
                        child.state |= STATE_CORPSE;
                        title(child.name, child.state);
                        printf("Killing own process group with SIGKILL.\n");
                        releaseServerSocket(srv);
                        if (!CHILD_IS_ZOMBIE(child) && child.pid) {
                            killpg(child.pid, SIGKILL);
                            killpg(child.pid, SIGCONT);
                            kill(child.pid, SIGKILL);
                            kill(child.pid, SIGCONT);
                        }
                        killpg(0, SIGKILL);
                        break; // This point should not be reached.
                    case CMD_CONTEXT: // The context data already fetched with @c parseCMD() function.
                    case CMD_NONE: // No parsable data received yet.
                        break;
                    }
                }
            }
        }
        if (CHILD_IS_ORPHAN(child) && FD_ISSET(srv->fd, &rfds_mask)) {
            // Aloha! New master connected! Accept the connection.
            peer.sock = acceptPeer(srv);
            child.state &= ~STATE_ORPHAN;
            child.state |= STATE_ADOPTEE;
            orphan_since = 0;
        }

        if (!CHILD_IS_GLUTTED(child)) {
            out_buf_size += readChildOut(&child, false, &rfds_mask, buf, sizeof(buf));
            out_buf_size += readChildOut(&child, true, &rfds_mask, buf, sizeof(buf));
        }

        if (CHILD_MASTER_OK(child) && out_buf_size && FD_ISSET(peer.sock->fd, &wfds_mask)) {
            out_buf_size -= !peer.left
                ? sendChildOut(&peer, &child)
                : bufferedSocketSend(&peer, NULL); // Some buffered data is not sent yet, process it first.
        }

        // During reading of child's output we can realize the child has gone.
        // Check this, and, if necessary, send appropriate message to master,
        // but do it only in case we read child's output completely and completely sent it.
        if (exitstatus_ready) {
            sendRetcode(&peer, child.retcode, child.outfd || child.errfd);
            child.pid = 0;
        }
    }

    releaseStringList(child.outbuf);
    releaseStringList(child.errbuf);

    if (!CHILD_IS_ORPHAN(child))
        releaseBufferedSocket(&peer);
    printf(
        !CHILD_IS_ZOMBIE(child)
        ? "WARN: Abnormal termination: subprocess still running!\n"
        : "Liner finished normally.\n"
    );

    releaseServerSocket(srv);

    return RETCODE_CHILD;
}
