#include <util/generic/string.h>
#include <util/generic/yexception.h>
#include <util/stream/file.h>

#include <array>
#include <cerrno>


/* The following can be configured with macros:
 *
 * Output format:
 * - OUTPUT_RAW
 * - OUTPUT_TSKV
 *
 * Fraction of requests of a given type that is kept:
 * - STATUS_200_FRACTION
 * - CLCK_FRACTION
 * - ADS_FRACTION
 * - YARU_ROBOT_REQUEST_FRACTION
 * For example, with CLCK_FRACTION=10 only every tenth request to clck is written.
 *
 * Mandatory logging of requests of a given type:
 * - NEVER_SKIP_ON_ERROR
 * - NEVER_SKIP_RETRY
 *
 * These are macros (rather than runtime options) for the following reasons:
 * - push-client does not document any way to pass parameters to the pipe
 * - no time is spent at runtime on reading parameters / parsing a config
 */


// At least one output format macro has to be defined; otherwise the build fails here.
#if !defined(OUTPUT_RAW) && !defined(OUTPUT_TSKV)
static_assert(false);
#endif

// A fraction equal to 1 is treated as "macro not set": the macro is undefined so that
// every later `#ifdef <NAME>_FRACTION` sees it as absent.

#if defined(STATUS_200_FRACTION) && STATUS_200_FRACTION == 1
#undef STATUS_200_FRACTION
#endif

#if defined(CLCK_FRACTION) && CLCK_FRACTION == 1
#undef CLCK_FRACTION
#endif

#if defined(ADS_FRACTION) && ADS_FRACTION == 1
#undef ADS_FRACTION
#endif

#if defined(YARU_ROBOT_REQUEST_FRACTION) && YARU_ROBOT_REQUEST_FRACTION == 1
#undef YARU_ROBOT_REQUEST_FRACTION
#endif

// The NEVER_SKIP_* switches default to false when the build does not set them,
// so they can always be used directly in `#if` conditions.

#ifndef NEVER_SKIP_ON_ERROR
#define NEVER_SKIP_ON_ERROR false
#endif

#ifndef NEVER_SKIP_RETRY
#define NEVER_SKIP_RETRY false
#endif


// Sampling of requests whose status is 200.
//
// Looks at the text that follows the first "<::status:" marker in `workflow`;
// when it begins with "200:" the request is counted, and only every
// STATUS_200_FRACTION-th counted request is kept (the function returns false
// for it and true for the others).
// Returns false for every other request, and always when STATUS_200_FRACTION
// is not defined.
bool MaySkip200(const TStringBuf workflow) {
#ifdef STATUS_200_FRACTION
    constexpr uint64_t keepEvery = STATUS_200_FRACTION;
    static_assert(keepEvery > 1);

    // Number of matching requests seen so far; lives for the whole process.
    static uint64_t matched = 0;

    TStringBuf tail(workflow);
    tail.NextTok("<::status:");
    if (!tail.StartsWith("200:")) {
        return false;
    }

    ++matched;
    return matched % keepEvery != 0;
#else
    Y_UNUSED(workflow);
    return false;
#endif
}


// Sampling of requests whose path starts with "/clck/".
//
// `query` is expected to be the request path. Matching requests are counted
// and only every CLCK_FRACTION-th of them is kept (false is returned for it,
// true for the rest). Non-matching requests are never skipped; without
// CLCK_FRACTION the function always returns false.
bool MaySkipClck(const TStringBuf query) {
#ifdef CLCK_FRACTION
    constexpr uint64_t keepEvery = CLCK_FRACTION;
    static_assert(keepEvery > 1);

    // Number of matching requests seen so far; lives for the whole process.
    static uint64_t matched = 0;

    if (!query.StartsWith("/clck/")) {
        return false;
    }

    ++matched;
    return matched % keepEvery != 0;
#else
    Y_UNUSED(query);
    return false;
#endif
}


// Sampling of requests whose path starts with "/ads/".
//
// `query` is expected to be the request path. Matching requests are counted
// and only every ADS_FRACTION-th of them is kept (false is returned for it,
// true for the rest). Non-matching requests are never skipped; without
// ADS_FRACTION the function always returns false.
bool MaySkipAds(const TStringBuf query) {
#ifdef ADS_FRACTION
    constexpr uint64_t keepEvery = ADS_FRACTION;
    static_assert(keepEvery > 1);

    // Number of matching requests seen so far; lives for the whole process.
    static uint64_t matched = 0;

    if (!query.StartsWith("/ads/")) {
        return false;
    }

    ++matched;
    return matched % keepEvery != 0;
#else
    Y_UNUSED(query);
    return false;
#endif
}


// Path-based sampling (clck / ads).
//
// `query` is the request line; only lines starting with "GET " or "POST " are
// considered. The method prefix is stripped and the rest is handed to
// MaySkipClck and then, if that did not ask to skip, to MaySkipAds.
// Always returns false when neither CLCK_FRACTION nor ADS_FRACTION is defined.
bool MaySkipUpstream(const TStringBuf query) {
#if defined(CLCK_FRACTION) || defined(ADS_FRACTION)
    TStringBuf path(query);

    const bool methodStripped = path.SkipPrefix("GET ") || path.SkipPrefix("POST ");
    if (!methodStripped) {
        return false;
    }

    if (MaySkipClck(path)) {
        return true;
    }
    return MaySkipAds(path);
#else
    Y_UNUSED(query);
    return false;
#endif
}


// Sampling of "robot request" entries served by the "yaru" upstream.
//
// A workflow matches when the text after the first "<::reason:" marker starts
// with "robot request:" and, further on, the text after the next
// "<::upstream:" marker starts with "yaru:". Matching requests are counted and
// only every YARU_ROBOT_REQUEST_FRACTION-th of them is kept (false is returned
// for it, true for the rest). Always returns false when the macro is not
// defined.
//
// NOTE(review): the value returned by NextTok is tested as a boolean;
// presumably it is truthy only when the text preceding the marker is
// non-empty - confirm against TStringBuf.
bool MaySkipYaruRobotRequest(const TStringBuf workflow) {
#ifdef YARU_ROBOT_REQUEST_FRACTION
    constexpr uint64_t keepEvery = YARU_ROBOT_REQUEST_FRACTION;
    static_assert(keepEvery > 1);

    // Number of matching requests seen so far; lives for the whole process.
    static uint64_t matched = 0;

    TStringBuf cursor(workflow);
    if (!cursor.NextTok("<::reason:")) {
        return false;
    }
    if (!cursor.StartsWith("robot request:")) {
        return false;
    }
    if (!cursor.NextTok("<::upstream:")) {
        return false;
    }
    if (!cursor.StartsWith("yaru:")) {
        return false;
    }

    ++matched;
    return matched % keepEvery != 0;
#else
    Y_UNUSED(workflow);
    return false;
#endif
}


// Tells whether a line must be written regardless of any sampling.
//
// Returns true when `workflow` contains "on_error" (only if
// NEVER_SKIP_ON_ERROR is enabled) or "retry" (only if NEVER_SKIP_RETRY is
// enabled); returns false otherwise.
bool ForceLogging(const TStringBuf workflow) {
    // Keeps the parameter "used" when both switches are off.
    Y_UNUSED(workflow);

#if NEVER_SKIP_ON_ERROR
    if (workflow.find("on_error") != TString::npos) {
        return true;
    }
#endif
#if NEVER_SKIP_RETRY
    if (workflow.find("retry") != TString::npos) {
        return true;
    }
#endif

    return false;
}


#ifdef OUTPUT_RAW
// Filters one access-log line and, if it is kept, writes it to Cout unchanged
// (followed by '\n' and a flush).
//
// Only the pieces needed by the enabled filters are extracted:
// - the query: the text between the first and the second '"' (needed for
//   CLCK_FRACTION / ADS_FRACTION);
// - the workflow: the last tab-separated field of what is left of the line
//   (needed for the status / robot-request sampling and the NEVER_SKIP_*
//   switches).
// A line that fails to parse is dropped silently.
void ProcessLineRawOutput(const TString& accessLog) {
    TStringBuf rest(accessLog);
    TStringBuf query;
    TStringBuf workflow;
    bool parsed = true;

#if defined(CLCK_FRACTION) || defined(ADS_FRACTION)
    // First pass consumes everything up to the opening quote, second pass
    // leaves the quoted text in `query`.
    for (int pass = 0; pass < 2; ++pass) {
        if (!rest.NextTok('"', query)) {
            parsed = false;
        }
    }
#endif
#if defined(STATUS_200_FRACTION) || defined(YARU_ROBOT_REQUEST_FRACTION) || NEVER_SKIP_ON_ERROR || NEVER_SKIP_RETRY
    // Keep overwriting `workflow` until the input runs out: the last field wins.
    while (rest.NextTok('\t', workflow)) {
    }
    // NOTE(review): the loop above only stops once NextTok fails, which
    // presumably means `rest` is already empty, so this check looks
    // redundant - confirm.
    if (!rest.empty()) {
        parsed = false;
    }
#endif

    if (!parsed) {
        return;
    }

    if (!ForceLogging(workflow)) {
        const bool skip = MaySkipUpstream(query) || MaySkip200(workflow) || MaySkipYaruRobotRequest(workflow);
        if (skip) {
            return;
        }
    }

    Cout << accessLog << '\n' << Flush;
}
#endif


#ifdef OUTPUT_TSKV
// Filters one access-log line and, if it is kept, writes it to Cout as
// "name=value" pairs.
//
// Expected input: three ';'-terminated push-client prefix tokens followed by
// either 7 tab-separated fields (`fields`) or 8 (`fieldsExtended`, which adds
// dst_ip_port right after ip_port). Any other shape is dropped silently.
//
// Output: the three prefix tokens, each followed by ';', then every field as
// "name=value" followed by '\t', then '\n' and a flush.
void ProcessLineTSKVOutput(const TString& accessLog) {
    constexpr std::array<const TStringBuf, 7> fields = {
        "ip_port",
        "timestamp",
        "query",
        "work_time",
        "referer",
        "host",
        "workflow",
    };

    constexpr std::array<const TStringBuf, 8> fieldsExtended = {
        "ip_port",
        "dst_ip_port",
        "timestamp",
        "query",
        "work_time",
        "referer",
        "host",
        "workflow",
    };
    static_assert(fields.size() + 1 == fieldsExtended.size());

    // Position of "query" in `fields`; the extended layout inserts exactly one
    // field (dst_ip_port) in front of it.
    constexpr size_t queryIndex = 2;

    TStringBuf pushClientPrefix[3];
    TStringBuf values[fieldsExtended.size()];

    TStringBuf remainder(accessLog);

    bool fail = false;
    for (auto& prefix : pushClientPrefix) {
        fail |= !remainder.NextTok(';', prefix);
    }

    // The first fields.size() values are mandatory in both layouts.
    for (size_t i = 0; i < fields.size(); ++i) {
        fail |= !remainder.NextTok('\t', values[i]);
    }

    // Anything left after the mandatory part means the line uses the extended layout.
    bool extended = false;
    if (!fail && !remainder.empty()) {
        fail |= !remainder.NextTok('\t', values[fields.size()]);
        extended = true;
    }

    // More than fieldsExtended.size() fields is a malformed line.
    fail |= !remainder.empty();
    if (fail) {
        // stderr is closed when run under push-client, no point in writing there
        return;
    }

    const size_t fieldCount = extended ? fieldsExtended.size() : fields.size();
    // Plain pointers: iterators of arrays with different sizes are not
    // guaranteed to share a type.
    const TStringBuf* const fieldNames = extended ? fieldsExtended.data() : fields.data();

    // The query has to be picked only after the layout is known: in the
    // extended layout it sits one slot further than in the basic one.
    // SubStr(1) drops the first character (presumably the opening quote - confirm).
    const TStringBuf query = values[queryIndex + (extended ? 1 : 0)].SubStr(1);
    // The workflow is the last field in both layouts.
    const TStringBuf workflow = values[fieldCount - 1];

    if (
        !ForceLogging(workflow) &&
        (MaySkipUpstream(query) || MaySkip200(workflow) || MaySkipYaruRobotRequest(workflow))
    ) {
        return;
    }

    for (const auto& prefix : pushClientPrefix) {
        Cout << prefix << ';';
    }

    for (size_t i = 0; i < fieldCount; ++i) {
        Cout << fieldNames[i] << '=' << values[i] << '\t';
    }

    Cout << '\n' << Flush;
}
#endif


int main() {
    try {
        TString accessLog;
        while (Cin.ReadLine(accessLog)) {
#ifdef OUTPUT_RAW
            ProcessLineRawOutput(accessLog);
#elif defined(OUTPUT_TSKV)
            ProcessLineTSKVOutput(accessLog);
#endif
        }
    } catch (const TSystemError&) {
        if (errno != EPIPE) {
            throw;
        }
    }
    return 0;
}

